| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.7141440190438405, |
| "eval_steps": 50, |
| "global_step": 3000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0023804800634794686, |
| "grad_norm": 43.0, |
| "learning_rate": 1.6875e-07, |
| "loss": 1.6633, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.004760960126958937, |
| "grad_norm": 63.5, |
| "learning_rate": 3.5625000000000003e-07, |
| "loss": 1.6375, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.007141440190438405, |
| "grad_norm": 49.25, |
| "learning_rate": 5.4375e-07, |
| "loss": 1.6325, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.009521920253917874, |
| "grad_norm": 43.25, |
| "learning_rate": 7.312500000000001e-07, |
| "loss": 1.6621, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.011902400317397342, |
| "grad_norm": 39.75, |
| "learning_rate": 9.1875e-07, |
| "loss": 1.5987, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.01428288038087681, |
| "grad_norm": 35.25, |
| "learning_rate": 1.10625e-06, |
| "loss": 1.6187, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.01666336044435628, |
| "grad_norm": 37.25, |
| "learning_rate": 1.29375e-06, |
| "loss": 1.5005, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.01904384050783575, |
| "grad_norm": 23.875, |
| "learning_rate": 1.48125e-06, |
| "loss": 1.4614, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.021424320571315214, |
| "grad_norm": 15.5625, |
| "learning_rate": 1.6687500000000001e-06, |
| "loss": 1.3327, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.023804800634794683, |
| "grad_norm": 17.0, |
| "learning_rate": 1.85625e-06, |
| "loss": 1.3133, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.026185280698274152, |
| "grad_norm": 14.0625, |
| "learning_rate": 2.04375e-06, |
| "loss": 1.2393, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.02856576076175362, |
| "grad_norm": 10.6875, |
| "learning_rate": 2.23125e-06, |
| "loss": 1.2397, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.03094624082523309, |
| "grad_norm": 8.9375, |
| "learning_rate": 2.41875e-06, |
| "loss": 1.1328, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.03332672088871256, |
| "grad_norm": 7.75, |
| "learning_rate": 2.60625e-06, |
| "loss": 1.1205, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.03570720095219203, |
| "grad_norm": 10.3125, |
| "learning_rate": 2.79375e-06, |
| "loss": 1.0921, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.0380876810156715, |
| "grad_norm": 7.90625, |
| "learning_rate": 2.98125e-06, |
| "loss": 1.0515, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.04046816107915096, |
| "grad_norm": 6.90625, |
| "learning_rate": 3.16875e-06, |
| "loss": 1.0178, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.04284864114263043, |
| "grad_norm": 7.78125, |
| "learning_rate": 3.3562500000000003e-06, |
| "loss": 0.9527, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.0452291212061099, |
| "grad_norm": 9.8125, |
| "learning_rate": 3.5437499999999997e-06, |
| "loss": 0.9037, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.047609601269589366, |
| "grad_norm": 7.03125, |
| "learning_rate": 3.73125e-06, |
| "loss": 0.8909, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.049990081333068835, |
| "grad_norm": 7.34375, |
| "learning_rate": 3.9187499999999995e-06, |
| "loss": 0.8759, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.052370561396548304, |
| "grad_norm": 8.9375, |
| "learning_rate": 4.10625e-06, |
| "loss": 0.8669, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.05475104146002777, |
| "grad_norm": 9.5625, |
| "learning_rate": 4.29375e-06, |
| "loss": 0.8139, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.05713152152350724, |
| "grad_norm": 6.0625, |
| "learning_rate": 4.4812500000000005e-06, |
| "loss": 0.813, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.05951200158698671, |
| "grad_norm": 9.875, |
| "learning_rate": 4.668750000000001e-06, |
| "loss": 0.7689, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.06189248165046618, |
| "grad_norm": 40.0, |
| "learning_rate": 4.8562499999999995e-06, |
| "loss": 0.7372, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.06427296171394564, |
| "grad_norm": 7.34375, |
| "learning_rate": 5.04375e-06, |
| "loss": 0.7333, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.06665344177742512, |
| "grad_norm": 7.15625, |
| "learning_rate": 5.23125e-06, |
| "loss": 0.706, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.06903392184090458, |
| "grad_norm": 10.8125, |
| "learning_rate": 5.41875e-06, |
| "loss": 0.6806, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.07141440190438406, |
| "grad_norm": 10.75, |
| "learning_rate": 5.606250000000001e-06, |
| "loss": 0.6685, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.07379488196786352, |
| "grad_norm": 7.59375, |
| "learning_rate": 5.79375e-06, |
| "loss": 0.6782, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.076175362031343, |
| "grad_norm": 10.625, |
| "learning_rate": 5.98125e-06, |
| "loss": 0.6117, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.07855584209482246, |
| "grad_norm": 11.0625, |
| "learning_rate": 6.16875e-06, |
| "loss": 0.6061, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.08093632215830192, |
| "grad_norm": 11.125, |
| "learning_rate": 6.35625e-06, |
| "loss": 0.5569, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.0833168022217814, |
| "grad_norm": 12.375, |
| "learning_rate": 6.543750000000001e-06, |
| "loss": 0.5565, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.08569728228526086, |
| "grad_norm": 9.875, |
| "learning_rate": 6.73125e-06, |
| "loss": 0.5461, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.08807776234874033, |
| "grad_norm": 13.0625, |
| "learning_rate": 6.9187500000000005e-06, |
| "loss": 0.5104, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.0904582424122198, |
| "grad_norm": 12.375, |
| "learning_rate": 7.10625e-06, |
| "loss": 0.4818, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.09283872247569927, |
| "grad_norm": 12.75, |
| "learning_rate": 7.29375e-06, |
| "loss": 0.4644, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.09521920253917873, |
| "grad_norm": 23.375, |
| "learning_rate": 7.481250000000001e-06, |
| "loss": 0.4571, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.09759968260265821, |
| "grad_norm": 11.0625, |
| "learning_rate": 7.66875e-06, |
| "loss": 0.4104, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.09998016266613767, |
| "grad_norm": 12.125, |
| "learning_rate": 7.856250000000001e-06, |
| "loss": 0.3985, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.10236064272961715, |
| "grad_norm": 16.0, |
| "learning_rate": 8.04375e-06, |
| "loss": 0.3792, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.10474112279309661, |
| "grad_norm": 15.3125, |
| "learning_rate": 8.23125e-06, |
| "loss": 0.3884, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.10712160285657607, |
| "grad_norm": 13.4375, |
| "learning_rate": 8.418750000000001e-06, |
| "loss": 0.3524, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.10950208292005555, |
| "grad_norm": 10.625, |
| "learning_rate": 8.606249999999999e-06, |
| "loss": 0.3237, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.11188256298353501, |
| "grad_norm": 13.5, |
| "learning_rate": 8.79375e-06, |
| "loss": 0.3263, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.11426304304701448, |
| "grad_norm": 11.5625, |
| "learning_rate": 8.98125e-06, |
| "loss": 0.2895, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.11664352311049395, |
| "grad_norm": 13.75, |
| "learning_rate": 9.16875e-06, |
| "loss": 0.2743, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.11902400317397342, |
| "grad_norm": 8.75, |
| "learning_rate": 9.35625e-06, |
| "loss": 0.2516, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.12140448323745288, |
| "grad_norm": 13.4375, |
| "learning_rate": 9.54375e-06, |
| "loss": 0.2555, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.12378496330093236, |
| "grad_norm": 16.125, |
| "learning_rate": 9.731250000000001e-06, |
| "loss": 0.2468, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.12616544336441182, |
| "grad_norm": 18.125, |
| "learning_rate": 9.91875e-06, |
| "loss": 0.2166, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.12854592342789128, |
| "grad_norm": 13.5625, |
| "learning_rate": 1.010625e-05, |
| "loss": 0.2262, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.13092640349137075, |
| "grad_norm": 16.625, |
| "learning_rate": 1.0293750000000001e-05, |
| "loss": 0.2049, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.13330688355485024, |
| "grad_norm": 10.8125, |
| "learning_rate": 1.048125e-05, |
| "loss": 0.1753, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.1356873636183297, |
| "grad_norm": 9.0625, |
| "learning_rate": 1.0668750000000002e-05, |
| "loss": 0.1566, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.13806784368180916, |
| "grad_norm": 10.6875, |
| "learning_rate": 1.085625e-05, |
| "loss": 0.19, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.14044832374528862, |
| "grad_norm": 8.8125, |
| "learning_rate": 1.1043749999999999e-05, |
| "loss": 0.1594, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.1428288038087681, |
| "grad_norm": 10.875, |
| "learning_rate": 1.123125e-05, |
| "loss": 0.1735, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.14520928387224757, |
| "grad_norm": 8.625, |
| "learning_rate": 1.141875e-05, |
| "loss": 0.1394, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.14758976393572704, |
| "grad_norm": 12.0625, |
| "learning_rate": 1.1606250000000001e-05, |
| "loss": 0.1358, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.1499702439992065, |
| "grad_norm": 8.3125, |
| "learning_rate": 1.179375e-05, |
| "loss": 0.1099, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.152350724062686, |
| "grad_norm": 8.25, |
| "learning_rate": 1.198125e-05, |
| "loss": 0.1155, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.15473120412616545, |
| "grad_norm": 12.5, |
| "learning_rate": 1.2168750000000001e-05, |
| "loss": 0.1133, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.1571116841896449, |
| "grad_norm": 11.125, |
| "learning_rate": 1.235625e-05, |
| "loss": 0.0946, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.15949216425312437, |
| "grad_norm": 9.875, |
| "learning_rate": 1.2543750000000002e-05, |
| "loss": 0.1233, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.16187264431660384, |
| "grad_norm": 10.8125, |
| "learning_rate": 1.2731250000000001e-05, |
| "loss": 0.1115, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.16425312438008333, |
| "grad_norm": 7.9375, |
| "learning_rate": 1.2918749999999999e-05, |
| "loss": 0.1002, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.1666336044435628, |
| "grad_norm": 9.1875, |
| "learning_rate": 1.310625e-05, |
| "loss": 0.0825, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.16901408450704225, |
| "grad_norm": 22.0, |
| "learning_rate": 1.329375e-05, |
| "loss": 0.087, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.1713945645705217, |
| "grad_norm": 6.84375, |
| "learning_rate": 1.348125e-05, |
| "loss": 0.0843, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.1737750446340012, |
| "grad_norm": 10.625, |
| "learning_rate": 1.366875e-05, |
| "loss": 0.068, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.17615552469748066, |
| "grad_norm": 6.75, |
| "learning_rate": 1.385625e-05, |
| "loss": 0.0756, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.17853600476096013, |
| "grad_norm": 8.25, |
| "learning_rate": 1.4043750000000001e-05, |
| "loss": 0.0678, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.1809164848244396, |
| "grad_norm": 6.71875, |
| "learning_rate": 1.423125e-05, |
| "loss": 0.069, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.18329696488791905, |
| "grad_norm": 7.625, |
| "learning_rate": 1.4418750000000001e-05, |
| "loss": 0.0677, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.18567744495139854, |
| "grad_norm": 6.03125, |
| "learning_rate": 1.4606250000000001e-05, |
| "loss": 0.0644, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.188057925014878, |
| "grad_norm": 5.40625, |
| "learning_rate": 1.479375e-05, |
| "loss": 0.0569, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.19043840507835746, |
| "grad_norm": 5.625, |
| "learning_rate": 1.4981250000000002e-05, |
| "loss": 0.043, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.19281888514183693, |
| "grad_norm": 6.0, |
| "learning_rate": 1.4960305792413996e-05, |
| "loss": 0.0619, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.19519936520531642, |
| "grad_norm": 7.15625, |
| "learning_rate": 1.4916201117318435e-05, |
| "loss": 0.057, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.19757984526879588, |
| "grad_norm": 7.0625, |
| "learning_rate": 1.4872096442222876e-05, |
| "loss": 0.0448, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.19996032533227534, |
| "grad_norm": 7.25, |
| "learning_rate": 1.4827991767127316e-05, |
| "loss": 0.0473, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.2023408053957548, |
| "grad_norm": 5.28125, |
| "learning_rate": 1.4783887092031756e-05, |
| "loss": 0.0494, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.2047212854592343, |
| "grad_norm": 6.6875, |
| "learning_rate": 1.4739782416936196e-05, |
| "loss": 0.0503, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.20710176552271375, |
| "grad_norm": 5.25, |
| "learning_rate": 1.4695677741840636e-05, |
| "loss": 0.0387, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.20948224558619322, |
| "grad_norm": 10.875, |
| "learning_rate": 1.4651573066745075e-05, |
| "loss": 0.0414, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.21186272564967268, |
| "grad_norm": 6.5625, |
| "learning_rate": 1.4607468391649515e-05, |
| "loss": 0.0517, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.21424320571315214, |
| "grad_norm": 3.9375, |
| "learning_rate": 1.4563363716553955e-05, |
| "loss": 0.0319, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.21662368577663163, |
| "grad_norm": 6.28125, |
| "learning_rate": 1.4519259041458395e-05, |
| "loss": 0.0413, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.2190041658401111, |
| "grad_norm": 4.6875, |
| "learning_rate": 1.4475154366362835e-05, |
| "loss": 0.0399, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.22138464590359055, |
| "grad_norm": 6.28125, |
| "learning_rate": 1.4431049691267275e-05, |
| "loss": 0.0304, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.22376512596707002, |
| "grad_norm": 4.09375, |
| "learning_rate": 1.4386945016171715e-05, |
| "loss": 0.0295, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.2261456060305495, |
| "grad_norm": 5.5, |
| "learning_rate": 1.4342840341076155e-05, |
| "loss": 0.0329, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.22852608609402897, |
| "grad_norm": 4.75, |
| "learning_rate": 1.4298735665980595e-05, |
| "loss": 0.0313, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.23090656615750843, |
| "grad_norm": 3.796875, |
| "learning_rate": 1.4254630990885035e-05, |
| "loss": 0.0309, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.2332870462209879, |
| "grad_norm": 7.875, |
| "learning_rate": 1.4210526315789473e-05, |
| "loss": 0.0373, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.23566752628446735, |
| "grad_norm": 2.8125, |
| "learning_rate": 1.4166421640693913e-05, |
| "loss": 0.0294, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.23804800634794684, |
| "grad_norm": 3.8125, |
| "learning_rate": 1.4122316965598353e-05, |
| "loss": 0.0266, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2404284864114263, |
| "grad_norm": 5.875, |
| "learning_rate": 1.4078212290502793e-05, |
| "loss": 0.0251, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.24280896647490577, |
| "grad_norm": 6.15625, |
| "learning_rate": 1.4034107615407233e-05, |
| "loss": 0.0216, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.24518944653838523, |
| "grad_norm": 3.03125, |
| "learning_rate": 1.3990002940311673e-05, |
| "loss": 0.0276, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.24756992660186472, |
| "grad_norm": 3.0625, |
| "learning_rate": 1.3945898265216113e-05, |
| "loss": 0.0203, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.24995040666534418, |
| "grad_norm": 6.3125, |
| "learning_rate": 1.3901793590120553e-05, |
| "loss": 0.0209, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.25233088672882364, |
| "grad_norm": 4.125, |
| "learning_rate": 1.3857688915024993e-05, |
| "loss": 0.0189, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.2547113667923031, |
| "grad_norm": 2.1875, |
| "learning_rate": 1.3813584239929433e-05, |
| "loss": 0.027, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.25709184685578257, |
| "grad_norm": 2.734375, |
| "learning_rate": 1.3769479564833873e-05, |
| "loss": 0.0206, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.25947232691926203, |
| "grad_norm": 3.125, |
| "learning_rate": 1.3725374889738312e-05, |
| "loss": 0.028, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.2618528069827415, |
| "grad_norm": 3.109375, |
| "learning_rate": 1.3681270214642752e-05, |
| "loss": 0.0182, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.264233287046221, |
| "grad_norm": 2.65625, |
| "learning_rate": 1.3637165539547192e-05, |
| "loss": 0.0247, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.2666137671097005, |
| "grad_norm": 2.46875, |
| "learning_rate": 1.3593060864451632e-05, |
| "loss": 0.0195, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.26899424717317993, |
| "grad_norm": 2.609375, |
| "learning_rate": 1.3548956189356072e-05, |
| "loss": 0.0185, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.2713747272366594, |
| "grad_norm": 2.546875, |
| "learning_rate": 1.3504851514260514e-05, |
| "loss": 0.0188, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.27375520730013886, |
| "grad_norm": 2.53125, |
| "learning_rate": 1.3460746839164952e-05, |
| "loss": 0.0171, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.2761356873636183, |
| "grad_norm": 4.15625, |
| "learning_rate": 1.3416642164069392e-05, |
| "loss": 0.0208, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.2785161674270978, |
| "grad_norm": 3.328125, |
| "learning_rate": 1.3372537488973832e-05, |
| "loss": 0.0162, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.28089664749057724, |
| "grad_norm": 3.984375, |
| "learning_rate": 1.3328432813878272e-05, |
| "loss": 0.0196, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.28327712755405676, |
| "grad_norm": 3.1875, |
| "learning_rate": 1.3284328138782712e-05, |
| "loss": 0.0166, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.2856576076175362, |
| "grad_norm": 2.28125, |
| "learning_rate": 1.324022346368715e-05, |
| "loss": 0.016, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.2880380876810157, |
| "grad_norm": 3.03125, |
| "learning_rate": 1.319611878859159e-05, |
| "loss": 0.0148, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.29041856774449515, |
| "grad_norm": 1.90625, |
| "learning_rate": 1.315201411349603e-05, |
| "loss": 0.014, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.2927990478079746, |
| "grad_norm": 3.078125, |
| "learning_rate": 1.310790943840047e-05, |
| "loss": 0.0129, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.2951795278714541, |
| "grad_norm": 2.234375, |
| "learning_rate": 1.306380476330491e-05, |
| "loss": 0.0185, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.29756000793493353, |
| "grad_norm": 4.125, |
| "learning_rate": 1.301970008820935e-05, |
| "loss": 0.0134, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.299940487998413, |
| "grad_norm": 5.875, |
| "learning_rate": 1.297559541311379e-05, |
| "loss": 0.0166, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.30232096806189246, |
| "grad_norm": 3.140625, |
| "learning_rate": 1.293149073801823e-05, |
| "loss": 0.0147, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.304701448125372, |
| "grad_norm": 15.375, |
| "learning_rate": 1.288738606292267e-05, |
| "loss": 0.0272, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.30708192818885144, |
| "grad_norm": 3.046875, |
| "learning_rate": 1.284328138782711e-05, |
| "loss": 0.0152, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.3094624082523309, |
| "grad_norm": 1.9375, |
| "learning_rate": 1.279917671273155e-05, |
| "loss": 0.0127, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.31184288831581036, |
| "grad_norm": 3.484375, |
| "learning_rate": 1.2755072037635989e-05, |
| "loss": 0.0117, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.3142233683792898, |
| "grad_norm": 8.75, |
| "learning_rate": 1.2710967362540429e-05, |
| "loss": 0.0152, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.3166038484427693, |
| "grad_norm": 2.640625, |
| "learning_rate": 1.2666862687444869e-05, |
| "loss": 0.0116, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.31898432850624875, |
| "grad_norm": 2.203125, |
| "learning_rate": 1.2622758012349309e-05, |
| "loss": 0.0098, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.3213648085697282, |
| "grad_norm": 2.59375, |
| "learning_rate": 1.257865333725375e-05, |
| "loss": 0.0116, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.3237452886332077, |
| "grad_norm": 4.21875, |
| "learning_rate": 1.2534548662158189e-05, |
| "loss": 0.0111, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.3261257686966872, |
| "grad_norm": 1.9453125, |
| "learning_rate": 1.2490443987062629e-05, |
| "loss": 0.0137, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.32850624876016665, |
| "grad_norm": 2.546875, |
| "learning_rate": 1.2446339311967069e-05, |
| "loss": 0.009, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.3308867288236461, |
| "grad_norm": 1.5, |
| "learning_rate": 1.2402234636871509e-05, |
| "loss": 0.0089, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.3332672088871256, |
| "grad_norm": 1.828125, |
| "learning_rate": 1.2358129961775949e-05, |
| "loss": 0.0184, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.33564768895060504, |
| "grad_norm": 2.59375, |
| "learning_rate": 1.2314025286680387e-05, |
| "loss": 0.0115, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.3380281690140845, |
| "grad_norm": 2.828125, |
| "learning_rate": 1.2269920611584827e-05, |
| "loss": 0.0155, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.34040864907756396, |
| "grad_norm": 3.0625, |
| "learning_rate": 1.2225815936489268e-05, |
| "loss": 0.0147, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.3427891291410434, |
| "grad_norm": 2.03125, |
| "learning_rate": 1.2181711261393708e-05, |
| "loss": 0.0085, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.3451696092045229, |
| "grad_norm": 2.84375, |
| "learning_rate": 1.2137606586298148e-05, |
| "loss": 0.0118, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.3475500892680024, |
| "grad_norm": 2.203125, |
| "learning_rate": 1.209350191120259e-05, |
| "loss": 0.0102, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.34993056933148187, |
| "grad_norm": 2.359375, |
| "learning_rate": 1.2049397236107028e-05, |
| "loss": 0.0113, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.35231104939496133, |
| "grad_norm": 2.0, |
| "learning_rate": 1.2005292561011468e-05, |
| "loss": 0.0096, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.3546915294584408, |
| "grad_norm": 2.03125, |
| "learning_rate": 1.1961187885915908e-05, |
| "loss": 0.0103, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.35707200952192025, |
| "grad_norm": 3.28125, |
| "learning_rate": 1.1917083210820348e-05, |
| "loss": 0.0146, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.3594524895853997, |
| "grad_norm": 2.53125, |
| "learning_rate": 1.1872978535724788e-05, |
| "loss": 0.0079, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.3618329696488792, |
| "grad_norm": 2.578125, |
| "learning_rate": 1.1828873860629226e-05, |
| "loss": 0.0103, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.36421344971235864, |
| "grad_norm": 2.75, |
| "learning_rate": 1.1784769185533666e-05, |
| "loss": 0.0066, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.3665939297758381, |
| "grad_norm": 2.921875, |
| "learning_rate": 1.1740664510438106e-05, |
| "loss": 0.0081, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.3689744098393176, |
| "grad_norm": 3.1875, |
| "learning_rate": 1.1696559835342546e-05, |
| "loss": 0.0102, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.3713548899027971, |
| "grad_norm": 3.6875, |
| "learning_rate": 1.1652455160246988e-05, |
| "loss": 0.0077, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.37373536996627654, |
| "grad_norm": 1.1953125, |
| "learning_rate": 1.1608350485151428e-05, |
| "loss": 0.0091, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.376115850029756, |
| "grad_norm": 3.515625, |
| "learning_rate": 1.1564245810055866e-05, |
| "loss": 0.0111, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.37849633009323547, |
| "grad_norm": 1.890625, |
| "learning_rate": 1.1520141134960306e-05, |
| "loss": 0.0086, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.38087681015671493, |
| "grad_norm": 2.140625, |
| "learning_rate": 1.1476036459864746e-05, |
| "loss": 0.0082, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.3832572902201944, |
| "grad_norm": 1.828125, |
| "learning_rate": 1.1431931784769186e-05, |
| "loss": 0.0073, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.38563777028367385, |
| "grad_norm": 2.921875, |
| "learning_rate": 1.1387827109673626e-05, |
| "loss": 0.0105, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.3880182503471533, |
| "grad_norm": 1.6953125, |
| "learning_rate": 1.1343722434578065e-05, |
| "loss": 0.0074, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.39039873041063283, |
| "grad_norm": 4.09375, |
| "learning_rate": 1.1299617759482505e-05, |
| "loss": 0.0062, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.3927792104741123, |
| "grad_norm": 1.7734375, |
| "learning_rate": 1.1255513084386945e-05, |
| "loss": 0.0076, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.39515969053759176, |
| "grad_norm": 2.078125, |
| "learning_rate": 1.1211408409291385e-05, |
| "loss": 0.0074, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.3975401706010712, |
| "grad_norm": 2.359375, |
| "learning_rate": 1.1167303734195826e-05, |
| "loss": 0.0088, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.3999206506645507, |
| "grad_norm": 2.203125, |
| "learning_rate": 1.1123199059100265e-05, |
| "loss": 0.0083, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.40230113072803014, |
| "grad_norm": 4.5, |
| "learning_rate": 1.1079094384004705e-05, |
| "loss": 0.0067, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.4046816107915096, |
| "grad_norm": 1.1953125, |
| "learning_rate": 1.1034989708909145e-05, |
| "loss": 0.0121, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.40706209085498907, |
| "grad_norm": 2.03125, |
| "learning_rate": 1.0990885033813585e-05, |
| "loss": 0.0061, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.4094425709184686, |
| "grad_norm": 2.078125, |
| "learning_rate": 1.0946780358718025e-05, |
| "loss": 0.0065, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.41182305098194805, |
| "grad_norm": 1.8125, |
| "learning_rate": 1.0902675683622465e-05, |
| "loss": 0.0092, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.4142035310454275, |
| "grad_norm": 1.640625, |
| "learning_rate": 1.0858571008526903e-05, |
| "loss": 0.0066, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.41658401110890697, |
| "grad_norm": 1.6640625, |
| "learning_rate": 1.0814466333431343e-05, |
| "loss": 0.0062, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.41896449117238643, |
| "grad_norm": 1.125, |
| "learning_rate": 1.0770361658335783e-05, |
| "loss": 0.0054, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.4213449712358659, |
| "grad_norm": 2.03125, |
| "learning_rate": 1.0726256983240223e-05, |
| "loss": 0.0077, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.42372545129934536, |
| "grad_norm": 1.8515625, |
| "learning_rate": 1.0682152308144665e-05, |
| "loss": 0.0073, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.4261059313628248, |
| "grad_norm": 1.59375, |
| "learning_rate": 1.0638047633049103e-05, |
| "loss": 0.0065, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.4284864114263043, |
| "grad_norm": 1.9609375, |
| "learning_rate": 1.0593942957953543e-05, |
| "loss": 0.0054, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.4308668914897838, |
| "grad_norm": 2.53125, |
| "learning_rate": 1.0549838282857983e-05, |
| "loss": 0.0072, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.43324737155326326, |
| "grad_norm": 1.515625, |
| "learning_rate": 1.0505733607762423e-05, |
| "loss": 0.006, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.4356278516167427, |
| "grad_norm": 2.46875, |
| "learning_rate": 1.0461628932666863e-05, |
| "loss": 0.0086, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.4380083316802222, |
| "grad_norm": 1.2265625, |
| "learning_rate": 1.0417524257571302e-05, |
| "loss": 0.0057, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.44038881174370165, |
| "grad_norm": 1.5390625, |
| "learning_rate": 1.0373419582475742e-05, |
| "loss": 0.0062, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.4427692918071811, |
| "grad_norm": 0.9140625, |
| "learning_rate": 1.0329314907380182e-05, |
| "loss": 0.0059, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.44514977187066057, |
| "grad_norm": 2.421875, |
| "learning_rate": 1.0285210232284622e-05, |
| "loss": 0.006, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.44753025193414003, |
| "grad_norm": 2.515625, |
| "learning_rate": 1.0241105557189063e-05, |
| "loss": 0.0061, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.4499107319976195, |
| "grad_norm": 2.75, |
| "learning_rate": 1.0197000882093504e-05, |
| "loss": 0.0056, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.452291212061099, |
| "grad_norm": 2.8125, |
| "learning_rate": 1.0152896206997942e-05, |
| "loss": 0.0057, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.4546716921245785, |
| "grad_norm": 2.09375, |
| "learning_rate": 1.0108791531902382e-05, |
| "loss": 0.0045, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.45705217218805794, |
| "grad_norm": 1.2890625, |
| "learning_rate": 1.0064686856806822e-05, |
| "loss": 0.0061, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.4594326522515374, |
| "grad_norm": 1.5078125, |
| "learning_rate": 1.0020582181711262e-05, |
| "loss": 0.0048, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.46181313231501686, |
| "grad_norm": 0.9921875, |
| "learning_rate": 9.976477506615702e-06, |
| "loss": 0.0134, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.4641936123784963, |
| "grad_norm": 3.1875, |
| "learning_rate": 9.93237283152014e-06, |
| "loss": 0.0051, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.4665740924419758, |
| "grad_norm": 1.59375, |
| "learning_rate": 9.88826815642458e-06, |
| "loss": 0.0049, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.46895457250545525, |
| "grad_norm": 1.9453125, |
| "learning_rate": 9.84416348132902e-06, |
| "loss": 0.0048, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.4713350525689347, |
| "grad_norm": 1.7578125, |
| "learning_rate": 9.80005880623346e-06, |
| "loss": 0.0163, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.4737155326324142, |
| "grad_norm": 1.5234375, |
| "learning_rate": 9.755954131137902e-06, |
| "loss": 0.0057, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.4760960126958937, |
| "grad_norm": 0.51953125, |
| "learning_rate": 9.711849456042342e-06, |
| "loss": 0.0047, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.47847649275937315, |
| "grad_norm": 2.59375, |
| "learning_rate": 9.66774478094678e-06, |
| "loss": 0.0054, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.4808569728228526, |
| "grad_norm": 2.484375, |
| "learning_rate": 9.62364010585122e-06, |
| "loss": 0.0049, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.4832374528863321, |
| "grad_norm": 1.3125, |
| "learning_rate": 9.57953543075566e-06, |
| "loss": 0.0048, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.48561793294981154, |
| "grad_norm": 1.71875, |
| "learning_rate": 9.5354307556601e-06, |
| "loss": 0.006, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.487998413013291, |
| "grad_norm": 1.703125, |
| "learning_rate": 9.49132608056454e-06, |
| "loss": 0.0051, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.49037889307677046, |
| "grad_norm": 1.3359375, |
| "learning_rate": 9.447221405468979e-06, |
| "loss": 0.0045, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.4927593731402499, |
| "grad_norm": 0.74609375, |
| "learning_rate": 9.403116730373419e-06, |
| "loss": 0.0052, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.49513985320372944, |
| "grad_norm": 1.28125, |
| "learning_rate": 9.359012055277859e-06, |
| "loss": 0.0042, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.4975203332672089, |
| "grad_norm": 0.578125, |
| "learning_rate": 9.3149073801823e-06, |
| "loss": 0.0045, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.49990081333068836, |
| "grad_norm": 1.9140625, |
| "learning_rate": 9.27080270508674e-06, |
| "loss": 0.0063, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.5022812933941678, |
| "grad_norm": 3.609375, |
| "learning_rate": 9.226698029991179e-06, |
| "loss": 0.0038, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.5046617734576473, |
| "grad_norm": 0.8359375, |
| "learning_rate": 9.182593354895619e-06, |
| "loss": 0.0045, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.5070422535211268, |
| "grad_norm": 2.40625, |
| "learning_rate": 9.138488679800059e-06, |
| "loss": 0.0053, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.5094227335846062, |
| "grad_norm": 0.9140625, |
| "learning_rate": 9.094384004704499e-06, |
| "loss": 0.0054, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.5118032136480857, |
| "grad_norm": 1.5, |
| "learning_rate": 9.050279329608939e-06, |
| "loss": 0.0035, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.5141836937115651, |
| "grad_norm": 1.1796875, |
| "learning_rate": 9.006174654513379e-06, |
| "loss": 0.0036, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.5165641737750446, |
| "grad_norm": 0.9453125, |
| "learning_rate": 8.962069979417817e-06, |
| "loss": 0.0043, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.5189446538385241, |
| "grad_norm": 1.7265625, |
| "learning_rate": 8.917965304322257e-06, |
| "loss": 0.0119, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.5213251339020035, |
| "grad_norm": 2.09375, |
| "learning_rate": 8.873860629226697e-06, |
| "loss": 0.0057, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.523705613965483, |
| "grad_norm": 0.9140625, |
| "learning_rate": 8.82975595413114e-06, |
| "loss": 0.0045, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.5260860940289626, |
| "grad_norm": 1.203125, |
| "learning_rate": 1.2219810576164167e-05, |
| "loss": 0.005, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.528466574092442, |
| "grad_norm": 1.4453125, |
| "learning_rate": 1.2200078926598264e-05, |
| "loss": 0.0068, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.5308470541559215, |
| "grad_norm": 1.9609375, |
| "learning_rate": 1.218034727703236e-05, |
| "loss": 0.0048, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.533227534219401, |
| "grad_norm": 1.6640625, |
| "learning_rate": 1.2160615627466457e-05, |
| "loss": 0.0052, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.5356080142828804, |
| "grad_norm": 1.65625, |
| "learning_rate": 1.2140883977900552e-05, |
| "loss": 0.0036, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.5379884943463599, |
| "grad_norm": 1.4140625, |
| "learning_rate": 1.2121152328334648e-05, |
| "loss": 0.0046, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.5403689744098393, |
| "grad_norm": 1.859375, |
| "learning_rate": 1.2101420678768745e-05, |
| "loss": 0.0065, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.5427494544733188, |
| "grad_norm": 1.640625, |
| "learning_rate": 1.2081689029202841e-05, |
| "loss": 0.0052, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.5451299345367983, |
| "grad_norm": 1.25, |
| "learning_rate": 1.206195737963694e-05, |
| "loss": 0.0053, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.5475104146002777, |
| "grad_norm": 2.125, |
| "learning_rate": 1.2042225730071034e-05, |
| "loss": 0.0058, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.5498908946637572, |
| "grad_norm": 1.3671875, |
| "learning_rate": 1.2022494080505131e-05, |
| "loss": 0.0047, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.5522713747272366, |
| "grad_norm": 1.0546875, |
| "learning_rate": 1.2002762430939227e-05, |
| "loss": 0.0048, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.5546518547907161, |
| "grad_norm": 0.431640625, |
| "learning_rate": 1.1983030781373324e-05, |
| "loss": 0.007, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.5570323348541956, |
| "grad_norm": 1.28125, |
| "learning_rate": 1.1963299131807419e-05, |
| "loss": 0.0043, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.559412814917675, |
| "grad_norm": 1.34375, |
| "learning_rate": 1.1943567482241515e-05, |
| "loss": 0.0035, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.5617932949811545, |
| "grad_norm": 1.6484375, |
| "learning_rate": 1.1923835832675612e-05, |
| "loss": 0.0056, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.564173775044634, |
| "grad_norm": 2.359375, |
| "learning_rate": 1.1904104183109708e-05, |
| "loss": 0.0047, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.5665542551081135, |
| "grad_norm": 1.90625, |
| "learning_rate": 1.1884372533543805e-05, |
| "loss": 0.0051, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.568934735171593, |
| "grad_norm": 1.6796875, |
| "learning_rate": 1.1864640883977901e-05, |
| "loss": 0.0038, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.5713152152350724, |
| "grad_norm": 1.7265625, |
| "learning_rate": 1.1844909234411998e-05, |
| "loss": 0.0043, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.5736956952985519, |
| "grad_norm": 2.09375, |
| "learning_rate": 1.1825177584846094e-05, |
| "loss": 0.0041, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.5760761753620314, |
| "grad_norm": 1.296875, |
| "learning_rate": 1.1805445935280189e-05, |
| "loss": 0.0064, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.5784566554255108, |
| "grad_norm": 1.296875, |
| "learning_rate": 1.1785714285714286e-05, |
| "loss": 0.0043, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.5808371354889903, |
| "grad_norm": 0.92578125, |
| "learning_rate": 1.1765982636148382e-05, |
| "loss": 0.0042, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.5832176155524698, |
| "grad_norm": 1.7421875, |
| "learning_rate": 1.1746250986582479e-05, |
| "loss": 0.0053, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.5855980956159492, |
| "grad_norm": 1.3828125, |
| "learning_rate": 1.1726519337016574e-05, |
| "loss": 0.0041, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.5879785756794287, |
| "grad_norm": 1.1796875, |
| "learning_rate": 1.1706787687450672e-05, |
| "loss": 0.0049, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.5903590557429081, |
| "grad_norm": 0.69921875, |
| "learning_rate": 1.1687056037884768e-05, |
| "loss": 0.0051, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.5927395358063876, |
| "grad_norm": 1.8984375, |
| "learning_rate": 1.1667324388318865e-05, |
| "loss": 0.0045, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.5951200158698671, |
| "grad_norm": 1.359375, |
| "learning_rate": 1.164759273875296e-05, |
| "loss": 0.0043, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.5975004959333465, |
| "grad_norm": 1.0078125, |
| "learning_rate": 1.1627861089187056e-05, |
| "loss": 0.0037, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.599880975996826, |
| "grad_norm": 0.400390625, |
| "learning_rate": 1.1608129439621153e-05, |
| "loss": 0.0052, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.6022614560603055, |
| "grad_norm": 1.078125, |
| "learning_rate": 1.1588397790055249e-05, |
| "loss": 0.0055, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.6046419361237849, |
| "grad_norm": 0.87109375, |
| "learning_rate": 1.1568666140489344e-05, |
| "loss": 0.0032, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.6070224161872644, |
| "grad_norm": 2.3125, |
| "learning_rate": 1.154893449092344e-05, |
| "loss": 0.0046, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.609402896250744, |
| "grad_norm": 0.94921875, |
| "learning_rate": 1.1529202841357539e-05, |
| "loss": 0.0039, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.6117833763142234, |
| "grad_norm": 0.42578125, |
| "learning_rate": 1.1509471191791635e-05, |
| "loss": 0.0032, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.6141638563777029, |
| "grad_norm": 0.83984375, |
| "learning_rate": 1.148973954222573e-05, |
| "loss": 0.0036, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.6165443364411823, |
| "grad_norm": 1.8046875, |
| "learning_rate": 1.1470007892659827e-05, |
| "loss": 0.0054, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.6189248165046618, |
| "grad_norm": 0.78515625, |
| "learning_rate": 1.1450276243093923e-05, |
| "loss": 0.004, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.6213052965681413, |
| "grad_norm": 1.0625, |
| "learning_rate": 1.143054459352802e-05, |
| "loss": 0.0049, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.6236857766316207, |
| "grad_norm": 1.96875, |
| "learning_rate": 1.1410812943962114e-05, |
| "loss": 0.0036, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.6260662566951002, |
| "grad_norm": 4.03125, |
| "learning_rate": 1.1391081294396211e-05, |
| "loss": 0.0049, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.6284467367585796, |
| "grad_norm": 1.3203125, |
| "learning_rate": 1.1371349644830309e-05, |
| "loss": 0.0036, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.6308272168220591, |
| "grad_norm": 1.5703125, |
| "learning_rate": 1.1351617995264406e-05, |
| "loss": 0.0031, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.6332076968855386, |
| "grad_norm": 1.078125, |
| "learning_rate": 1.13318863456985e-05, |
| "loss": 0.004, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.635588176949018, |
| "grad_norm": 0.875, |
| "learning_rate": 1.1312154696132597e-05, |
| "loss": 0.0043, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.6379686570124975, |
| "grad_norm": 0.8046875, |
| "learning_rate": 1.1292423046566693e-05, |
| "loss": 0.0044, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.640349137075977, |
| "grad_norm": 1.8984375, |
| "learning_rate": 1.127269139700079e-05, |
| "loss": 0.003, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.6427296171394564, |
| "grad_norm": 1.6484375, |
| "learning_rate": 1.1252959747434885e-05, |
| "loss": 0.0033, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.6451100972029359, |
| "grad_norm": 2.796875, |
| "learning_rate": 1.1233228097868981e-05, |
| "loss": 0.0038, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.6474905772664153, |
| "grad_norm": 1.4453125, |
| "learning_rate": 1.1213496448303078e-05, |
| "loss": 0.0041, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.6498710573298948, |
| "grad_norm": 1.3828125, |
| "learning_rate": 1.1193764798737176e-05, |
| "loss": 0.0034, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.6522515373933744, |
| "grad_norm": 1.2890625, |
| "learning_rate": 1.1174033149171271e-05, |
| "loss": 0.0033, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.6546320174568538, |
| "grad_norm": 1.1953125, |
| "learning_rate": 1.1154301499605367e-05, |
| "loss": 0.0032, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.6570124975203333, |
| "grad_norm": 1.4296875, |
| "learning_rate": 1.1134569850039464e-05, |
| "loss": 0.0037, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.6593929775838128, |
| "grad_norm": 1.6484375, |
| "learning_rate": 1.111483820047356e-05, |
| "loss": 0.0053, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.6617734576472922, |
| "grad_norm": 1.296875, |
| "learning_rate": 1.1095106550907655e-05, |
| "loss": 0.0041, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.6641539377107717, |
| "grad_norm": 1.1796875, |
| "learning_rate": 1.1075374901341752e-05, |
| "loss": 0.0037, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.6665344177742512, |
| "grad_norm": 0.94921875, |
| "learning_rate": 1.1055643251775848e-05, |
| "loss": 0.0126, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.6689148978377306, |
| "grad_norm": 1.96875, |
| "learning_rate": 1.1035911602209945e-05, |
| "loss": 0.003, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.6712953779012101, |
| "grad_norm": 2.15625, |
| "learning_rate": 1.1016179952644041e-05, |
| "loss": 0.0046, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.6736758579646895, |
| "grad_norm": 1.1953125, |
| "learning_rate": 1.0996448303078138e-05, |
| "loss": 0.003, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.676056338028169, |
| "grad_norm": 1.859375, |
| "learning_rate": 1.0976716653512234e-05, |
| "loss": 0.0032, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.6784368180916485, |
| "grad_norm": 1.140625, |
| "learning_rate": 1.095698500394633e-05, |
| "loss": 0.0029, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.6808172981551279, |
| "grad_norm": 1.484375, |
| "learning_rate": 1.0937253354380427e-05, |
| "loss": 0.0026, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.6831977782186074, |
| "grad_norm": 1.828125, |
| "learning_rate": 1.0917521704814522e-05, |
| "loss": 0.0035, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.6855782582820868, |
| "grad_norm": 0.44140625, |
| "learning_rate": 1.0897790055248619e-05, |
| "loss": 0.0031, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.6879587383455663, |
| "grad_norm": 1.15625, |
| "learning_rate": 1.0878058405682715e-05, |
| "loss": 0.0034, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.6903392184090458, |
| "grad_norm": 1.3515625, |
| "learning_rate": 1.0858326756116812e-05, |
| "loss": 0.0048, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.6927196984725253, |
| "grad_norm": 1.09375, |
| "learning_rate": 1.0838595106550908e-05, |
| "loss": 0.0028, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.6951001785360048, |
| "grad_norm": 0.41796875, |
| "learning_rate": 1.0818863456985005e-05, |
| "loss": 0.0028, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.6974806585994843, |
| "grad_norm": 0.40625, |
| "learning_rate": 1.0799131807419101e-05, |
| "loss": 0.0028, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.6998611386629637, |
| "grad_norm": 0.94140625, |
| "learning_rate": 1.0779400157853198e-05, |
| "loss": 0.004, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.7022416187264432, |
| "grad_norm": 1.234375, |
| "learning_rate": 1.0759668508287293e-05, |
| "loss": 0.0108, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.7046220987899227, |
| "grad_norm": 1.109375, |
| "learning_rate": 1.0739936858721389e-05, |
| "loss": 0.0027, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.7070025788534021, |
| "grad_norm": 0.96484375, |
| "learning_rate": 1.0720205209155486e-05, |
| "loss": 0.0039, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.7093830589168816, |
| "grad_norm": 4.5625, |
| "learning_rate": 1.0700473559589582e-05, |
| "loss": 0.0027, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.711763538980361, |
| "grad_norm": 2.1875, |
| "learning_rate": 1.0680741910023677e-05, |
| "loss": 0.0028, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.7141440190438405, |
| "grad_norm": 1.15625, |
| "learning_rate": 1.0661010260457775e-05, |
| "loss": 0.0026, |
| "step": 3000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 8402, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 200, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 6.82142594445656e+17, |
| "train_batch_size": 6, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|