{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 24.608,
  "eval_steps": 500,
  "global_step": 1550,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.16,
      "grad_norm": 155.76007080078125,
      "learning_rate": 9.032258064516129e-07,
      "loss": 12.094,
      "step": 10
    },
    {
      "epoch": 0.32,
      "grad_norm": 37.119384765625,
      "learning_rate": 2.1935483870967745e-06,
      "loss": 7.0819,
      "step": 20
    },
    {
      "epoch": 0.48,
      "grad_norm": 14.752822875976562,
      "learning_rate": 3.4838709677419357e-06,
      "loss": 4.4657,
      "step": 30
    },
    {
      "epoch": 0.64,
      "grad_norm": 11.597710609436035,
      "learning_rate": 4.774193548387097e-06,
      "loss": 3.5378,
      "step": 40
    },
    {
      "epoch": 0.8,
      "grad_norm": 15.393077850341797,
      "learning_rate": 6.064516129032259e-06,
      "loss": 2.8862,
      "step": 50
    },
    {
      "epoch": 0.96,
      "grad_norm": 23.84307861328125,
      "learning_rate": 7.35483870967742e-06,
      "loss": 2.6138,
      "step": 60
    },
    {
      "epoch": 1.112,
      "grad_norm": 13.163668632507324,
      "learning_rate": 8.64516129032258e-06,
      "loss": 2.3109,
      "step": 70
    },
    {
      "epoch": 1.272,
      "grad_norm": 12.168913841247559,
      "learning_rate": 9.935483870967742e-06,
      "loss": 2.2949,
      "step": 80
    },
    {
      "epoch": 1.432,
      "grad_norm": 10.725199699401855,
      "learning_rate": 1.1225806451612904e-05,
      "loss": 2.3399,
      "step": 90
    },
    {
      "epoch": 1.592,
      "grad_norm": 8.531355857849121,
      "learning_rate": 1.2516129032258067e-05,
      "loss": 2.217,
      "step": 100
    },
    {
      "epoch": 1.752,
      "grad_norm": 6.670936584472656,
      "learning_rate": 1.3806451612903227e-05,
      "loss": 2.1938,
      "step": 110
    },
    {
      "epoch": 1.912,
      "grad_norm": 5.666457653045654,
      "learning_rate": 1.5096774193548389e-05,
      "loss": 2.0994,
      "step": 120
    },
    {
      "epoch": 2.064,
      "grad_norm": 7.0824384689331055,
      "learning_rate": 1.638709677419355e-05,
      "loss": 2.0094,
      "step": 130
    },
    {
      "epoch": 2.224,
      "grad_norm": 5.3269195556640625,
      "learning_rate": 1.7677419354838713e-05,
      "loss": 1.8313,
      "step": 140
    },
    {
      "epoch": 2.384,
      "grad_norm": 3.4799787998199463,
      "learning_rate": 1.896774193548387e-05,
      "loss": 1.8772,
      "step": 150
    },
    {
      "epoch": 2.544,
      "grad_norm": 4.512059211730957,
      "learning_rate": 1.9999898566691428e-05,
      "loss": 1.7948,
      "step": 160
    },
    {
      "epoch": 2.7039999999999997,
      "grad_norm": 9.884415626525879,
      "learning_rate": 1.9996348616949673e-05,
      "loss": 1.7994,
      "step": 170
    },
    {
      "epoch": 2.864,
      "grad_norm": 3.1838889122009277,
      "learning_rate": 1.998772905933476e-05,
      "loss": 1.8654,
      "step": 180
    },
    {
      "epoch": 3.016,
      "grad_norm": 3.452301263809204,
      "learning_rate": 1.9974044265220564e-05,
      "loss": 1.6745,
      "step": 190
    },
    {
      "epoch": 3.176,
      "grad_norm": 3.3805224895477295,
      "learning_rate": 1.995530117479521e-05,
      "loss": 1.5509,
      "step": 200
    },
    {
      "epoch": 3.336,
      "grad_norm": 6.541603088378906,
      "learning_rate": 1.993150929354139e-05,
      "loss": 1.4749,
      "step": 210
    },
    {
      "epoch": 3.496,
      "grad_norm": 2.95489764213562,
      "learning_rate": 1.9902680687415704e-05,
      "loss": 1.4165,
      "step": 220
    },
    {
      "epoch": 3.656,
      "grad_norm": 3.144228458404541,
      "learning_rate": 1.9868829976729444e-05,
      "loss": 1.3226,
      "step": 230
    },
    {
      "epoch": 3.816,
      "grad_norm": 3.747593641281128,
      "learning_rate": 1.982997432873397e-05,
      "loss": 1.5257,
      "step": 240
    },
    {
      "epoch": 3.976,
      "grad_norm": 2.2221176624298096,
      "learning_rate": 1.978613344891441e-05,
      "loss": 1.4218,
      "step": 250
    },
    {
      "epoch": 4.128,
      "grad_norm": 2.854719877243042,
      "learning_rate": 1.9737329570996098e-05,
      "loss": 1.2454,
      "step": 260
    },
    {
      "epoch": 4.288,
      "grad_norm": 3.9374194145202637,
      "learning_rate": 1.968358744566884e-05,
      "loss": 1.2503,
      "step": 270
    },
    {
      "epoch": 4.448,
      "grad_norm": 4.536250591278076,
      "learning_rate": 1.9624934328034673e-05,
      "loss": 1.2983,
      "step": 280
    },
    {
      "epoch": 4.608,
      "grad_norm": 4.311966419219971,
      "learning_rate": 1.9561399963785586e-05,
      "loss": 1.2944,
      "step": 290
    },
    {
      "epoch": 4.768,
      "grad_norm": 4.188143253326416,
      "learning_rate": 1.9493016574118103e-05,
      "loss": 1.2997,
      "step": 300
    },
    {
      "epoch": 4.928,
      "grad_norm": 5.04379415512085,
      "learning_rate": 1.9419818839392408e-05,
      "loss": 1.2976,
      "step": 310
    },
    {
      "epoch": 5.08,
      "grad_norm": 4.528952598571777,
      "learning_rate": 1.9341843881544372e-05,
      "loss": 1.1579,
      "step": 320
    },
    {
      "epoch": 5.24,
      "grad_norm": 4.810428142547607,
      "learning_rate": 1.9259131245259293e-05,
      "loss": 1.13,
      "step": 330
    },
    {
      "epoch": 5.4,
      "grad_norm": 3.7566370964050293,
      "learning_rate": 1.917172287791698e-05,
      "loss": 1.1387,
      "step": 340
    },
    {
      "epoch": 5.5600000000000005,
      "grad_norm": 3.8142237663269043,
      "learning_rate": 1.9079663108318304e-05,
      "loss": 1.1176,
      "step": 350
    },
    {
      "epoch": 5.72,
      "grad_norm": 4.0017619132995605,
      "learning_rate": 1.8982998624204016e-05,
      "loss": 1.1042,
      "step": 360
    },
    {
      "epoch": 5.88,
      "grad_norm": 3.9953103065490723,
      "learning_rate": 1.8881778448577274e-05,
      "loss": 1.1386,
      "step": 370
    },
    {
      "epoch": 6.032,
      "grad_norm": 3.269265651702881,
      "learning_rate": 1.877605391484179e-05,
      "loss": 0.9651,
      "step": 380
    },
    {
      "epoch": 6.192,
      "grad_norm": 5.4509172439575195,
      "learning_rate": 1.8665878640768332e-05,
      "loss": 0.9487,
      "step": 390
    },
    {
      "epoch": 6.352,
      "grad_norm": 3.8790087699890137,
      "learning_rate": 1.855130850130267e-05,
      "loss": 0.9193,
      "step": 400
    },
    {
      "epoch": 6.5120000000000005,
      "grad_norm": 5.1756110191345215,
      "learning_rate": 1.8432401600228823e-05,
      "loss": 0.9112,
      "step": 410
    },
    {
      "epoch": 6.672,
      "grad_norm": 4.771461009979248,
      "learning_rate": 1.8309218240701973e-05,
      "loss": 0.9371,
      "step": 420
    },
    {
      "epoch": 6.832,
      "grad_norm": 4.88088846206665,
      "learning_rate": 1.818182089466595e-05,
      "loss": 1.0264,
      "step": 430
    },
    {
      "epoch": 6.992,
      "grad_norm": 4.158401012420654,
      "learning_rate": 1.8050274171170835e-05,
      "loss": 0.9534,
      "step": 440
    },
    {
      "epoch": 7.144,
      "grad_norm": 5.25468635559082,
      "learning_rate": 1.791464478360676e-05,
      "loss": 0.7345,
      "step": 450
    },
    {
      "epoch": 7.304,
      "grad_norm": 4.713033676147461,
      "learning_rate": 1.7775001515870466e-05,
      "loss": 0.8399,
      "step": 460
    },
    {
      "epoch": 7.464,
      "grad_norm": 5.714450359344482,
      "learning_rate": 1.7631415187481818e-05,
      "loss": 0.7525,
      "step": 470
    },
    {
      "epoch": 7.624,
      "grad_norm": 6.085780143737793,
      "learning_rate": 1.7483958617668e-05,
      "loss": 0.7276,
      "step": 480
    },
    {
      "epoch": 7.784,
      "grad_norm": 4.569671630859375,
      "learning_rate": 1.733270658843351e-05,
      "loss": 0.8071,
      "step": 490
    },
    {
      "epoch": 7.944,
      "grad_norm": 6.115426540374756,
      "learning_rate": 1.717773580663479e-05,
      "loss": 0.7683,
      "step": 500
    },
    {
      "epoch": 8.096,
      "grad_norm": 4.305016040802002,
      "learning_rate": 1.7019124865078625e-05,
      "loss": 0.6376,
      "step": 510
    },
    {
      "epoch": 8.256,
      "grad_norm": 6.470266342163086,
      "learning_rate": 1.6856954202664158e-05,
      "loss": 0.6286,
      "step": 520
    },
    {
      "epoch": 8.416,
      "grad_norm": 6.055320739746094,
      "learning_rate": 1.6691306063588583e-05,
      "loss": 0.6196,
      "step": 530
    },
    {
      "epoch": 8.576,
      "grad_norm": 6.73253870010376,
      "learning_rate": 1.652226445563737e-05,
      "loss": 0.564,
      "step": 540
    },
    {
      "epoch": 8.736,
      "grad_norm": 5.043179512023926,
      "learning_rate": 1.634991510758003e-05,
      "loss": 0.6122,
      "step": 550
    },
    {
      "epoch": 8.896,
      "grad_norm": 6.78087854385376,
      "learning_rate": 1.617434542569313e-05,
      "loss": 0.6173,
      "step": 560
    },
    {
      "epoch": 9.048,
      "grad_norm": 6.2355146408081055,
      "learning_rate": 1.5995644449432538e-05,
      "loss": 0.5342,
      "step": 570
    },
    {
      "epoch": 9.208,
      "grad_norm": 5.987257480621338,
      "learning_rate": 1.5813902806277445e-05,
      "loss": 0.4269,
      "step": 580
    },
    {
      "epoch": 9.368,
      "grad_norm": 5.455114364624023,
      "learning_rate": 1.562921266576898e-05,
      "loss": 0.4548,
      "step": 590
    },
    {
      "epoch": 9.528,
      "grad_norm": 5.296268463134766,
      "learning_rate": 1.5441667692766805e-05,
      "loss": 0.4038,
      "step": 600
    },
    {
      "epoch": 9.688,
      "grad_norm": 5.551358699798584,
      "learning_rate": 1.5251362999947386e-05,
      "loss": 0.4015,
      "step": 610
    },
    {
      "epoch": 9.848,
      "grad_norm": 4.464796543121338,
      "learning_rate": 1.5058395099567935e-05,
      "loss": 0.4353,
      "step": 620
    },
    {
      "epoch": 10.0,
      "grad_norm": 3.268158197402954,
      "learning_rate": 1.4862861854520652e-05,
      "loss": 0.3927,
      "step": 630
    },
    {
      "epoch": 10.16,
      "grad_norm": 8.046059608459473,
      "learning_rate": 1.4664862428701925e-05,
      "loss": 0.2612,
      "step": 640
    },
    {
      "epoch": 10.32,
      "grad_norm": 4.157690048217773,
      "learning_rate": 1.4464497236721779e-05,
      "loss": 0.2621,
      "step": 650
    },
    {
      "epoch": 10.48,
      "grad_norm": 5.3797688484191895,
      "learning_rate": 1.4261867892979e-05,
      "loss": 0.263,
      "step": 660
    },
    {
      "epoch": 10.64,
      "grad_norm": 4.068567276000977,
      "learning_rate": 1.4057077160127806e-05,
      "loss": 0.2492,
      "step": 670
    },
    {
      "epoch": 10.8,
      "grad_norm": 5.405711650848389,
      "learning_rate": 1.3850228896962178e-05,
      "loss": 0.2523,
      "step": 680
    },
    {
      "epoch": 10.96,
      "grad_norm": 4.762354373931885,
      "learning_rate": 1.3641428005744308e-05,
      "loss": 0.2586,
      "step": 690
    },
    {
      "epoch": 11.112,
      "grad_norm": 5.127146244049072,
      "learning_rate": 1.3430780379003814e-05,
      "loss": 0.1699,
      "step": 700
    },
    {
      "epoch": 11.272,
      "grad_norm": 3.0993189811706543,
      "learning_rate": 1.3218392845834789e-05,
      "loss": 0.1514,
      "step": 710
    },
    {
      "epoch": 11.432,
      "grad_norm": 5.754135608673096,
      "learning_rate": 1.300437311771785e-05,
      "loss": 0.1432,
      "step": 720
    },
    {
      "epoch": 11.592,
      "grad_norm": 4.12827730178833,
      "learning_rate": 1.2788829733894698e-05,
      "loss": 0.1512,
      "step": 730
    },
    {
      "epoch": 11.752,
      "grad_norm": 4.6962175369262695,
      "learning_rate": 1.257187200632289e-05,
      "loss": 0.1534,
      "step": 740
    },
    {
      "epoch": 11.912,
      "grad_norm": 6.317523002624512,
      "learning_rate": 1.2353609964238686e-05,
      "loss": 0.1452,
      "step": 750
    },
    {
      "epoch": 12.064,
      "grad_norm": 2.793424367904663,
      "learning_rate": 1.213415429835621e-05,
      "loss": 0.1167,
      "step": 760
    },
    {
      "epoch": 12.224,
      "grad_norm": 3.816258668899536,
      "learning_rate": 1.1913616304731064e-05,
      "loss": 0.0785,
      "step": 770
    },
    {
      "epoch": 12.384,
      "grad_norm": 3.989567518234253,
      "learning_rate": 1.1692107828317014e-05,
      "loss": 0.0857,
      "step": 780
    },
    {
      "epoch": 12.544,
      "grad_norm": 4.456111431121826,
      "learning_rate": 1.1469741206244249e-05,
      "loss": 0.0862,
      "step": 790
    },
    {
      "epoch": 12.704,
      "grad_norm": 4.539771556854248,
      "learning_rate": 1.1246629210848062e-05,
      "loss": 0.0949,
      "step": 800
    },
    {
      "epoch": 12.864,
      "grad_norm": 2.4530129432678223,
      "learning_rate": 1.1022884992476826e-05,
      "loss": 0.0928,
      "step": 810
    },
    {
      "epoch": 13.016,
      "grad_norm": 2.042999267578125,
      "learning_rate": 1.0821068423364156e-05,
      "loss": 0.0951,
      "step": 820
    },
    {
      "epoch": 13.176,
      "grad_norm": 2.9049434661865234,
      "learning_rate": 1.0596435812513276e-05,
      "loss": 0.0483,
      "step": 830
    },
    {
      "epoch": 13.336,
      "grad_norm": 2.3502166271209717,
      "learning_rate": 1.037150072164626e-05,
      "loss": 0.0559,
      "step": 840
    },
    {
      "epoch": 13.496,
      "grad_norm": 2.2428765296936035,
      "learning_rate": 1.0146377225686996e-05,
      "loss": 0.0801,
      "step": 850
    },
    {
      "epoch": 13.656,
      "grad_norm": 5.673745155334473,
      "learning_rate": 9.921179495108249e-06,
      "loss": 0.0683,
      "step": 860
    },
    {
      "epoch": 13.816,
      "grad_norm": 3.9386937618255615,
      "learning_rate": 9.696021738030575e-06,
      "loss": 0.0616,
      "step": 870
    },
    {
      "epoch": 13.975999999999999,
      "grad_norm": 4.362432479858398,
      "learning_rate": 9.471018142302127e-06,
      "loss": 0.058,
      "step": 880
    },
    {
      "epoch": 14.128,
      "grad_norm": 2.225241184234619,
      "learning_rate": 9.24628281758876e-06,
      "loss": 0.0356,
      "step": 890
    },
    {
      "epoch": 14.288,
      "grad_norm": 4.0786356925964355,
      "learning_rate": 9.021929737503757e-06,
      "loss": 0.0458,
      "step": 900
    },
    {
      "epoch": 14.448,
      "grad_norm": 2.464179277420044,
      "learning_rate": 8.79807268180658e-06,
      "loss": 0.0531,
      "step": 910
    },
    {
      "epoch": 14.608,
      "grad_norm": 2.679661273956299,
      "learning_rate": 8.574825178699935e-06,
      "loss": 0.0359,
      "step": 920
    },
    {
      "epoch": 14.768,
      "grad_norm": 2.0911498069763184,
      "learning_rate": 8.352300447254372e-06,
      "loss": 0.0362,
      "step": 930
    },
    {
      "epoch": 14.928,
      "grad_norm": 2.3030571937561035,
      "learning_rate": 8.130611339989731e-06,
      "loss": 0.0292,
      "step": 940
    },
    {
      "epoch": 15.08,
      "grad_norm": 1.6733816862106323,
      "learning_rate": 7.909870285642403e-06,
      "loss": 0.0241,
      "step": 950
    },
    {
      "epoch": 15.24,
      "grad_norm": 1.4519929885864258,
      "learning_rate": 7.690189232147566e-06,
      "loss": 0.0264,
      "step": 960
    },
    {
      "epoch": 15.4,
      "grad_norm": 1.980666995048523,
      "learning_rate": 7.4716795898652615e-06,
      "loss": 0.0231,
      "step": 970
    },
    {
      "epoch": 15.56,
      "grad_norm": 2.6794183254241943,
      "learning_rate": 7.2544521750790345e-06,
      "loss": 0.0243,
      "step": 980
    },
    {
      "epoch": 15.72,
      "grad_norm": 1.8193122148513794,
      "learning_rate": 7.038617153795948e-06,
      "loss": 0.0226,
      "step": 990
    },
    {
      "epoch": 15.88,
      "grad_norm": 2.1489455699920654,
      "learning_rate": 6.82428398587631e-06,
      "loss": 0.0321,
      "step": 1000
    },
    {
      "epoch": 16.032,
      "grad_norm": 0.9566267728805542,
      "learning_rate": 6.611561369521546e-06,
      "loss": 0.019,
      "step": 1010
    },
    {
      "epoch": 16.192,
      "grad_norm": 0.45050784945487976,
      "learning_rate": 6.400557186148371e-06,
      "loss": 0.0101,
      "step": 1020
    },
    {
      "epoch": 16.352,
      "grad_norm": 3.0079352855682373,
      "learning_rate": 6.191378445677125e-06,
      "loss": 0.0139,
      "step": 1030
    },
    {
      "epoch": 16.512,
      "grad_norm": 1.0027068853378296,
      "learning_rate": 5.984131232262167e-06,
      "loss": 0.0264,
      "step": 1040
    },
    {
      "epoch": 16.672,
      "grad_norm": 0.34918779134750366,
      "learning_rate": 5.7789206504916815e-06,
      "loss": 0.0123,
      "step": 1050
    },
    {
      "epoch": 16.832,
      "grad_norm": 1.0329653024673462,
      "learning_rate": 5.5758507720843425e-06,
      "loss": 0.0115,
      "step": 1060
    },
    {
      "epoch": 16.992,
      "grad_norm": 1.9161659479141235,
      "learning_rate": 5.375024583109745e-06,
      "loss": 0.0135,
      "step": 1070
    },
    {
      "epoch": 17.144,
      "grad_norm": 0.42554718255996704,
      "learning_rate": 5.176543931759447e-06,
      "loss": 0.005,
      "step": 1080
    },
    {
      "epoch": 17.304,
      "grad_norm": 0.7298970818519592,
      "learning_rate": 4.980509476695043e-06,
      "loss": 0.0096,
      "step": 1090
    },
    {
      "epoch": 17.464,
      "grad_norm": 2.393183946609497,
      "learning_rate": 4.7870206359995815e-06,
      "loss": 0.0148,
      "step": 1100
    },
    {
      "epoch": 17.624,
      "grad_norm": 0.4778424799442291,
      "learning_rate": 4.596175536758024e-06,
      "loss": 0.0067,
      "step": 1110
    },
    {
      "epoch": 17.784,
      "grad_norm": 0.22980810701847076,
      "learning_rate": 4.408070965292534e-06,
      "loss": 0.0053,
      "step": 1120
    },
    {
      "epoch": 17.944,
      "grad_norm": 0.18192055821418762,
      "learning_rate": 4.222802318077664e-06,
      "loss": 0.0079,
      "step": 1130
    },
    {
      "epoch": 18.096,
      "grad_norm": 0.6733874678611755,
      "learning_rate": 4.040463553360431e-06,
      "loss": 0.0039,
      "step": 1140
    },
    {
      "epoch": 18.256,
      "grad_norm": 0.17421452701091766,
      "learning_rate": 3.861147143509754e-06,
      "loss": 0.0023,
      "step": 1150
    },
    {
      "epoch": 18.416,
      "grad_norm": 0.15809208154678345,
      "learning_rate": 3.6849440281194813e-06,
      "loss": 0.006,
      "step": 1160
    },
    {
      "epoch": 18.576,
      "grad_norm": 0.06922920793294907,
      "learning_rate": 3.5119435678887328e-06,
      "loss": 0.0023,
      "step": 1170
    },
    {
      "epoch": 18.736,
      "grad_norm": 0.08193696290254593,
      "learning_rate": 3.342233499302985e-06,
      "loss": 0.003,
      "step": 1180
    },
    {
      "epoch": 18.896,
      "grad_norm": 0.0757126435637474,
      "learning_rate": 3.175899890138858e-06,
      "loss": 0.002,
      "step": 1190
    },
    {
      "epoch": 19.048,
      "grad_norm": 0.057399798184633255,
      "learning_rate": 3.0130270958152196e-06,
      "loss": 0.0022,
      "step": 1200
    },
    {
      "epoch": 19.208,
      "grad_norm": 0.068113774061203,
      "learning_rate": 2.8536977166126234e-06,
      "loss": 0.0022,
      "step": 1210
    },
    {
      "epoch": 19.368,
      "grad_norm": 0.06517008692026138,
      "learning_rate": 2.697992555782969e-06,
      "loss": 0.0016,
      "step": 1220
    },
    {
      "epoch": 19.528,
      "grad_norm": 0.07533544301986694,
      "learning_rate": 2.545990578570404e-06,
      "loss": 0.0015,
      "step": 1230
    },
    {
      "epoch": 19.688,
      "grad_norm": 0.08159100264310837,
      "learning_rate": 2.397768872164462e-06,
      "loss": 0.0018,
      "step": 1240
    },
    {
      "epoch": 19.848,
      "grad_norm": 0.05212102085351944,
      "learning_rate": 2.253402606605577e-06,
      "loss": 0.0014,
      "step": 1250
    },
    {
      "epoch": 20.0,
      "grad_norm": 0.038643430918455124,
      "learning_rate": 2.1129649966629185e-06,
      "loss": 0.0013,
      "step": 1260
    },
    {
      "epoch": 20.16,
      "grad_norm": 0.040117453783750534,
      "learning_rate": 1.9765272647038038e-06,
      "loss": 0.0013,
      "step": 1270
    },
    {
      "epoch": 20.32,
      "grad_norm": 0.03363404422998428,
      "learning_rate": 1.8441586045735737e-06,
      "loss": 0.0011,
      "step": 1280
    },
    {
      "epoch": 20.48,
      "grad_norm": 0.055696483701467514,
      "learning_rate": 1.7159261465041954e-06,
      "loss": 0.0013,
      "step": 1290
    },
    {
      "epoch": 20.64,
      "grad_norm": 0.0553043931722641,
      "learning_rate": 1.5918949230694635e-06,
      "loss": 0.0014,
      "step": 1300
    },
    {
      "epoch": 20.8,
      "grad_norm": 0.049317434430122375,
      "learning_rate": 1.4721278362039626e-06,
      "loss": 0.0011,
      "step": 1310
    },
    {
      "epoch": 20.96,
      "grad_norm": 0.07064161449670792,
      "learning_rate": 1.356685625302625e-06,
      "loss": 0.0012,
      "step": 1320
    },
    {
      "epoch": 21.112,
      "grad_norm": 0.0384482778608799,
      "learning_rate": 1.2456268364169853e-06,
      "loss": 0.0011,
      "step": 1330
    },
    {
      "epoch": 21.272,
      "grad_norm": 0.04504753276705742,
      "learning_rate": 1.1390077925637865e-06,
      "loss": 0.0011,
      "step": 1340
    },
    {
      "epoch": 21.432,
      "grad_norm": 0.04046454280614853,
      "learning_rate": 1.0368825651609893e-06,
      "loss": 0.001,
      "step": 1350
    },
    {
      "epoch": 21.592,
      "grad_norm": 0.04408493638038635,
      "learning_rate": 9.393029466056714e-07,
      "loss": 0.0012,
      "step": 1360
    },
    {
      "epoch": 21.752,
      "grad_norm": 0.03646273910999298,
      "learning_rate": 8.463184240077172e-07,
      "loss": 0.0012,
      "step": 1370
    },
    {
      "epoch": 21.912,
      "grad_norm": 0.03203440457582474,
      "learning_rate": 7.579761540926434e-07,
      "loss": 0.0011,
      "step": 1380
    },
    {
      "epoch": 22.064,
      "grad_norm": 0.03588934242725372,
      "learning_rate": 6.743209392862349e-07,
      "loss": 0.001,
      "step": 1390
    },
    {
      "epoch": 22.224,
      "grad_norm": 0.0342290997505188,
      "learning_rate": 5.953952049931999e-07,
      "loss": 0.0011,
      "step": 1400
    },
    {
      "epoch": 22.384,
      "grad_norm": 0.036632440984249115,
      "learning_rate": 5.212389780812733e-07,
      "loss": 0.001,
      "step": 1410
    },
    {
      "epoch": 22.544,
      "grad_norm": 0.03759520500898361,
      "learning_rate": 4.518898665817695e-07,
      "loss": 0.0011,
      "step": 1420
    },
    {
      "epoch": 22.704,
      "grad_norm": 0.03835231438279152,
      "learning_rate": 3.8738304061681107e-07,
      "loss": 0.0011,
      "step": 1430
    },
    {
      "epoch": 22.864,
      "grad_norm": 0.042444001883268356,
      "learning_rate": 3.2775121456295024e-07,
      "loss": 0.0011,
      "step": 1440
    },
    {
      "epoch": 23.016,
      "grad_norm": 0.033434733748435974,
      "learning_rate": 2.730246304601991e-07,
      "loss": 0.001,
      "step": 1450
    },
    {
      "epoch": 23.176,
      "grad_norm": 0.03470597416162491,
      "learning_rate": 2.2323104267490404e-07,
      "loss": 0.0011,
      "step": 1460
    },
    {
      "epoch": 23.336,
      "grad_norm": 0.04532945156097412,
      "learning_rate": 1.783957038242279e-07,
      "loss": 0.001,
      "step": 1470
    },
    {
      "epoch": 23.496,
      "grad_norm": 0.035716019570827484,
      "learning_rate": 1.3854135196939345e-07,
      "loss": 0.001,
      "step": 1480
    },
    {
      "epoch": 23.656,
      "grad_norm": 0.03435162454843521,
      "learning_rate": 1.0368819908415983e-07,
      "loss": 0.0011,
      "step": 1490
    },
    {
      "epoch": 23.816,
      "grad_norm": 0.04788799211382866,
      "learning_rate": 7.385392080440535e-08,
      "loss": 0.0011,
      "step": 1500
    },
    {
      "epoch": 23.976,
      "grad_norm": 0.037617627531290054,
      "learning_rate": 4.905364746400021e-08,
      "loss": 0.0011,
      "step": 1510
    },
    {
      "epoch": 24.128,
      "grad_norm": 0.04006591811776161,
      "learning_rate": 2.929995642151906e-08,
      "loss": 0.001,
      "step": 1520
    },
    {
      "epoch": 24.288,
      "grad_norm": 0.03150051832199097,
      "learning_rate": 1.4602865681682122e-08,
      "loss": 0.001,
      "step": 1530
    },
    {
      "epoch": 24.448,
      "grad_norm": 0.04720960184931755,
      "learning_rate": 4.969828814767042e-09,
      "loss": 0.001,
      "step": 1540
    },
    {
      "epoch": 24.608,
      "grad_norm": 0.0407867431640625,
      "learning_rate": 4.0573117655595684e-10,
      "loss": 0.001,
      "step": 1550
    }
  ],
  "logging_steps": 10,
  "max_steps": 1550,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 25,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.324081921088553e+17,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}