| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.984819734345351, | |
| "eval_steps": 500, | |
| "global_step": 10500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.003795066413662239, | |
| "grad_norm": 1.7131669521331787, | |
| "learning_rate": 1.6666666666666667e-06, | |
| "loss": 0.6804, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.007590132827324478, | |
| "grad_norm": 1.7053213119506836, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 0.6854, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.011385199240986717, | |
| "grad_norm": 1.8932372331619263, | |
| "learning_rate": 5e-06, | |
| "loss": 0.6597, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.015180265654648957, | |
| "grad_norm": 1.516872763633728, | |
| "learning_rate": 6.666666666666667e-06, | |
| "loss": 0.6243, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.018975332068311195, | |
| "grad_norm": 1.5149081945419312, | |
| "learning_rate": 8.333333333333334e-06, | |
| "loss": 0.5632, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.022770398481973434, | |
| "grad_norm": 1.0706552267074585, | |
| "learning_rate": 1e-05, | |
| "loss": 0.5289, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.026565464895635674, | |
| "grad_norm": 1.102160930633545, | |
| "learning_rate": 1.1666666666666668e-05, | |
| "loss": 0.5263, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.030360531309297913, | |
| "grad_norm": 1.2059059143066406, | |
| "learning_rate": 1.3333333333333333e-05, | |
| "loss": 0.5466, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.03415559772296015, | |
| "grad_norm": 1.0622307062149048, | |
| "learning_rate": 1.5e-05, | |
| "loss": 0.4918, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.03795066413662239, | |
| "grad_norm": 1.5696407556533813, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "loss": 0.516, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.04174573055028463, | |
| "grad_norm": 1.49858820438385, | |
| "learning_rate": 1.8333333333333333e-05, | |
| "loss": 0.5024, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.04554079696394687, | |
| "grad_norm": 1.5996527671813965, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4775, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.04933586337760911, | |
| "grad_norm": 1.6391699314117432, | |
| "learning_rate": 2.1666666666666667e-05, | |
| "loss": 0.5028, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.05313092979127135, | |
| "grad_norm": 1.5045441389083862, | |
| "learning_rate": 2.3333333333333336e-05, | |
| "loss": 0.472, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.056925996204933584, | |
| "grad_norm": 1.1791646480560303, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.4606, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.06072106261859583, | |
| "grad_norm": 1.3659300804138184, | |
| "learning_rate": 2.6666666666666667e-05, | |
| "loss": 0.527, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.06451612903225806, | |
| "grad_norm": 0.9830155968666077, | |
| "learning_rate": 2.8333333333333335e-05, | |
| "loss": 0.458, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.0683111954459203, | |
| "grad_norm": 1.6211776733398438, | |
| "learning_rate": 3e-05, | |
| "loss": 0.4613, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.07210626185958255, | |
| "grad_norm": 1.9507710933685303, | |
| "learning_rate": 3.1666666666666666e-05, | |
| "loss": 0.4531, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.07590132827324478, | |
| "grad_norm": 1.312615156173706, | |
| "learning_rate": 3.3333333333333335e-05, | |
| "loss": 0.4384, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.07969639468690702, | |
| "grad_norm": 2.034919261932373, | |
| "learning_rate": 3.5e-05, | |
| "loss": 0.4747, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.08349146110056926, | |
| "grad_norm": 2.045759677886963, | |
| "learning_rate": 3.6666666666666666e-05, | |
| "loss": 0.4153, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.0872865275142315, | |
| "grad_norm": 2.0934813022613525, | |
| "learning_rate": 3.8333333333333334e-05, | |
| "loss": 0.3829, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.09108159392789374, | |
| "grad_norm": 2.4255552291870117, | |
| "learning_rate": 4e-05, | |
| "loss": 0.3816, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.09487666034155598, | |
| "grad_norm": 1.42184579372406, | |
| "learning_rate": 4.166666666666667e-05, | |
| "loss": 0.3948, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.09867172675521822, | |
| "grad_norm": 1.6787000894546509, | |
| "learning_rate": 4.3333333333333334e-05, | |
| "loss": 0.3877, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.10246679316888045, | |
| "grad_norm": 2.121290445327759, | |
| "learning_rate": 4.5e-05, | |
| "loss": 0.3732, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.1062618595825427, | |
| "grad_norm": 1.5, | |
| "learning_rate": 4.666666666666667e-05, | |
| "loss": 0.3567, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.11005692599620494, | |
| "grad_norm": 3.0193252563476562, | |
| "learning_rate": 4.8333333333333334e-05, | |
| "loss": 0.3916, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.11385199240986717, | |
| "grad_norm": 2.7301666736602783, | |
| "learning_rate": 5e-05, | |
| "loss": 0.3723, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.11764705882352941, | |
| "grad_norm": 1.8423070907592773, | |
| "learning_rate": 4.9951171875e-05, | |
| "loss": 0.3214, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.12144212523719165, | |
| "grad_norm": 1.204102873802185, | |
| "learning_rate": 4.990234375e-05, | |
| "loss": 0.3251, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.1252371916508539, | |
| "grad_norm": 1.803913950920105, | |
| "learning_rate": 4.9853515625000005e-05, | |
| "loss": 0.3942, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.12903225806451613, | |
| "grad_norm": 3.175114154815674, | |
| "learning_rate": 4.9804687500000004e-05, | |
| "loss": 0.39, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.13282732447817835, | |
| "grad_norm": 2.4476590156555176, | |
| "learning_rate": 4.9755859375e-05, | |
| "loss": 0.349, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.1366223908918406, | |
| "grad_norm": 1.2592339515686035, | |
| "learning_rate": 4.970703125e-05, | |
| "loss": 0.3315, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.14041745730550284, | |
| "grad_norm": 1.6238622665405273, | |
| "learning_rate": 4.9658203125e-05, | |
| "loss": 0.3307, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.1442125237191651, | |
| "grad_norm": 1.3984373807907104, | |
| "learning_rate": 4.9609375000000005e-05, | |
| "loss": 0.294, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.14800759013282733, | |
| "grad_norm": 3.1960623264312744, | |
| "learning_rate": 4.9560546875e-05, | |
| "loss": 0.3314, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.15180265654648956, | |
| "grad_norm": 1.5345971584320068, | |
| "learning_rate": 4.951171875e-05, | |
| "loss": 0.3438, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.1555977229601518, | |
| "grad_norm": 3.1037323474884033, | |
| "learning_rate": 4.9462890625e-05, | |
| "loss": 0.3246, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.15939278937381404, | |
| "grad_norm": 3.519519805908203, | |
| "learning_rate": 4.94140625e-05, | |
| "loss": 0.3087, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.16318785578747627, | |
| "grad_norm": 1.347273826599121, | |
| "learning_rate": 4.9365234375000005e-05, | |
| "loss": 0.3303, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.16698292220113853, | |
| "grad_norm": 1.2372374534606934, | |
| "learning_rate": 4.931640625e-05, | |
| "loss": 0.3225, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.17077798861480076, | |
| "grad_norm": 0.9122889637947083, | |
| "learning_rate": 4.9267578125e-05, | |
| "loss": 0.3081, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.174573055028463, | |
| "grad_norm": 3.7750535011291504, | |
| "learning_rate": 4.921875e-05, | |
| "loss": 0.2785, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.17836812144212524, | |
| "grad_norm": 1.0529924631118774, | |
| "learning_rate": 4.9169921875000006e-05, | |
| "loss": 0.283, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.18216318785578747, | |
| "grad_norm": 1.5323132276535034, | |
| "learning_rate": 4.9121093750000004e-05, | |
| "loss": 0.2982, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.1859582542694497, | |
| "grad_norm": 1.1751055717468262, | |
| "learning_rate": 4.9072265625e-05, | |
| "loss": 0.2639, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.18975332068311196, | |
| "grad_norm": 1.0208653211593628, | |
| "learning_rate": 4.90234375e-05, | |
| "loss": 0.2651, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.1935483870967742, | |
| "grad_norm": 1.7089987993240356, | |
| "learning_rate": 4.8974609375e-05, | |
| "loss": 0.2572, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.19734345351043645, | |
| "grad_norm": 4.918070316314697, | |
| "learning_rate": 4.8925781250000006e-05, | |
| "loss": 0.299, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.20113851992409867, | |
| "grad_norm": 1.117162823677063, | |
| "learning_rate": 4.8876953125000004e-05, | |
| "loss": 0.2699, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.2049335863377609, | |
| "grad_norm": 1.813411831855774, | |
| "learning_rate": 4.8828125e-05, | |
| "loss": 0.2391, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.20872865275142316, | |
| "grad_norm": 3.368643283843994, | |
| "learning_rate": 4.8779296875e-05, | |
| "loss": 0.3022, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.2125237191650854, | |
| "grad_norm": 16.486289978027344, | |
| "learning_rate": 4.873046875e-05, | |
| "loss": 0.2837, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.21631878557874762, | |
| "grad_norm": 1.3590037822723389, | |
| "learning_rate": 4.8681640625000005e-05, | |
| "loss": 0.2182, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.22011385199240988, | |
| "grad_norm": 1.8672986030578613, | |
| "learning_rate": 4.8632812500000004e-05, | |
| "loss": 0.2925, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.2239089184060721, | |
| "grad_norm": 2.350752592086792, | |
| "learning_rate": 4.8583984375e-05, | |
| "loss": 0.2585, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.22770398481973433, | |
| "grad_norm": 2.4918649196624756, | |
| "learning_rate": 4.853515625e-05, | |
| "loss": 0.2824, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.2314990512333966, | |
| "grad_norm": 2.4856553077697754, | |
| "learning_rate": 4.8486328125e-05, | |
| "loss": 0.2444, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.23529411764705882, | |
| "grad_norm": 1.87199866771698, | |
| "learning_rate": 4.8437500000000005e-05, | |
| "loss": 0.256, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.23908918406072105, | |
| "grad_norm": 1.0694291591644287, | |
| "learning_rate": 4.8388671875000004e-05, | |
| "loss": 0.245, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.2428842504743833, | |
| "grad_norm": 0.7904035449028015, | |
| "learning_rate": 4.833984375e-05, | |
| "loss": 0.2588, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.24667931688804554, | |
| "grad_norm": 2.714871883392334, | |
| "learning_rate": 4.8291015625e-05, | |
| "loss": 0.2741, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.2504743833017078, | |
| "grad_norm": 3.948547124862671, | |
| "learning_rate": 4.82421875e-05, | |
| "loss": 0.2335, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.25426944971537, | |
| "grad_norm": 1.6354694366455078, | |
| "learning_rate": 4.8193359375000005e-05, | |
| "loss": 0.2298, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.25806451612903225, | |
| "grad_norm": 1.1305994987487793, | |
| "learning_rate": 4.8144531250000003e-05, | |
| "loss": 0.2279, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.2618595825426945, | |
| "grad_norm": 1.804825782775879, | |
| "learning_rate": 4.8095703125e-05, | |
| "loss": 0.2401, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.2656546489563567, | |
| "grad_norm": 1.0778950452804565, | |
| "learning_rate": 4.8046875e-05, | |
| "loss": 0.2498, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.269449715370019, | |
| "grad_norm": 2.672403335571289, | |
| "learning_rate": 4.7998046875e-05, | |
| "loss": 0.2521, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.2732447817836812, | |
| "grad_norm": 1.0559144020080566, | |
| "learning_rate": 4.7949218750000005e-05, | |
| "loss": 0.1855, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.27703984819734345, | |
| "grad_norm": 1.3226491212844849, | |
| "learning_rate": 4.7900390625e-05, | |
| "loss": 0.21, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.2808349146110057, | |
| "grad_norm": 2.1266074180603027, | |
| "learning_rate": 4.78515625e-05, | |
| "loss": 0.2232, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.2846299810246679, | |
| "grad_norm": 2.9967539310455322, | |
| "learning_rate": 4.7802734375e-05, | |
| "loss": 0.2554, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.2884250474383302, | |
| "grad_norm": 2.6614627838134766, | |
| "learning_rate": 4.775390625e-05, | |
| "loss": 0.2811, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.2922201138519924, | |
| "grad_norm": 1.64667546749115, | |
| "learning_rate": 4.7705078125000004e-05, | |
| "loss": 0.2102, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.29601518026565465, | |
| "grad_norm": 2.339608669281006, | |
| "learning_rate": 4.765625e-05, | |
| "loss": 0.2125, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.2998102466793169, | |
| "grad_norm": 1.6804083585739136, | |
| "learning_rate": 4.7607421875e-05, | |
| "loss": 0.2722, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.3036053130929791, | |
| "grad_norm": 2.6005263328552246, | |
| "learning_rate": 4.755859375e-05, | |
| "loss": 0.2067, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.30740037950664134, | |
| "grad_norm": 5.113396167755127, | |
| "learning_rate": 4.7509765625000006e-05, | |
| "loss": 0.1988, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.3111954459203036, | |
| "grad_norm": 1.9176031351089478, | |
| "learning_rate": 4.7460937500000004e-05, | |
| "loss": 0.2416, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.31499051233396586, | |
| "grad_norm": 1.5946362018585205, | |
| "learning_rate": 4.7412109375e-05, | |
| "loss": 0.2416, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.3187855787476281, | |
| "grad_norm": 1.6692804098129272, | |
| "learning_rate": 4.736328125e-05, | |
| "loss": 0.2139, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.3225806451612903, | |
| "grad_norm": 4.5298285484313965, | |
| "learning_rate": 4.7314453125e-05, | |
| "loss": 0.2285, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.32637571157495254, | |
| "grad_norm": 1.9948817491531372, | |
| "learning_rate": 4.7265625000000005e-05, | |
| "loss": 0.2453, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.3301707779886148, | |
| "grad_norm": 2.5353565216064453, | |
| "learning_rate": 4.7216796875000004e-05, | |
| "loss": 0.2259, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.33396584440227706, | |
| "grad_norm": 5.23643684387207, | |
| "learning_rate": 4.716796875e-05, | |
| "loss": 0.2318, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.3377609108159393, | |
| "grad_norm": 3.062701463699341, | |
| "learning_rate": 4.7119140625e-05, | |
| "loss": 0.1835, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.3415559772296015, | |
| "grad_norm": 1.5771597623825073, | |
| "learning_rate": 4.70703125e-05, | |
| "loss": 0.2195, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.34535104364326374, | |
| "grad_norm": 0.9039077162742615, | |
| "learning_rate": 4.7021484375000005e-05, | |
| "loss": 0.1545, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.349146110056926, | |
| "grad_norm": 2.7035298347473145, | |
| "learning_rate": 4.6972656250000004e-05, | |
| "loss": 0.2221, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.35294117647058826, | |
| "grad_norm": 2.3225386142730713, | |
| "learning_rate": 4.6923828125e-05, | |
| "loss": 0.1912, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.3567362428842505, | |
| "grad_norm": 1.1066793203353882, | |
| "learning_rate": 4.6875e-05, | |
| "loss": 0.2003, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.3605313092979127, | |
| "grad_norm": 1.2358715534210205, | |
| "learning_rate": 4.6826171875e-05, | |
| "loss": 0.1944, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.36432637571157495, | |
| "grad_norm": 0.5866732001304626, | |
| "learning_rate": 4.6777343750000005e-05, | |
| "loss": 0.1885, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.3681214421252372, | |
| "grad_norm": 1.436168909072876, | |
| "learning_rate": 4.6728515625000004e-05, | |
| "loss": 0.182, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.3719165085388994, | |
| "grad_norm": 1.5037955045700073, | |
| "learning_rate": 4.66796875e-05, | |
| "loss": 0.2024, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.3757115749525617, | |
| "grad_norm": 1.4837393760681152, | |
| "learning_rate": 4.6630859375e-05, | |
| "loss": 0.2249, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.3795066413662239, | |
| "grad_norm": 12.082221031188965, | |
| "learning_rate": 4.658203125e-05, | |
| "loss": 0.2191, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.38330170777988615, | |
| "grad_norm": 0.7743262648582458, | |
| "learning_rate": 4.6533203125000005e-05, | |
| "loss": 0.1654, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.3870967741935484, | |
| "grad_norm": 3.7393670082092285, | |
| "learning_rate": 4.6484375e-05, | |
| "loss": 0.1595, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.3908918406072106, | |
| "grad_norm": 1.2153229713439941, | |
| "learning_rate": 4.6435546875e-05, | |
| "loss": 0.2276, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.3946869070208729, | |
| "grad_norm": 0.9271629452705383, | |
| "learning_rate": 4.638671875e-05, | |
| "loss": 0.2039, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.3984819734345351, | |
| "grad_norm": 1.0829685926437378, | |
| "learning_rate": 4.6337890625e-05, | |
| "loss": 0.1731, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.40227703984819735, | |
| "grad_norm": 1.2705596685409546, | |
| "learning_rate": 4.6289062500000005e-05, | |
| "loss": 0.1359, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.4060721062618596, | |
| "grad_norm": 4.376911163330078, | |
| "learning_rate": 4.6240234375e-05, | |
| "loss": 0.2095, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.4098671726755218, | |
| "grad_norm": 2.1292335987091064, | |
| "learning_rate": 4.619140625e-05, | |
| "loss": 0.1916, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.41366223908918404, | |
| "grad_norm": 1.6525979042053223, | |
| "learning_rate": 4.6142578125e-05, | |
| "loss": 0.173, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.4174573055028463, | |
| "grad_norm": 4.228000164031982, | |
| "learning_rate": 4.609375e-05, | |
| "loss": 0.2117, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.42125237191650855, | |
| "grad_norm": 5.334222316741943, | |
| "learning_rate": 4.6044921875000004e-05, | |
| "loss": 0.185, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.4250474383301708, | |
| "grad_norm": 1.7326403856277466, | |
| "learning_rate": 4.599609375e-05, | |
| "loss": 0.1875, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.428842504743833, | |
| "grad_norm": 2.4292402267456055, | |
| "learning_rate": 4.5947265625e-05, | |
| "loss": 0.1747, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.43263757115749524, | |
| "grad_norm": 1.6561298370361328, | |
| "learning_rate": 4.58984375e-05, | |
| "loss": 0.2017, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.4364326375711575, | |
| "grad_norm": 2.659874439239502, | |
| "learning_rate": 4.5849609375000005e-05, | |
| "loss": 0.2415, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.44022770398481975, | |
| "grad_norm": 2.743425130844116, | |
| "learning_rate": 4.5800781250000004e-05, | |
| "loss": 0.2332, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.444022770398482, | |
| "grad_norm": 2.3197848796844482, | |
| "learning_rate": 4.5751953125e-05, | |
| "loss": 0.1946, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.4478178368121442, | |
| "grad_norm": 2.110534191131592, | |
| "learning_rate": 4.5703125e-05, | |
| "loss": 0.1948, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.45161290322580644, | |
| "grad_norm": 1.3609685897827148, | |
| "learning_rate": 4.5654296875e-05, | |
| "loss": 0.1801, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.45540796963946867, | |
| "grad_norm": 3.159426689147949, | |
| "learning_rate": 4.5605468750000005e-05, | |
| "loss": 0.2184, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.45920303605313095, | |
| "grad_norm": 1.7927987575531006, | |
| "learning_rate": 4.5556640625000004e-05, | |
| "loss": 0.1604, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.4629981024667932, | |
| "grad_norm": 1.5928328037261963, | |
| "learning_rate": 4.55078125e-05, | |
| "loss": 0.1693, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.4667931688804554, | |
| "grad_norm": 0.8145284056663513, | |
| "learning_rate": 4.5458984375e-05, | |
| "loss": 0.1761, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.47058823529411764, | |
| "grad_norm": 0.7765156030654907, | |
| "learning_rate": 4.541015625e-05, | |
| "loss": 0.1799, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.47438330170777987, | |
| "grad_norm": 1.8456169366836548, | |
| "learning_rate": 4.5361328125000005e-05, | |
| "loss": 0.168, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.4781783681214421, | |
| "grad_norm": 1.6953251361846924, | |
| "learning_rate": 4.5312500000000004e-05, | |
| "loss": 0.1945, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.4819734345351044, | |
| "grad_norm": 1.5285083055496216, | |
| "learning_rate": 4.5263671875e-05, | |
| "loss": 0.2075, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.4857685009487666, | |
| "grad_norm": 2.95650577545166, | |
| "learning_rate": 4.521484375e-05, | |
| "loss": 0.1601, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.48956356736242884, | |
| "grad_norm": 0.7677034735679626, | |
| "learning_rate": 4.5166015625e-05, | |
| "loss": 0.1695, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.49335863377609107, | |
| "grad_norm": 1.9959975481033325, | |
| "learning_rate": 4.5117187500000005e-05, | |
| "loss": 0.2183, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.4971537001897533, | |
| "grad_norm": 1.8000417947769165, | |
| "learning_rate": 4.5068359375000003e-05, | |
| "loss": 0.175, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.5009487666034156, | |
| "grad_norm": 1.400612473487854, | |
| "learning_rate": 4.501953125e-05, | |
| "loss": 0.2085, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.5047438330170778, | |
| "grad_norm": 1.6406989097595215, | |
| "learning_rate": 4.4970703125e-05, | |
| "loss": 0.1537, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.50853889943074, | |
| "grad_norm": 2.0849852561950684, | |
| "learning_rate": 4.4921875e-05, | |
| "loss": 0.1579, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.5123339658444023, | |
| "grad_norm": 2.6497225761413574, | |
| "learning_rate": 4.4873046875000005e-05, | |
| "loss": 0.1888, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.5161290322580645, | |
| "grad_norm": 2.2594399452209473, | |
| "learning_rate": 4.482421875e-05, | |
| "loss": 0.1645, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.5199240986717267, | |
| "grad_norm": 1.3591111898422241, | |
| "learning_rate": 4.4775390625e-05, | |
| "loss": 0.1876, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.523719165085389, | |
| "grad_norm": 5.060487747192383, | |
| "learning_rate": 4.47265625e-05, | |
| "loss": 0.1946, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.5275142314990512, | |
| "grad_norm": 1.7694716453552246, | |
| "learning_rate": 4.4677734375e-05, | |
| "loss": 0.0966, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.5313092979127134, | |
| "grad_norm": 2.8661625385284424, | |
| "learning_rate": 4.4628906250000004e-05, | |
| "loss": 0.1614, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.5351043643263758, | |
| "grad_norm": 2.2955727577209473, | |
| "learning_rate": 4.4580078125e-05, | |
| "loss": 0.193, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.538899430740038, | |
| "grad_norm": 1.4596924781799316, | |
| "learning_rate": 4.453125e-05, | |
| "loss": 0.1971, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.5426944971537002, | |
| "grad_norm": 1.039890170097351, | |
| "learning_rate": 4.4482421875e-05, | |
| "loss": 0.1909, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.5464895635673624, | |
| "grad_norm": 1.433979868888855, | |
| "learning_rate": 4.443359375e-05, | |
| "loss": 0.1832, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.5502846299810247, | |
| "grad_norm": 1.306391954421997, | |
| "learning_rate": 4.4384765625000004e-05, | |
| "loss": 0.1867, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.5540796963946869, | |
| "grad_norm": 1.2681069374084473, | |
| "learning_rate": 4.43359375e-05, | |
| "loss": 0.1506, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.5578747628083491, | |
| "grad_norm": 3.947502613067627, | |
| "learning_rate": 4.4287109375e-05, | |
| "loss": 0.1343, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.5616698292220114, | |
| "grad_norm": 4.928821563720703, | |
| "learning_rate": 4.423828125e-05, | |
| "loss": 0.2057, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.5654648956356736, | |
| "grad_norm": 2.162473201751709, | |
| "learning_rate": 4.4189453125000005e-05, | |
| "loss": 0.1942, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.5692599620493358, | |
| "grad_norm": 5.402246475219727, | |
| "learning_rate": 4.4140625000000004e-05, | |
| "loss": 0.1727, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.573055028462998, | |
| "grad_norm": 0.2728889286518097, | |
| "learning_rate": 4.4091796875e-05, | |
| "loss": 0.1345, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.5768500948766604, | |
| "grad_norm": 2.027841567993164, | |
| "learning_rate": 4.404296875e-05, | |
| "loss": 0.213, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.5806451612903226, | |
| "grad_norm": 1.3224737644195557, | |
| "learning_rate": 4.3994140625e-05, | |
| "loss": 0.1735, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.5844402277039848, | |
| "grad_norm": 2.3124992847442627, | |
| "learning_rate": 4.3945312500000005e-05, | |
| "loss": 0.2177, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.5882352941176471, | |
| "grad_norm": 1.2521787881851196, | |
| "learning_rate": 4.3896484375000004e-05, | |
| "loss": 0.1332, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.5920303605313093, | |
| "grad_norm": 2.5216283798217773, | |
| "learning_rate": 4.384765625e-05, | |
| "loss": 0.1318, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.5958254269449715, | |
| "grad_norm": 1.8268439769744873, | |
| "learning_rate": 4.3798828125e-05, | |
| "loss": 0.1269, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.5996204933586338, | |
| "grad_norm": 0.6268766522407532, | |
| "learning_rate": 4.375e-05, | |
| "loss": 0.1381, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.603415559772296, | |
| "grad_norm": 1.979546308517456, | |
| "learning_rate": 4.3701171875000005e-05, | |
| "loss": 0.1351, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.6072106261859582, | |
| "grad_norm": 1.5526436567306519, | |
| "learning_rate": 4.3652343750000004e-05, | |
| "loss": 0.2163, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.6110056925996205, | |
| "grad_norm": 0.9428083896636963, | |
| "learning_rate": 4.3603515625e-05, | |
| "loss": 0.1398, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.6148007590132827, | |
| "grad_norm": 2.1224870681762695, | |
| "learning_rate": 4.35546875e-05, | |
| "loss": 0.1891, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.618595825426945, | |
| "grad_norm": 0.3401525914669037, | |
| "learning_rate": 4.3505859375e-05, | |
| "loss": 0.1068, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.6223908918406073, | |
| "grad_norm": 1.1070092916488647, | |
| "learning_rate": 4.3457031250000005e-05, | |
| "loss": 0.1407, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.6261859582542695, | |
| "grad_norm": 1.1588579416275024, | |
| "learning_rate": 4.3408203125e-05, | |
| "loss": 0.2238, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.6299810246679317, | |
| "grad_norm": 1.3201090097427368, | |
| "learning_rate": 4.3359375e-05, | |
| "loss": 0.2135, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.6337760910815939, | |
| "grad_norm": 1.2257441282272339, | |
| "learning_rate": 4.3310546875e-05, | |
| "loss": 0.1261, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.6375711574952562, | |
| "grad_norm": 1.4213567972183228, | |
| "learning_rate": 4.326171875e-05, | |
| "loss": 0.1439, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.6413662239089184, | |
| "grad_norm": 1.0983916521072388, | |
| "learning_rate": 4.3212890625000004e-05, | |
| "loss": 0.1356, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.6451612903225806, | |
| "grad_norm": 1.6485854387283325, | |
| "learning_rate": 4.31640625e-05, | |
| "loss": 0.1549, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.6489563567362429, | |
| "grad_norm": 5.49334716796875, | |
| "learning_rate": 4.3115234375e-05, | |
| "loss": 0.1519, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.6527514231499051, | |
| "grad_norm": 0.26703280210494995, | |
| "learning_rate": 4.306640625e-05, | |
| "loss": 0.1499, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.6565464895635673, | |
| "grad_norm": 1.5822151899337769, | |
| "learning_rate": 4.3017578125e-05, | |
| "loss": 0.1733, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.6603415559772297, | |
| "grad_norm": 1.1510590314865112, | |
| "learning_rate": 4.2968750000000004e-05, | |
| "loss": 0.1665, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.6641366223908919, | |
| "grad_norm": 2.48427152633667, | |
| "learning_rate": 4.2919921875e-05, | |
| "loss": 0.1598, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.6679316888045541, | |
| "grad_norm": 2.0076019763946533, | |
| "learning_rate": 4.287109375e-05, | |
| "loss": 0.1642, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.6717267552182163, | |
| "grad_norm": 2.1611413955688477, | |
| "learning_rate": 4.2822265625e-05, | |
| "loss": 0.1538, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.6755218216318786, | |
| "grad_norm": 2.476008415222168, | |
| "learning_rate": 4.27734375e-05, | |
| "loss": 0.1193, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.6793168880455408, | |
| "grad_norm": 2.426025867462158, | |
| "learning_rate": 4.2724609375000004e-05, | |
| "loss": 0.161, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.683111954459203, | |
| "grad_norm": 2.2168385982513428, | |
| "learning_rate": 4.267578125e-05, | |
| "loss": 0.1429, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.6869070208728653, | |
| "grad_norm": 1.63054358959198, | |
| "learning_rate": 4.2626953125e-05, | |
| "loss": 0.1561, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.6907020872865275, | |
| "grad_norm": 5.170077323913574, | |
| "learning_rate": 4.2578125e-05, | |
| "loss": 0.1685, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.6944971537001897, | |
| "grad_norm": 2.700263023376465, | |
| "learning_rate": 4.2529296875000005e-05, | |
| "loss": 0.1601, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.698292220113852, | |
| "grad_norm": 1.6965094804763794, | |
| "learning_rate": 4.2480468750000004e-05, | |
| "loss": 0.1046, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.7020872865275142, | |
| "grad_norm": 5.461817264556885, | |
| "learning_rate": 4.2431640625e-05, | |
| "loss": 0.1421, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.7058823529411765, | |
| "grad_norm": 1.584050178527832, | |
| "learning_rate": 4.23828125e-05, | |
| "loss": 0.1781, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.7096774193548387, | |
| "grad_norm": 2.42586088180542, | |
| "learning_rate": 4.2333984375e-05, | |
| "loss": 0.1274, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.713472485768501, | |
| "grad_norm": 3.151433229446411, | |
| "learning_rate": 4.2285156250000005e-05, | |
| "loss": 0.1825, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.7172675521821632, | |
| "grad_norm": 1.1808427572250366, | |
| "learning_rate": 4.2236328125000004e-05, | |
| "loss": 0.2085, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.7210626185958254, | |
| "grad_norm": 1.981814980506897, | |
| "learning_rate": 4.21875e-05, | |
| "loss": 0.1718, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.7248576850094877, | |
| "grad_norm": 0.9719598293304443, | |
| "learning_rate": 4.2138671875e-05, | |
| "loss": 0.1461, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.7286527514231499, | |
| "grad_norm": 1.493422031402588, | |
| "learning_rate": 4.208984375e-05, | |
| "loss": 0.1902, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.7324478178368121, | |
| "grad_norm": 1.4552210569381714, | |
| "learning_rate": 4.2041015625000005e-05, | |
| "loss": 0.1253, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.7362428842504743, | |
| "grad_norm": 2.0822556018829346, | |
| "learning_rate": 4.1992187500000003e-05, | |
| "loss": 0.144, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.7400379506641366, | |
| "grad_norm": 2.461090326309204, | |
| "learning_rate": 4.1943359375e-05, | |
| "loss": 0.2084, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.7438330170777988, | |
| "grad_norm": 1.8043471574783325, | |
| "learning_rate": 4.189453125e-05, | |
| "loss": 0.1904, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.7476280834914611, | |
| "grad_norm": 1.6388760805130005, | |
| "learning_rate": 4.1845703125e-05, | |
| "loss": 0.2071, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.7514231499051234, | |
| "grad_norm": 2.5029492378234863, | |
| "learning_rate": 4.1796875000000005e-05, | |
| "loss": 0.1881, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.7552182163187856, | |
| "grad_norm": 1.3092814683914185, | |
| "learning_rate": 4.1748046875e-05, | |
| "loss": 0.1356, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.7590132827324478, | |
| "grad_norm": 1.2208425998687744, | |
| "learning_rate": 4.169921875e-05, | |
| "loss": 0.1378, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.7628083491461101, | |
| "grad_norm": 3.214336633682251, | |
| "learning_rate": 4.1650390625e-05, | |
| "loss": 0.1954, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.7666034155597723, | |
| "grad_norm": 4.104292392730713, | |
| "learning_rate": 4.16015625e-05, | |
| "loss": 0.1886, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.7703984819734345, | |
| "grad_norm": 2.170186996459961, | |
| "learning_rate": 4.1552734375000004e-05, | |
| "loss": 0.1705, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.7741935483870968, | |
| "grad_norm": 2.6494083404541016, | |
| "learning_rate": 4.150390625e-05, | |
| "loss": 0.1986, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.777988614800759, | |
| "grad_norm": 0.7542719841003418, | |
| "learning_rate": 4.1455078125e-05, | |
| "loss": 0.1255, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.7817836812144212, | |
| "grad_norm": 3.126569986343384, | |
| "learning_rate": 4.140625e-05, | |
| "loss": 0.1576, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.7855787476280834, | |
| "grad_norm": 1.0665310621261597, | |
| "learning_rate": 4.1357421875e-05, | |
| "loss": 0.174, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.7893738140417458, | |
| "grad_norm": 1.3480401039123535, | |
| "learning_rate": 4.1308593750000004e-05, | |
| "loss": 0.1203, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.793168880455408, | |
| "grad_norm": 2.358405113220215, | |
| "learning_rate": 4.1259765625e-05, | |
| "loss": 0.1394, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.7969639468690702, | |
| "grad_norm": 3.2337498664855957, | |
| "learning_rate": 4.12109375e-05, | |
| "loss": 0.1711, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.8007590132827325, | |
| "grad_norm": 2.7708380222320557, | |
| "learning_rate": 4.1162109375e-05, | |
| "loss": 0.1265, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.8045540796963947, | |
| "grad_norm": 3.3023488521575928, | |
| "learning_rate": 4.1113281250000005e-05, | |
| "loss": 0.1706, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.8083491461100569, | |
| "grad_norm": 1.758325219154358, | |
| "learning_rate": 4.1064453125000004e-05, | |
| "loss": 0.1371, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.8121442125237192, | |
| "grad_norm": 1.5623672008514404, | |
| "learning_rate": 4.1015625e-05, | |
| "loss": 0.1756, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.8159392789373814, | |
| "grad_norm": 1.3145450353622437, | |
| "learning_rate": 4.0966796875e-05, | |
| "loss": 0.1328, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.8197343453510436, | |
| "grad_norm": 2.432619094848633, | |
| "learning_rate": 4.091796875e-05, | |
| "loss": 0.1286, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.8235294117647058, | |
| "grad_norm": 0.4147840142250061, | |
| "learning_rate": 4.0869140625000005e-05, | |
| "loss": 0.1509, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.8273244781783681, | |
| "grad_norm": 1.6098836660385132, | |
| "learning_rate": 4.0820312500000004e-05, | |
| "loss": 0.1746, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.8311195445920304, | |
| "grad_norm": 2.5355212688446045, | |
| "learning_rate": 4.0771484375e-05, | |
| "loss": 0.1238, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.8349146110056926, | |
| "grad_norm": 1.5544086694717407, | |
| "learning_rate": 4.072265625e-05, | |
| "loss": 0.2168, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.8387096774193549, | |
| "grad_norm": 2.1792962551116943, | |
| "learning_rate": 4.0673828125e-05, | |
| "loss": 0.1338, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.8425047438330171, | |
| "grad_norm": 2.667340040206909, | |
| "learning_rate": 4.0625000000000005e-05, | |
| "loss": 0.1505, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.8462998102466793, | |
| "grad_norm": 0.8551260232925415, | |
| "learning_rate": 4.0576171875000004e-05, | |
| "loss": 0.1081, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.8500948766603416, | |
| "grad_norm": 2.8773763179779053, | |
| "learning_rate": 4.052734375e-05, | |
| "loss": 0.1089, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.8538899430740038, | |
| "grad_norm": 2.12497878074646, | |
| "learning_rate": 4.0478515625e-05, | |
| "loss": 0.1268, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.857685009487666, | |
| "grad_norm": 1.8039929866790771, | |
| "learning_rate": 4.04296875e-05, | |
| "loss": 0.1544, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.8614800759013282, | |
| "grad_norm": 0.4839627742767334, | |
| "learning_rate": 4.0380859375000005e-05, | |
| "loss": 0.1421, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.8652751423149905, | |
| "grad_norm": 3.672240734100342, | |
| "learning_rate": 4.033203125e-05, | |
| "loss": 0.134, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.8690702087286527, | |
| "grad_norm": 2.4371728897094727, | |
| "learning_rate": 4.0283203125e-05, | |
| "loss": 0.1419, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.872865275142315, | |
| "grad_norm": 1.8469904661178589, | |
| "learning_rate": 4.0234375e-05, | |
| "loss": 0.1846, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.8766603415559773, | |
| "grad_norm": 0.7639700174331665, | |
| "learning_rate": 4.0185546875e-05, | |
| "loss": 0.106, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.8804554079696395, | |
| "grad_norm": 1.4450427293777466, | |
| "learning_rate": 4.0136718750000004e-05, | |
| "loss": 0.1408, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.8842504743833017, | |
| "grad_norm": 1.3033993244171143, | |
| "learning_rate": 4.0087890625e-05, | |
| "loss": 0.1456, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.888045540796964, | |
| "grad_norm": 1.3045791387557983, | |
| "learning_rate": 4.00390625e-05, | |
| "loss": 0.1531, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.8918406072106262, | |
| "grad_norm": 3.4357423782348633, | |
| "learning_rate": 3.9990234375e-05, | |
| "loss": 0.1417, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.8956356736242884, | |
| "grad_norm": 3.5311038494110107, | |
| "learning_rate": 3.994140625e-05, | |
| "loss": 0.1317, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.8994307400379506, | |
| "grad_norm": 4.028538227081299, | |
| "learning_rate": 3.9892578125000004e-05, | |
| "loss": 0.1644, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.9032258064516129, | |
| "grad_norm": 1.4089256525039673, | |
| "learning_rate": 3.984375e-05, | |
| "loss": 0.1087, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.9070208728652751, | |
| "grad_norm": 0.2230881005525589, | |
| "learning_rate": 3.9794921875e-05, | |
| "loss": 0.1387, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.9108159392789373, | |
| "grad_norm": 2.5647592544555664, | |
| "learning_rate": 3.974609375e-05, | |
| "loss": 0.1475, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.9146110056925996, | |
| "grad_norm": 1.2803542613983154, | |
| "learning_rate": 3.9697265625e-05, | |
| "loss": 0.126, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.9184060721062619, | |
| "grad_norm": 3.2023112773895264, | |
| "learning_rate": 3.9648437500000004e-05, | |
| "loss": 0.1458, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.9222011385199241, | |
| "grad_norm": 3.615530252456665, | |
| "learning_rate": 3.9599609375e-05, | |
| "loss": 0.1297, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.9259962049335864, | |
| "grad_norm": 3.396568536758423, | |
| "learning_rate": 3.955078125e-05, | |
| "loss": 0.1486, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.9297912713472486, | |
| "grad_norm": 1.7030583620071411, | |
| "learning_rate": 3.9501953125e-05, | |
| "loss": 0.1464, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.9335863377609108, | |
| "grad_norm": 1.0317497253417969, | |
| "learning_rate": 3.9453125000000005e-05, | |
| "loss": 0.1658, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.937381404174573, | |
| "grad_norm": 1.1268532276153564, | |
| "learning_rate": 3.9404296875000004e-05, | |
| "loss": 0.1425, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.9411764705882353, | |
| "grad_norm": 0.9238561391830444, | |
| "learning_rate": 3.935546875e-05, | |
| "loss": 0.1565, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.9449715370018975, | |
| "grad_norm": 1.4960806369781494, | |
| "learning_rate": 3.9306640625e-05, | |
| "loss": 0.1681, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.9487666034155597, | |
| "grad_norm": 1.306814193725586, | |
| "learning_rate": 3.92578125e-05, | |
| "loss": 0.1719, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.952561669829222, | |
| "grad_norm": 0.391342431306839, | |
| "learning_rate": 3.9208984375000005e-05, | |
| "loss": 0.1497, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.9563567362428842, | |
| "grad_norm": 1.9634449481964111, | |
| "learning_rate": 3.9160156250000004e-05, | |
| "loss": 0.124, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.9601518026565465, | |
| "grad_norm": 2.7319021224975586, | |
| "learning_rate": 3.9111328125e-05, | |
| "loss": 0.1029, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.9639468690702088, | |
| "grad_norm": 1.062157392501831, | |
| "learning_rate": 3.90625e-05, | |
| "loss": 0.1612, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.967741935483871, | |
| "grad_norm": 2.737459182739258, | |
| "learning_rate": 3.9013671875e-05, | |
| "loss": 0.1817, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.9715370018975332, | |
| "grad_norm": 1.4106887578964233, | |
| "learning_rate": 3.8964843750000005e-05, | |
| "loss": 0.1875, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.9753320683111955, | |
| "grad_norm": 7.118113040924072, | |
| "learning_rate": 3.8916015625000003e-05, | |
| "loss": 0.2243, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.9791271347248577, | |
| "grad_norm": 2.956235647201538, | |
| "learning_rate": 3.88671875e-05, | |
| "loss": 0.1059, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.9829222011385199, | |
| "grad_norm": 1.2888784408569336, | |
| "learning_rate": 3.8818359375e-05, | |
| "loss": 0.1546, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.9867172675521821, | |
| "grad_norm": 2.5757930278778076, | |
| "learning_rate": 3.876953125e-05, | |
| "loss": 0.115, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.9905123339658444, | |
| "grad_norm": 0.7105236053466797, | |
| "learning_rate": 3.8720703125000005e-05, | |
| "loss": 0.1218, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.9943074003795066, | |
| "grad_norm": 2.5876383781433105, | |
| "learning_rate": 3.8671875e-05, | |
| "loss": 0.1487, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.9981024667931688, | |
| "grad_norm": 0.2208087146282196, | |
| "learning_rate": 3.8623046875e-05, | |
| "loss": 0.1429, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 1.0018975332068312, | |
| "grad_norm": 0.6170036196708679, | |
| "learning_rate": 3.857421875e-05, | |
| "loss": 0.128, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 1.0056925996204933, | |
| "grad_norm": 1.1868369579315186, | |
| "learning_rate": 3.8525390625e-05, | |
| "loss": 0.0923, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 1.0094876660341556, | |
| "grad_norm": 3.0359079837799072, | |
| "learning_rate": 3.8476562500000004e-05, | |
| "loss": 0.1104, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 1.0132827324478177, | |
| "grad_norm": 0.6559151411056519, | |
| "learning_rate": 3.8427734375e-05, | |
| "loss": 0.1089, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 1.01707779886148, | |
| "grad_norm": 10.784985542297363, | |
| "learning_rate": 3.837890625e-05, | |
| "loss": 0.1408, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 1.0208728652751424, | |
| "grad_norm": 1.7095699310302734, | |
| "learning_rate": 3.8330078125e-05, | |
| "loss": 0.1358, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 1.0246679316888045, | |
| "grad_norm": 1.3584043979644775, | |
| "learning_rate": 3.828125e-05, | |
| "loss": 0.1248, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.0284629981024669, | |
| "grad_norm": 5.567887783050537, | |
| "learning_rate": 3.8232421875000004e-05, | |
| "loss": 0.0992, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 1.032258064516129, | |
| "grad_norm": 1.6698075532913208, | |
| "learning_rate": 3.818359375e-05, | |
| "loss": 0.1503, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 1.0360531309297913, | |
| "grad_norm": 0.29519161581993103, | |
| "learning_rate": 3.8134765625e-05, | |
| "loss": 0.1247, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 1.0398481973434535, | |
| "grad_norm": 2.3616697788238525, | |
| "learning_rate": 3.80859375e-05, | |
| "loss": 0.1459, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 1.0436432637571158, | |
| "grad_norm": 1.219618320465088, | |
| "learning_rate": 3.8037109375e-05, | |
| "loss": 0.1036, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 1.047438330170778, | |
| "grad_norm": 1.3592404127120972, | |
| "learning_rate": 3.7988281250000004e-05, | |
| "loss": 0.1399, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 1.0512333965844403, | |
| "grad_norm": 1.2837351560592651, | |
| "learning_rate": 3.7939453125e-05, | |
| "loss": 0.1581, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 1.0550284629981024, | |
| "grad_norm": 1.3627588748931885, | |
| "learning_rate": 3.7890625e-05, | |
| "loss": 0.1093, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 1.0588235294117647, | |
| "grad_norm": 4.571230888366699, | |
| "learning_rate": 3.7841796875e-05, | |
| "loss": 0.1693, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 1.0626185958254268, | |
| "grad_norm": 1.575040578842163, | |
| "learning_rate": 3.7792968750000005e-05, | |
| "loss": 0.1646, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.0664136622390892, | |
| "grad_norm": 2.594174861907959, | |
| "learning_rate": 3.7744140625000004e-05, | |
| "loss": 0.0976, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 1.0702087286527515, | |
| "grad_norm": 4.076402187347412, | |
| "learning_rate": 3.76953125e-05, | |
| "loss": 0.1301, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 1.0740037950664136, | |
| "grad_norm": 2.7510082721710205, | |
| "learning_rate": 3.7646484375e-05, | |
| "loss": 0.1337, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 1.077798861480076, | |
| "grad_norm": 0.8219005465507507, | |
| "learning_rate": 3.759765625e-05, | |
| "loss": 0.1122, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 1.081593927893738, | |
| "grad_norm": 1.9153568744659424, | |
| "learning_rate": 3.7548828125000005e-05, | |
| "loss": 0.1428, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 1.0853889943074004, | |
| "grad_norm": 2.93013858795166, | |
| "learning_rate": 3.7500000000000003e-05, | |
| "loss": 0.1872, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 1.0891840607210626, | |
| "grad_norm": 0.7126034498214722, | |
| "learning_rate": 3.7451171875e-05, | |
| "loss": 0.1106, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 1.092979127134725, | |
| "grad_norm": 1.8968008756637573, | |
| "learning_rate": 3.740234375e-05, | |
| "loss": 0.1131, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 1.096774193548387, | |
| "grad_norm": 5.133113861083984, | |
| "learning_rate": 3.7353515625e-05, | |
| "loss": 0.0884, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 1.1005692599620494, | |
| "grad_norm": 3.756060838699341, | |
| "learning_rate": 3.7304687500000005e-05, | |
| "loss": 0.1373, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.1043643263757117, | |
| "grad_norm": 7.563070297241211, | |
| "learning_rate": 3.7255859375e-05, | |
| "loss": 0.1353, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 1.1081593927893738, | |
| "grad_norm": 4.473198413848877, | |
| "learning_rate": 3.720703125e-05, | |
| "loss": 0.1639, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 1.1119544592030361, | |
| "grad_norm": 2.689405679702759, | |
| "learning_rate": 3.7158203125e-05, | |
| "loss": 0.1117, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 1.1157495256166983, | |
| "grad_norm": 0.2793045938014984, | |
| "learning_rate": 3.7109375e-05, | |
| "loss": 0.1073, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 1.1195445920303606, | |
| "grad_norm": 1.4892089366912842, | |
| "learning_rate": 3.7060546875000004e-05, | |
| "loss": 0.1541, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 1.1233396584440227, | |
| "grad_norm": 1.1303538084030151, | |
| "learning_rate": 3.701171875e-05, | |
| "loss": 0.0961, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 1.127134724857685, | |
| "grad_norm": 0.6085264682769775, | |
| "learning_rate": 3.6962890625e-05, | |
| "loss": 0.111, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 1.1309297912713472, | |
| "grad_norm": 0.44500744342803955, | |
| "learning_rate": 3.69140625e-05, | |
| "loss": 0.0939, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 1.1347248576850095, | |
| "grad_norm": 1.8215651512145996, | |
| "learning_rate": 3.6865234375e-05, | |
| "loss": 0.1112, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 1.1385199240986716, | |
| "grad_norm": 0.7494792938232422, | |
| "learning_rate": 3.6816406250000004e-05, | |
| "loss": 0.1407, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 1.142314990512334, | |
| "grad_norm": 1.2958310842514038, | |
| "learning_rate": 3.6767578125e-05, | |
| "loss": 0.086, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 1.146110056925996, | |
| "grad_norm": 1.223376989364624, | |
| "learning_rate": 3.671875e-05, | |
| "loss": 0.1152, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 1.1499051233396584, | |
| "grad_norm": 5.232940196990967, | |
| "learning_rate": 3.6669921875e-05, | |
| "loss": 0.1308, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 1.1537001897533208, | |
| "grad_norm": 1.4690934419631958, | |
| "learning_rate": 3.662109375e-05, | |
| "loss": 0.1275, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 1.157495256166983, | |
| "grad_norm": 0.8882303833961487, | |
| "learning_rate": 3.6572265625000004e-05, | |
| "loss": 0.0709, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 1.1612903225806452, | |
| "grad_norm": 7.125335216522217, | |
| "learning_rate": 3.65234375e-05, | |
| "loss": 0.0991, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 1.1650853889943074, | |
| "grad_norm": 2.321225881576538, | |
| "learning_rate": 3.6474609375e-05, | |
| "loss": 0.1986, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 1.1688804554079697, | |
| "grad_norm": 2.8146891593933105, | |
| "learning_rate": 3.642578125e-05, | |
| "loss": 0.1497, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 1.1726755218216318, | |
| "grad_norm": 2.781428575515747, | |
| "learning_rate": 3.6376953125e-05, | |
| "loss": 0.1075, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 1.1764705882352942, | |
| "grad_norm": 7.027383327484131, | |
| "learning_rate": 3.6328125000000004e-05, | |
| "loss": 0.0921, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 1.1802656546489563, | |
| "grad_norm": 2.3189167976379395, | |
| "learning_rate": 3.6279296875e-05, | |
| "loss": 0.0784, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 1.1840607210626186, | |
| "grad_norm": 3.060039758682251, | |
| "learning_rate": 3.623046875e-05, | |
| "loss": 0.1262, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 1.187855787476281, | |
| "grad_norm": 6.099356174468994, | |
| "learning_rate": 3.6181640625e-05, | |
| "loss": 0.1506, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 1.191650853889943, | |
| "grad_norm": 3.1299543380737305, | |
| "learning_rate": 3.6132812500000005e-05, | |
| "loss": 0.1431, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 1.1954459203036052, | |
| "grad_norm": 1.5676418542861938, | |
| "learning_rate": 3.6083984375000004e-05, | |
| "loss": 0.1018, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 1.1992409867172675, | |
| "grad_norm": 0.786465585231781, | |
| "learning_rate": 3.603515625e-05, | |
| "loss": 0.1471, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 1.2030360531309299, | |
| "grad_norm": 0.6863810420036316, | |
| "learning_rate": 3.5986328125e-05, | |
| "loss": 0.1144, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 1.206831119544592, | |
| "grad_norm": 6.13245964050293, | |
| "learning_rate": 3.59375e-05, | |
| "loss": 0.1378, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 1.2106261859582543, | |
| "grad_norm": 0.9144377112388611, | |
| "learning_rate": 3.5888671875000005e-05, | |
| "loss": 0.1024, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 1.2144212523719164, | |
| "grad_norm": 13.092443466186523, | |
| "learning_rate": 3.583984375e-05, | |
| "loss": 0.1241, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 1.2182163187855788, | |
| "grad_norm": 5.453747272491455, | |
| "learning_rate": 3.5791015625e-05, | |
| "loss": 0.1307, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 1.222011385199241, | |
| "grad_norm": 5.696516036987305, | |
| "learning_rate": 3.57421875e-05, | |
| "loss": 0.1661, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 1.2258064516129032, | |
| "grad_norm": 1.4154207706451416, | |
| "learning_rate": 3.5693359375e-05, | |
| "loss": 0.1017, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 1.2296015180265654, | |
| "grad_norm": 3.1260204315185547, | |
| "learning_rate": 3.5644531250000005e-05, | |
| "loss": 0.1224, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 1.2333965844402277, | |
| "grad_norm": 1.4753592014312744, | |
| "learning_rate": 3.5595703125e-05, | |
| "loss": 0.1, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 1.23719165085389, | |
| "grad_norm": 2.7512917518615723, | |
| "learning_rate": 3.5546875e-05, | |
| "loss": 0.152, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 1.2409867172675522, | |
| "grad_norm": 0.1835506409406662, | |
| "learning_rate": 3.5498046875e-05, | |
| "loss": 0.0897, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 1.2447817836812145, | |
| "grad_norm": 2.484245777130127, | |
| "learning_rate": 3.544921875e-05, | |
| "loss": 0.1284, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 1.2485768500948766, | |
| "grad_norm": 2.778939962387085, | |
| "learning_rate": 3.5400390625000004e-05, | |
| "loss": 0.1225, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 1.252371916508539, | |
| "grad_norm": 4.067395210266113, | |
| "learning_rate": 3.53515625e-05, | |
| "loss": 0.1687, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 1.256166982922201, | |
| "grad_norm": 0.2922412157058716, | |
| "learning_rate": 3.5302734375e-05, | |
| "loss": 0.066, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 1.2599620493358634, | |
| "grad_norm": 2.992678165435791, | |
| "learning_rate": 3.525390625e-05, | |
| "loss": 0.1016, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 1.2637571157495255, | |
| "grad_norm": 0.5019288063049316, | |
| "learning_rate": 3.5205078125e-05, | |
| "loss": 0.0877, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 1.2675521821631879, | |
| "grad_norm": 5.55689811706543, | |
| "learning_rate": 3.5156250000000004e-05, | |
| "loss": 0.1191, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 1.2713472485768502, | |
| "grad_norm": 3.2791213989257812, | |
| "learning_rate": 3.5107421875e-05, | |
| "loss": 0.1086, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 1.2751423149905123, | |
| "grad_norm": 7.413064956665039, | |
| "learning_rate": 3.505859375e-05, | |
| "loss": 0.1063, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 1.2789373814041745, | |
| "grad_norm": 4.541271686553955, | |
| "learning_rate": 3.5009765625e-05, | |
| "loss": 0.0959, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 1.2827324478178368, | |
| "grad_norm": 2.8879811763763428, | |
| "learning_rate": 3.49609375e-05, | |
| "loss": 0.1178, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 1.2865275142314991, | |
| "grad_norm": 3.210865020751953, | |
| "learning_rate": 3.4912109375000004e-05, | |
| "loss": 0.1464, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 1.2903225806451613, | |
| "grad_norm": 0.654231071472168, | |
| "learning_rate": 3.486328125e-05, | |
| "loss": 0.1404, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 1.2941176470588236, | |
| "grad_norm": 2.9404890537261963, | |
| "learning_rate": 3.4814453125e-05, | |
| "loss": 0.1213, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 1.2979127134724857, | |
| "grad_norm": 2.2991085052490234, | |
| "learning_rate": 3.4765625e-05, | |
| "loss": 0.1131, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 1.301707779886148, | |
| "grad_norm": 0.30925440788269043, | |
| "learning_rate": 3.4716796875e-05, | |
| "loss": 0.1166, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 1.3055028462998102, | |
| "grad_norm": 1.3804266452789307, | |
| "learning_rate": 3.4667968750000004e-05, | |
| "loss": 0.0634, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 1.3092979127134725, | |
| "grad_norm": 3.1803112030029297, | |
| "learning_rate": 3.4619140625e-05, | |
| "loss": 0.1916, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 1.3130929791271346, | |
| "grad_norm": 2.8847222328186035, | |
| "learning_rate": 3.45703125e-05, | |
| "loss": 0.1856, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 1.316888045540797, | |
| "grad_norm": 7.0924973487854, | |
| "learning_rate": 3.4521484375e-05, | |
| "loss": 0.1292, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 1.3206831119544593, | |
| "grad_norm": 4.695943355560303, | |
| "learning_rate": 3.4472656250000005e-05, | |
| "loss": 0.1518, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 1.3244781783681214, | |
| "grad_norm": 4.995908260345459, | |
| "learning_rate": 3.4423828125000003e-05, | |
| "loss": 0.12, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 1.3282732447817835, | |
| "grad_norm": 4.585287570953369, | |
| "learning_rate": 3.4375e-05, | |
| "loss": 0.0933, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 1.3320683111954459, | |
| "grad_norm": 1.5841524600982666, | |
| "learning_rate": 3.4326171875e-05, | |
| "loss": 0.1172, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 1.3358633776091082, | |
| "grad_norm": 3.6837852001190186, | |
| "learning_rate": 3.427734375e-05, | |
| "loss": 0.1164, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 1.3396584440227703, | |
| "grad_norm": 2.470222234725952, | |
| "learning_rate": 3.4228515625000005e-05, | |
| "loss": 0.1258, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 1.3434535104364327, | |
| "grad_norm": 1.8782237768173218, | |
| "learning_rate": 3.41796875e-05, | |
| "loss": 0.1078, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 1.3472485768500948, | |
| "grad_norm": 0.29535171389579773, | |
| "learning_rate": 3.4130859375e-05, | |
| "loss": 0.1658, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 1.3510436432637571, | |
| "grad_norm": 3.8535208702087402, | |
| "learning_rate": 3.408203125e-05, | |
| "loss": 0.1632, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 1.3548387096774195, | |
| "grad_norm": 2.0340235233306885, | |
| "learning_rate": 3.4033203125e-05, | |
| "loss": 0.1498, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 1.3586337760910816, | |
| "grad_norm": 3.015774726867676, | |
| "learning_rate": 3.3984375000000004e-05, | |
| "loss": 0.1099, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 1.3624288425047437, | |
| "grad_norm": 5.396883487701416, | |
| "learning_rate": 3.3935546875e-05, | |
| "loss": 0.1308, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 1.366223908918406, | |
| "grad_norm": 4.15665864944458, | |
| "learning_rate": 3.388671875e-05, | |
| "loss": 0.0893, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 1.3700189753320684, | |
| "grad_norm": 2.0461652278900146, | |
| "learning_rate": 3.3837890625e-05, | |
| "loss": 0.1157, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 1.3738140417457305, | |
| "grad_norm": 1.5953052043914795, | |
| "learning_rate": 3.37890625e-05, | |
| "loss": 0.1611, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 1.3776091081593929, | |
| "grad_norm": 3.8149826526641846, | |
| "learning_rate": 3.3740234375000004e-05, | |
| "loss": 0.1582, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 1.381404174573055, | |
| "grad_norm": 5.658437252044678, | |
| "learning_rate": 3.369140625e-05, | |
| "loss": 0.1481, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 1.3851992409867173, | |
| "grad_norm": 0.47566506266593933, | |
| "learning_rate": 3.3642578125e-05, | |
| "loss": 0.1336, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 1.3889943074003794, | |
| "grad_norm": 2.9851224422454834, | |
| "learning_rate": 3.359375e-05, | |
| "loss": 0.1274, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 1.3927893738140418, | |
| "grad_norm": 2.3793752193450928, | |
| "learning_rate": 3.3544921875e-05, | |
| "loss": 0.1189, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 1.396584440227704, | |
| "grad_norm": 0.35333120822906494, | |
| "learning_rate": 3.3496093750000004e-05, | |
| "loss": 0.1021, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 1.4003795066413662, | |
| "grad_norm": 2.170039653778076, | |
| "learning_rate": 3.3447265625e-05, | |
| "loss": 0.1016, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 1.4041745730550286, | |
| "grad_norm": 3.225989818572998, | |
| "learning_rate": 3.33984375e-05, | |
| "loss": 0.1559, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 1.4079696394686907, | |
| "grad_norm": 5.81306266784668, | |
| "learning_rate": 3.3349609375e-05, | |
| "loss": 0.1378, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 1.4117647058823528, | |
| "grad_norm": 0.839579701423645, | |
| "learning_rate": 3.330078125e-05, | |
| "loss": 0.0981, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 1.4155597722960152, | |
| "grad_norm": 2.421964645385742, | |
| "learning_rate": 3.3251953125000004e-05, | |
| "loss": 0.1267, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 1.4193548387096775, | |
| "grad_norm": 0.298155814409256, | |
| "learning_rate": 3.3203125e-05, | |
| "loss": 0.1619, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 1.4231499051233396, | |
| "grad_norm": 5.643527030944824, | |
| "learning_rate": 3.3154296875e-05, | |
| "loss": 0.0844, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 1.426944971537002, | |
| "grad_norm": 1.7513082027435303, | |
| "learning_rate": 3.310546875e-05, | |
| "loss": 0.133, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 1.430740037950664, | |
| "grad_norm": 1.2837634086608887, | |
| "learning_rate": 3.3056640625000005e-05, | |
| "loss": 0.1241, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 1.4345351043643264, | |
| "grad_norm": 0.7017351984977722, | |
| "learning_rate": 3.3007812500000004e-05, | |
| "loss": 0.1123, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 1.4383301707779887, | |
| "grad_norm": 6.043475151062012, | |
| "learning_rate": 3.2958984375e-05, | |
| "loss": 0.1249, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 1.4421252371916509, | |
| "grad_norm": 4.449422359466553, | |
| "learning_rate": 3.291015625e-05, | |
| "loss": 0.173, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 1.445920303605313, | |
| "grad_norm": 1.7111449241638184, | |
| "learning_rate": 3.2861328125e-05, | |
| "loss": 0.1473, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 1.4497153700189753, | |
| "grad_norm": 1.3379569053649902, | |
| "learning_rate": 3.2812500000000005e-05, | |
| "loss": 0.1119, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 1.4535104364326377, | |
| "grad_norm": 7.154158115386963, | |
| "learning_rate": 3.2763671875e-05, | |
| "loss": 0.1273, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 1.4573055028462998, | |
| "grad_norm": 1.2248731851577759, | |
| "learning_rate": 3.271484375e-05, | |
| "loss": 0.1081, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 1.4611005692599621, | |
| "grad_norm": 1.219230055809021, | |
| "learning_rate": 3.2666015625e-05, | |
| "loss": 0.0945, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 1.4648956356736242, | |
| "grad_norm": 4.3124189376831055, | |
| "learning_rate": 3.26171875e-05, | |
| "loss": 0.1039, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 1.4686907020872866, | |
| "grad_norm": 2.915302038192749, | |
| "learning_rate": 3.2568359375000005e-05, | |
| "loss": 0.1236, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 1.4724857685009487, | |
| "grad_norm": 0.3403218984603882, | |
| "learning_rate": 3.251953125e-05, | |
| "loss": 0.146, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 1.476280834914611, | |
| "grad_norm": 1.74779212474823, | |
| "learning_rate": 3.2470703125e-05, | |
| "loss": 0.1096, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 1.4800759013282732, | |
| "grad_norm": 2.724412202835083, | |
| "learning_rate": 3.2421875e-05, | |
| "loss": 0.1147, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 1.4838709677419355, | |
| "grad_norm": 3.6029605865478516, | |
| "learning_rate": 3.2373046875e-05, | |
| "loss": 0.1293, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 1.4876660341555978, | |
| "grad_norm": 1.7680699825286865, | |
| "learning_rate": 3.2324218750000004e-05, | |
| "loss": 0.0891, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 1.49146110056926, | |
| "grad_norm": 0.7916316390037537, | |
| "learning_rate": 3.2275390625e-05, | |
| "loss": 0.1223, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 1.495256166982922, | |
| "grad_norm": 0.9054811596870422, | |
| "learning_rate": 3.22265625e-05, | |
| "loss": 0.0934, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 1.4990512333965844, | |
| "grad_norm": 0.14054611325263977, | |
| "learning_rate": 3.2177734375e-05, | |
| "loss": 0.0494, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 1.5028462998102468, | |
| "grad_norm": 3.1943421363830566, | |
| "learning_rate": 3.212890625e-05, | |
| "loss": 0.1156, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 1.5066413662239089, | |
| "grad_norm": 1.0965791940689087, | |
| "learning_rate": 3.2080078125000004e-05, | |
| "loss": 0.1016, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 1.510436432637571, | |
| "grad_norm": 1.3087248802185059, | |
| "learning_rate": 3.203125e-05, | |
| "loss": 0.0764, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 1.5142314990512333, | |
| "grad_norm": 2.760798692703247, | |
| "learning_rate": 3.1982421875e-05, | |
| "loss": 0.114, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 1.5180265654648957, | |
| "grad_norm": 0.1450069397687912, | |
| "learning_rate": 3.193359375e-05, | |
| "loss": 0.1192, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 1.521821631878558, | |
| "grad_norm": 4.504504680633545, | |
| "learning_rate": 3.1884765625e-05, | |
| "loss": 0.1046, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 1.5256166982922201, | |
| "grad_norm": 0.7182434797286987, | |
| "learning_rate": 3.1835937500000004e-05, | |
| "loss": 0.0932, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 1.5294117647058822, | |
| "grad_norm": 4.370609283447266, | |
| "learning_rate": 3.1787109375e-05, | |
| "loss": 0.144, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 1.5332068311195446, | |
| "grad_norm": 3.8300323486328125, | |
| "learning_rate": 3.173828125e-05, | |
| "loss": 0.0982, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 1.537001897533207, | |
| "grad_norm": 0.25771814584732056, | |
| "learning_rate": 3.1689453125e-05, | |
| "loss": 0.0691, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 1.540796963946869, | |
| "grad_norm": 2.758225917816162, | |
| "learning_rate": 3.1640625e-05, | |
| "loss": 0.1308, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 1.5445920303605312, | |
| "grad_norm": 2.7619638442993164, | |
| "learning_rate": 3.1591796875000004e-05, | |
| "loss": 0.094, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 1.5483870967741935, | |
| "grad_norm": 0.9765902757644653, | |
| "learning_rate": 3.154296875e-05, | |
| "loss": 0.0811, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 1.5521821631878558, | |
| "grad_norm": 4.361360549926758, | |
| "learning_rate": 3.1494140625e-05, | |
| "loss": 0.1742, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 1.5559772296015182, | |
| "grad_norm": 2.249197244644165, | |
| "learning_rate": 3.14453125e-05, | |
| "loss": 0.0807, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 1.5597722960151803, | |
| "grad_norm": 3.4518532752990723, | |
| "learning_rate": 3.1396484375000005e-05, | |
| "loss": 0.1422, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 1.5635673624288424, | |
| "grad_norm": 0.6679037809371948, | |
| "learning_rate": 3.1347656250000003e-05, | |
| "loss": 0.1214, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 1.5673624288425048, | |
| "grad_norm": 3.879596710205078, | |
| "learning_rate": 3.1298828125e-05, | |
| "loss": 0.1084, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 1.571157495256167, | |
| "grad_norm": 5.232009410858154, | |
| "learning_rate": 3.125e-05, | |
| "loss": 0.1192, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 1.5749525616698292, | |
| "grad_norm": 3.875843048095703, | |
| "learning_rate": 3.1201171875e-05, | |
| "loss": 0.1099, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 1.5787476280834913, | |
| "grad_norm": 0.17772170901298523, | |
| "learning_rate": 3.1152343750000005e-05, | |
| "loss": 0.1001, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 1.5825426944971537, | |
| "grad_norm": 0.6866888403892517, | |
| "learning_rate": 3.1103515625e-05, | |
| "loss": 0.1598, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 1.586337760910816, | |
| "grad_norm": 2.2445452213287354, | |
| "learning_rate": 3.10546875e-05, | |
| "loss": 0.1532, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 1.5901328273244781, | |
| "grad_norm": 1.2135056257247925, | |
| "learning_rate": 3.1005859375e-05, | |
| "loss": 0.1337, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 1.5939278937381403, | |
| "grad_norm": 0.8548033833503723, | |
| "learning_rate": 3.095703125e-05, | |
| "loss": 0.1142, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 1.5977229601518026, | |
| "grad_norm": 1.7404321432113647, | |
| "learning_rate": 3.0908203125000004e-05, | |
| "loss": 0.1195, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 1.601518026565465, | |
| "grad_norm": 1.4047428369522095, | |
| "learning_rate": 3.0859375e-05, | |
| "loss": 0.1853, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 1.6053130929791273, | |
| "grad_norm": 2.793487071990967, | |
| "learning_rate": 3.0810546875e-05, | |
| "loss": 0.1231, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 1.6091081593927894, | |
| "grad_norm": 0.928959310054779, | |
| "learning_rate": 3.076171875e-05, | |
| "loss": 0.0891, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 1.6129032258064515, | |
| "grad_norm": 1.1571967601776123, | |
| "learning_rate": 3.0712890625e-05, | |
| "loss": 0.1119, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 1.6166982922201139, | |
| "grad_norm": 3.0740041732788086, | |
| "learning_rate": 3.0664062500000004e-05, | |
| "loss": 0.1518, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 1.6204933586337762, | |
| "grad_norm": 5.726138114929199, | |
| "learning_rate": 3.0615234375e-05, | |
| "loss": 0.1121, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 1.6242884250474383, | |
| "grad_norm": 3.900777816772461, | |
| "learning_rate": 3.056640625e-05, | |
| "loss": 0.1513, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 1.6280834914611004, | |
| "grad_norm": 3.43808913230896, | |
| "learning_rate": 3.0517578125e-05, | |
| "loss": 0.1259, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 1.6318785578747628, | |
| "grad_norm": 1.2054848670959473, | |
| "learning_rate": 3.0468750000000002e-05, | |
| "loss": 0.1446, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 1.635673624288425, | |
| "grad_norm": 3.756579875946045, | |
| "learning_rate": 3.0419921875e-05, | |
| "loss": 0.1348, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 1.6394686907020875, | |
| "grad_norm": 1.4033925533294678, | |
| "learning_rate": 3.0371093750000003e-05, | |
| "loss": 0.1053, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 1.6432637571157496, | |
| "grad_norm": 1.6513621807098389, | |
| "learning_rate": 3.0322265625e-05, | |
| "loss": 0.1217, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 1.6470588235294117, | |
| "grad_norm": 1.9821256399154663, | |
| "learning_rate": 3.02734375e-05, | |
| "loss": 0.0959, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 1.650853889943074, | |
| "grad_norm": 7.50634241104126, | |
| "learning_rate": 3.0224609375000002e-05, | |
| "loss": 0.1487, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 1.6546489563567364, | |
| "grad_norm": 1.1505802869796753, | |
| "learning_rate": 3.017578125e-05, | |
| "loss": 0.1246, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 1.6584440227703985, | |
| "grad_norm": 1.774200677871704, | |
| "learning_rate": 3.0126953125000002e-05, | |
| "loss": 0.086, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 1.6622390891840606, | |
| "grad_norm": 1.566748023033142, | |
| "learning_rate": 3.0078125e-05, | |
| "loss": 0.1088, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 1.666034155597723, | |
| "grad_norm": 2.8167648315429688, | |
| "learning_rate": 3.0029296875000003e-05, | |
| "loss": 0.122, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 1.6698292220113853, | |
| "grad_norm": 1.7637346982955933, | |
| "learning_rate": 2.998046875e-05, | |
| "loss": 0.1036, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 1.6736242884250474, | |
| "grad_norm": 0.3347111642360687, | |
| "learning_rate": 2.9931640625e-05, | |
| "loss": 0.1259, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 1.6774193548387095, | |
| "grad_norm": 4.920076370239258, | |
| "learning_rate": 2.9882812500000002e-05, | |
| "loss": 0.1594, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 1.6812144212523719, | |
| "grad_norm": 3.4409444332122803, | |
| "learning_rate": 2.9833984375e-05, | |
| "loss": 0.1541, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 1.6850094876660342, | |
| "grad_norm": 0.639980673789978, | |
| "learning_rate": 2.9785156250000003e-05, | |
| "loss": 0.0826, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 1.6888045540796965, | |
| "grad_norm": 3.240345001220703, | |
| "learning_rate": 2.9736328125e-05, | |
| "loss": 0.1473, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 1.6925996204933587, | |
| "grad_norm": 2.2682647705078125, | |
| "learning_rate": 2.96875e-05, | |
| "loss": 0.0959, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 1.6963946869070208, | |
| "grad_norm": 2.3791496753692627, | |
| "learning_rate": 2.9638671875000002e-05, | |
| "loss": 0.0953, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 1.7001897533206831, | |
| "grad_norm": 1.5654246807098389, | |
| "learning_rate": 2.958984375e-05, | |
| "loss": 0.113, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 1.7039848197343455, | |
| "grad_norm": 5.17665958404541, | |
| "learning_rate": 2.9541015625000003e-05, | |
| "loss": 0.1164, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 1.7077798861480076, | |
| "grad_norm": 18.226165771484375, | |
| "learning_rate": 2.94921875e-05, | |
| "loss": 0.1293, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 1.7115749525616697, | |
| "grad_norm": 3.5760374069213867, | |
| "learning_rate": 2.9443359375e-05, | |
| "loss": 0.0931, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 1.715370018975332, | |
| "grad_norm": 2.9964776039123535, | |
| "learning_rate": 2.9394531250000002e-05, | |
| "loss": 0.0932, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 1.7191650853889944, | |
| "grad_norm": 10.505178451538086, | |
| "learning_rate": 2.9345703125e-05, | |
| "loss": 0.139, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 1.7229601518026565, | |
| "grad_norm": 0.9944730997085571, | |
| "learning_rate": 2.9296875000000002e-05, | |
| "loss": 0.159, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 1.7267552182163188, | |
| "grad_norm": 1.2323939800262451, | |
| "learning_rate": 2.9248046875e-05, | |
| "loss": 0.118, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 1.730550284629981, | |
| "grad_norm": 0.8581392765045166, | |
| "learning_rate": 2.9199218750000003e-05, | |
| "loss": 0.1165, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 1.7343453510436433, | |
| "grad_norm": 2.196648120880127, | |
| "learning_rate": 2.9150390625e-05, | |
| "loss": 0.0803, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 1.7381404174573056, | |
| "grad_norm": 3.5112388134002686, | |
| "learning_rate": 2.91015625e-05, | |
| "loss": 0.1348, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 1.7419354838709677, | |
| "grad_norm": 1.1738495826721191, | |
| "learning_rate": 2.9052734375000002e-05, | |
| "loss": 0.1114, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 1.7457305502846299, | |
| "grad_norm": 1.6850240230560303, | |
| "learning_rate": 2.900390625e-05, | |
| "loss": 0.1457, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 1.7495256166982922, | |
| "grad_norm": 1.4865467548370361, | |
| "learning_rate": 2.8955078125000003e-05, | |
| "loss": 0.1078, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 1.7533206831119545, | |
| "grad_norm": 1.445610523223877, | |
| "learning_rate": 2.890625e-05, | |
| "loss": 0.0839, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 1.7571157495256167, | |
| "grad_norm": 1.649983525276184, | |
| "learning_rate": 2.8857421875e-05, | |
| "loss": 0.1028, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 1.7609108159392788, | |
| "grad_norm": 2.717585802078247, | |
| "learning_rate": 2.8808593750000002e-05, | |
| "loss": 0.1127, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 1.7647058823529411, | |
| "grad_norm": 2.902244806289673, | |
| "learning_rate": 2.8759765625e-05, | |
| "loss": 0.0743, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 1.7685009487666035, | |
| "grad_norm": 1.8880512714385986, | |
| "learning_rate": 2.8710937500000002e-05, | |
| "loss": 0.0875, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 1.7722960151802658, | |
| "grad_norm": 1.119419813156128, | |
| "learning_rate": 2.8662109375e-05, | |
| "loss": 0.1028, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 1.776091081593928, | |
| "grad_norm": 2.3372507095336914, | |
| "learning_rate": 2.8613281250000003e-05, | |
| "loss": 0.161, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 1.77988614800759, | |
| "grad_norm": 0.6809380054473877, | |
| "learning_rate": 2.8564453125e-05, | |
| "loss": 0.091, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 1.7836812144212524, | |
| "grad_norm": 4.871325969696045, | |
| "learning_rate": 2.8515625e-05, | |
| "loss": 0.1495, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 1.7874762808349147, | |
| "grad_norm": 10.103543281555176, | |
| "learning_rate": 2.8466796875000002e-05, | |
| "loss": 0.0847, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 1.7912713472485768, | |
| "grad_norm": 0.719699501991272, | |
| "learning_rate": 2.841796875e-05, | |
| "loss": 0.0991, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 1.795066413662239, | |
| "grad_norm": 2.012406826019287, | |
| "learning_rate": 2.8369140625000003e-05, | |
| "loss": 0.069, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 1.7988614800759013, | |
| "grad_norm": 2.038810968399048, | |
| "learning_rate": 2.83203125e-05, | |
| "loss": 0.0946, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 1.8026565464895636, | |
| "grad_norm": 1.991003394126892, | |
| "learning_rate": 2.8271484375e-05, | |
| "loss": 0.1033, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 1.8064516129032258, | |
| "grad_norm": 1.9379823207855225, | |
| "learning_rate": 2.8222656250000002e-05, | |
| "loss": 0.0738, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 1.810246679316888, | |
| "grad_norm": 0.9378390312194824, | |
| "learning_rate": 2.8173828125e-05, | |
| "loss": 0.0907, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 1.8140417457305502, | |
| "grad_norm": 2.5683369636535645, | |
| "learning_rate": 2.8125000000000003e-05, | |
| "loss": 0.1156, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 1.8178368121442126, | |
| "grad_norm": 2.95536470413208, | |
| "learning_rate": 2.8076171875e-05, | |
| "loss": 0.0959, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 1.821631878557875, | |
| "grad_norm": 11.215580940246582, | |
| "learning_rate": 2.802734375e-05, | |
| "loss": 0.0812, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 1.825426944971537, | |
| "grad_norm": 0.4500042498111725, | |
| "learning_rate": 2.7978515625000002e-05, | |
| "loss": 0.1114, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 1.8292220113851991, | |
| "grad_norm": 0.5829250812530518, | |
| "learning_rate": 2.79296875e-05, | |
| "loss": 0.1284, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 1.8330170777988615, | |
| "grad_norm": 3.114776134490967, | |
| "learning_rate": 2.7880859375000002e-05, | |
| "loss": 0.1283, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 1.8368121442125238, | |
| "grad_norm": 0.47552067041397095, | |
| "learning_rate": 2.783203125e-05, | |
| "loss": 0.0752, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 1.840607210626186, | |
| "grad_norm": 4.794514179229736, | |
| "learning_rate": 2.7783203125000003e-05, | |
| "loss": 0.1012, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 1.844402277039848, | |
| "grad_norm": 5.392133712768555, | |
| "learning_rate": 2.7734375e-05, | |
| "loss": 0.178, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 1.8481973434535104, | |
| "grad_norm": 1.1505749225616455, | |
| "learning_rate": 2.7685546875e-05, | |
| "loss": 0.126, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 1.8519924098671727, | |
| "grad_norm": 1.1924586296081543, | |
| "learning_rate": 2.7636718750000002e-05, | |
| "loss": 0.1109, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 1.855787476280835, | |
| "grad_norm": 0.12782755494117737, | |
| "learning_rate": 2.7587890625e-05, | |
| "loss": 0.0732, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 1.8595825426944972, | |
| "grad_norm": 1.1095064878463745, | |
| "learning_rate": 2.7539062500000003e-05, | |
| "loss": 0.0802, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 1.8633776091081593, | |
| "grad_norm": 8.920310020446777, | |
| "learning_rate": 2.7490234375e-05, | |
| "loss": 0.0964, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 1.8671726755218216, | |
| "grad_norm": 1.8678808212280273, | |
| "learning_rate": 2.744140625e-05, | |
| "loss": 0.1072, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 1.870967741935484, | |
| "grad_norm": 1.8633017539978027, | |
| "learning_rate": 2.7392578125000002e-05, | |
| "loss": 0.0835, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 1.874762808349146, | |
| "grad_norm": 1.7576115131378174, | |
| "learning_rate": 2.734375e-05, | |
| "loss": 0.1327, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 1.8785578747628082, | |
| "grad_norm": 3.504157304763794, | |
| "learning_rate": 2.7294921875000003e-05, | |
| "loss": 0.1609, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 1.8823529411764706, | |
| "grad_norm": 1.7668483257293701, | |
| "learning_rate": 2.724609375e-05, | |
| "loss": 0.1316, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 1.886148007590133, | |
| "grad_norm": 0.659870982170105, | |
| "learning_rate": 2.7197265625e-05, | |
| "loss": 0.0913, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 1.889943074003795, | |
| "grad_norm": 1.428725004196167, | |
| "learning_rate": 2.7148437500000002e-05, | |
| "loss": 0.118, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 1.8937381404174574, | |
| "grad_norm": 1.8446964025497437, | |
| "learning_rate": 2.7099609375e-05, | |
| "loss": 0.1203, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 1.8975332068311195, | |
| "grad_norm": 2.9335217475891113, | |
| "learning_rate": 2.7050781250000002e-05, | |
| "loss": 0.1301, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.9013282732447818, | |
| "grad_norm": 0.8534810543060303, | |
| "learning_rate": 2.7001953125e-05, | |
| "loss": 0.0555, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 1.9051233396584442, | |
| "grad_norm": 0.5556221604347229, | |
| "learning_rate": 2.6953125000000003e-05, | |
| "loss": 0.1036, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 1.9089184060721063, | |
| "grad_norm": 1.7097387313842773, | |
| "learning_rate": 2.6904296875e-05, | |
| "loss": 0.0869, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 1.9127134724857684, | |
| "grad_norm": 2.324669122695923, | |
| "learning_rate": 2.685546875e-05, | |
| "loss": 0.1233, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 1.9165085388994307, | |
| "grad_norm": 2.4764981269836426, | |
| "learning_rate": 2.6806640625000002e-05, | |
| "loss": 0.1379, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 1.920303605313093, | |
| "grad_norm": 4.731557846069336, | |
| "learning_rate": 2.67578125e-05, | |
| "loss": 0.189, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 1.9240986717267552, | |
| "grad_norm": 0.4868462383747101, | |
| "learning_rate": 2.6708984375000003e-05, | |
| "loss": 0.0765, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 1.9278937381404173, | |
| "grad_norm": 1.3497892618179321, | |
| "learning_rate": 2.666015625e-05, | |
| "loss": 0.1039, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 1.9316888045540797, | |
| "grad_norm": 15.007429122924805, | |
| "learning_rate": 2.6611328125e-05, | |
| "loss": 0.0996, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 1.935483870967742, | |
| "grad_norm": 8.113617897033691, | |
| "learning_rate": 2.6562500000000002e-05, | |
| "loss": 0.1316, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 1.9392789373814043, | |
| "grad_norm": 0.4574742913246155, | |
| "learning_rate": 2.6513671875e-05, | |
| "loss": 0.1044, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 1.9430740037950665, | |
| "grad_norm": 2.1475601196289062, | |
| "learning_rate": 2.6464843750000002e-05, | |
| "loss": 0.1236, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 1.9468690702087286, | |
| "grad_norm": 2.370619058609009, | |
| "learning_rate": 2.6416015625e-05, | |
| "loss": 0.1358, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 1.950664136622391, | |
| "grad_norm": 0.7283152937889099, | |
| "learning_rate": 2.63671875e-05, | |
| "loss": 0.1348, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 1.9544592030360532, | |
| "grad_norm": 2.8883001804351807, | |
| "learning_rate": 2.6318359375e-05, | |
| "loss": 0.083, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 1.9582542694497154, | |
| "grad_norm": 0.26794353127479553, | |
| "learning_rate": 2.626953125e-05, | |
| "loss": 0.1229, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 1.9620493358633775, | |
| "grad_norm": 0.10836785286664963, | |
| "learning_rate": 2.6220703125000002e-05, | |
| "loss": 0.0731, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 1.9658444022770398, | |
| "grad_norm": 1.5825821161270142, | |
| "learning_rate": 2.6171875e-05, | |
| "loss": 0.1394, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 1.9696394686907022, | |
| "grad_norm": 2.9467551708221436, | |
| "learning_rate": 2.6123046875000003e-05, | |
| "loss": 0.0986, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 1.9734345351043643, | |
| "grad_norm": 0.14293566346168518, | |
| "learning_rate": 2.607421875e-05, | |
| "loss": 0.0824, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 1.9772296015180264, | |
| "grad_norm": 0.4912210702896118, | |
| "learning_rate": 2.6025390625e-05, | |
| "loss": 0.0863, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 1.9810246679316887, | |
| "grad_norm": 0.2447841614484787, | |
| "learning_rate": 2.5976562500000002e-05, | |
| "loss": 0.0877, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 1.984819734345351, | |
| "grad_norm": 0.13301405310630798, | |
| "learning_rate": 2.5927734375e-05, | |
| "loss": 0.104, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 1.9886148007590134, | |
| "grad_norm": 3.25866961479187, | |
| "learning_rate": 2.5878906250000003e-05, | |
| "loss": 0.0806, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 1.9924098671726755, | |
| "grad_norm": 3.9567527770996094, | |
| "learning_rate": 2.5830078125e-05, | |
| "loss": 0.1226, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 1.9962049335863377, | |
| "grad_norm": 3.6540729999542236, | |
| "learning_rate": 2.578125e-05, | |
| "loss": 0.0628, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 2.9958958625793457, | |
| "learning_rate": 2.5732421875000002e-05, | |
| "loss": 0.1229, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 2.0037950664136623, | |
| "grad_norm": 4.634014129638672, | |
| "learning_rate": 2.568359375e-05, | |
| "loss": 0.1, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 2.0075901328273247, | |
| "grad_norm": 1.0794429779052734, | |
| "learning_rate": 2.5634765625000002e-05, | |
| "loss": 0.1, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 2.0113851992409866, | |
| "grad_norm": 2.6222951412200928, | |
| "learning_rate": 2.55859375e-05, | |
| "loss": 0.057, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 2.015180265654649, | |
| "grad_norm": 1.499935507774353, | |
| "learning_rate": 2.5537109375e-05, | |
| "loss": 0.0766, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 2.0189753320683113, | |
| "grad_norm": 2.614969491958618, | |
| "learning_rate": 2.548828125e-05, | |
| "loss": 0.1003, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 2.0227703984819736, | |
| "grad_norm": 1.4524706602096558, | |
| "learning_rate": 2.5439453125e-05, | |
| "loss": 0.1681, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 2.0265654648956355, | |
| "grad_norm": 1.5427693128585815, | |
| "learning_rate": 2.5390625000000002e-05, | |
| "loss": 0.0745, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 2.030360531309298, | |
| "grad_norm": 0.6060462594032288, | |
| "learning_rate": 2.5341796875e-05, | |
| "loss": 0.0557, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 2.03415559772296, | |
| "grad_norm": 2.1763222217559814, | |
| "learning_rate": 2.5292968750000003e-05, | |
| "loss": 0.0962, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 2.0379506641366225, | |
| "grad_norm": 0.9857283234596252, | |
| "learning_rate": 2.5244140625e-05, | |
| "loss": 0.0646, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 2.041745730550285, | |
| "grad_norm": 0.14561018347740173, | |
| "learning_rate": 2.51953125e-05, | |
| "loss": 0.0686, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 2.0455407969639468, | |
| "grad_norm": 5.825016498565674, | |
| "learning_rate": 2.5146484375000002e-05, | |
| "loss": 0.1106, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 2.049335863377609, | |
| "grad_norm": 0.4656510353088379, | |
| "learning_rate": 2.509765625e-05, | |
| "loss": 0.0793, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 2.0531309297912714, | |
| "grad_norm": 5.336658954620361, | |
| "learning_rate": 2.5048828125000003e-05, | |
| "loss": 0.1136, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 2.0569259962049338, | |
| "grad_norm": 1.3186858892440796, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.0908, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 2.0607210626185957, | |
| "grad_norm": 2.3468871116638184, | |
| "learning_rate": 2.4951171875e-05, | |
| "loss": 0.1127, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 2.064516129032258, | |
| "grad_norm": 1.6484739780426025, | |
| "learning_rate": 2.4902343750000002e-05, | |
| "loss": 0.0921, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 2.0683111954459203, | |
| "grad_norm": 1.97286856174469, | |
| "learning_rate": 2.4853515625e-05, | |
| "loss": 0.064, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 2.0721062618595827, | |
| "grad_norm": 0.7309706211090088, | |
| "learning_rate": 2.4804687500000002e-05, | |
| "loss": 0.1256, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 2.0759013282732446, | |
| "grad_norm": 3.2271645069122314, | |
| "learning_rate": 2.4755859375e-05, | |
| "loss": 0.0889, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 2.079696394686907, | |
| "grad_norm": 18.506216049194336, | |
| "learning_rate": 2.470703125e-05, | |
| "loss": 0.1328, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 2.0834914611005693, | |
| "grad_norm": 1.2257277965545654, | |
| "learning_rate": 2.4658203125e-05, | |
| "loss": 0.0673, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 2.0872865275142316, | |
| "grad_norm": 0.1906469613313675, | |
| "learning_rate": 2.4609375e-05, | |
| "loss": 0.0808, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 2.091081593927894, | |
| "grad_norm": 0.9694260954856873, | |
| "learning_rate": 2.4560546875000002e-05, | |
| "loss": 0.0558, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 2.094876660341556, | |
| "grad_norm": 5.630046844482422, | |
| "learning_rate": 2.451171875e-05, | |
| "loss": 0.1262, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 2.098671726755218, | |
| "grad_norm": 0.13950304687023163, | |
| "learning_rate": 2.4462890625000003e-05, | |
| "loss": 0.0711, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 2.1024667931688805, | |
| "grad_norm": 0.424904465675354, | |
| "learning_rate": 2.44140625e-05, | |
| "loss": 0.0841, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 2.106261859582543, | |
| "grad_norm": 7.330411434173584, | |
| "learning_rate": 2.4365234375e-05, | |
| "loss": 0.1482, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 2.1100569259962048, | |
| "grad_norm": 0.2741791009902954, | |
| "learning_rate": 2.4316406250000002e-05, | |
| "loss": 0.0945, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 2.113851992409867, | |
| "grad_norm": 1.025099277496338, | |
| "learning_rate": 2.4267578125e-05, | |
| "loss": 0.0981, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 2.1176470588235294, | |
| "grad_norm": 2.723508596420288, | |
| "learning_rate": 2.4218750000000003e-05, | |
| "loss": 0.067, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 2.121442125237192, | |
| "grad_norm": 0.18666787445545197, | |
| "learning_rate": 2.4169921875e-05, | |
| "loss": 0.077, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 2.1252371916508537, | |
| "grad_norm": 2.304980754852295, | |
| "learning_rate": 2.412109375e-05, | |
| "loss": 0.1016, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 2.129032258064516, | |
| "grad_norm": 1.6174981594085693, | |
| "learning_rate": 2.4072265625000002e-05, | |
| "loss": 0.0735, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 2.1328273244781784, | |
| "grad_norm": 5.401015758514404, | |
| "learning_rate": 2.40234375e-05, | |
| "loss": 0.087, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 2.1366223908918407, | |
| "grad_norm": 2.5387024879455566, | |
| "learning_rate": 2.3974609375000002e-05, | |
| "loss": 0.1006, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 2.140417457305503, | |
| "grad_norm": 4.753091812133789, | |
| "learning_rate": 2.392578125e-05, | |
| "loss": 0.1013, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 2.144212523719165, | |
| "grad_norm": 3.540262460708618, | |
| "learning_rate": 2.3876953125e-05, | |
| "loss": 0.0697, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 2.1480075901328273, | |
| "grad_norm": 1.53217613697052, | |
| "learning_rate": 2.3828125e-05, | |
| "loss": 0.0812, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 2.1518026565464896, | |
| "grad_norm": 2.652308940887451, | |
| "learning_rate": 2.3779296875e-05, | |
| "loss": 0.092, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 2.155597722960152, | |
| "grad_norm": 2.7964372634887695, | |
| "learning_rate": 2.3730468750000002e-05, | |
| "loss": 0.0658, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 2.159392789373814, | |
| "grad_norm": 0.11225280165672302, | |
| "learning_rate": 2.3681640625e-05, | |
| "loss": 0.0939, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 2.163187855787476, | |
| "grad_norm": 1.5736573934555054, | |
| "learning_rate": 2.3632812500000003e-05, | |
| "loss": 0.0727, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 2.1669829222011385, | |
| "grad_norm": 2.087057113647461, | |
| "learning_rate": 2.3583984375e-05, | |
| "loss": 0.0654, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 2.170777988614801, | |
| "grad_norm": 1.598823070526123, | |
| "learning_rate": 2.353515625e-05, | |
| "loss": 0.0874, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 2.174573055028463, | |
| "grad_norm": 1.7258918285369873, | |
| "learning_rate": 2.3486328125000002e-05, | |
| "loss": 0.0703, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 2.178368121442125, | |
| "grad_norm": 12.662415504455566, | |
| "learning_rate": 2.34375e-05, | |
| "loss": 0.0998, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 2.1821631878557874, | |
| "grad_norm": 5.9703803062438965, | |
| "learning_rate": 2.3388671875000002e-05, | |
| "loss": 0.1021, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 2.18595825426945, | |
| "grad_norm": 1.9118971824645996, | |
| "learning_rate": 2.333984375e-05, | |
| "loss": 0.0574, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 2.189753320683112, | |
| "grad_norm": 2.8925118446350098, | |
| "learning_rate": 2.3291015625e-05, | |
| "loss": 0.0804, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 2.193548387096774, | |
| "grad_norm": 0.9911293387413025, | |
| "learning_rate": 2.32421875e-05, | |
| "loss": 0.0673, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 2.1973434535104364, | |
| "grad_norm": 3.4294886589050293, | |
| "learning_rate": 2.3193359375e-05, | |
| "loss": 0.0729, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 2.2011385199240987, | |
| "grad_norm": 5.382150650024414, | |
| "learning_rate": 2.3144531250000002e-05, | |
| "loss": 0.1117, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 2.204933586337761, | |
| "grad_norm": 3.5237820148468018, | |
| "learning_rate": 2.3095703125e-05, | |
| "loss": 0.0674, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 2.2087286527514234, | |
| "grad_norm": 5.6236772537231445, | |
| "learning_rate": 2.3046875e-05, | |
| "loss": 0.0279, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 2.2125237191650853, | |
| "grad_norm": 1.1168630123138428, | |
| "learning_rate": 2.2998046875e-05, | |
| "loss": 0.0773, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 2.2163187855787476, | |
| "grad_norm": 1.0353121757507324, | |
| "learning_rate": 2.294921875e-05, | |
| "loss": 0.062, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 2.22011385199241, | |
| "grad_norm": 1.4820594787597656, | |
| "learning_rate": 2.2900390625000002e-05, | |
| "loss": 0.0778, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 2.2239089184060723, | |
| "grad_norm": 8.295422554016113, | |
| "learning_rate": 2.28515625e-05, | |
| "loss": 0.1192, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 2.227703984819734, | |
| "grad_norm": 1.5980597734451294, | |
| "learning_rate": 2.2802734375000003e-05, | |
| "loss": 0.0648, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 2.2314990512333965, | |
| "grad_norm": 0.2760424315929413, | |
| "learning_rate": 2.275390625e-05, | |
| "loss": 0.0722, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 2.235294117647059, | |
| "grad_norm": 0.8219416737556458, | |
| "learning_rate": 2.2705078125e-05, | |
| "loss": 0.0935, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 2.239089184060721, | |
| "grad_norm": 0.16338910162448883, | |
| "learning_rate": 2.2656250000000002e-05, | |
| "loss": 0.0876, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 2.242884250474383, | |
| "grad_norm": 0.5857824683189392, | |
| "learning_rate": 2.2607421875e-05, | |
| "loss": 0.117, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 2.2466793168880455, | |
| "grad_norm": 0.1616586148738861, | |
| "learning_rate": 2.2558593750000002e-05, | |
| "loss": 0.072, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 2.250474383301708, | |
| "grad_norm": 0.26469337940216064, | |
| "learning_rate": 2.2509765625e-05, | |
| "loss": 0.0902, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 2.25426944971537, | |
| "grad_norm": 3.576016426086426, | |
| "learning_rate": 2.24609375e-05, | |
| "loss": 0.1647, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 2.258064516129032, | |
| "grad_norm": 6.523315906524658, | |
| "learning_rate": 2.2412109375e-05, | |
| "loss": 0.0705, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 2.2618595825426944, | |
| "grad_norm": 4.0901689529418945, | |
| "learning_rate": 2.236328125e-05, | |
| "loss": 0.0786, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 2.2656546489563567, | |
| "grad_norm": 0.5081945061683655, | |
| "learning_rate": 2.2314453125000002e-05, | |
| "loss": 0.1158, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 2.269449715370019, | |
| "grad_norm": 0.10847347974777222, | |
| "learning_rate": 2.2265625e-05, | |
| "loss": 0.0825, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 2.2732447817836814, | |
| "grad_norm": 9.521303176879883, | |
| "learning_rate": 2.2216796875e-05, | |
| "loss": 0.0875, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 2.2770398481973433, | |
| "grad_norm": 6.0424580574035645, | |
| "learning_rate": 2.216796875e-05, | |
| "loss": 0.0994, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 2.2808349146110056, | |
| "grad_norm": 0.3634886145591736, | |
| "learning_rate": 2.2119140625e-05, | |
| "loss": 0.0813, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 2.284629981024668, | |
| "grad_norm": 1.929626703262329, | |
| "learning_rate": 2.2070312500000002e-05, | |
| "loss": 0.0705, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 2.2884250474383303, | |
| "grad_norm": 4.993653297424316, | |
| "learning_rate": 2.2021484375e-05, | |
| "loss": 0.0731, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 2.292220113851992, | |
| "grad_norm": 0.4869803190231323, | |
| "learning_rate": 2.1972656250000003e-05, | |
| "loss": 0.1123, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 2.2960151802656545, | |
| "grad_norm": 1.1776117086410522, | |
| "learning_rate": 2.1923828125e-05, | |
| "loss": 0.0643, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 2.299810246679317, | |
| "grad_norm": 1.7794570922851562, | |
| "learning_rate": 2.1875e-05, | |
| "loss": 0.0852, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 2.3036053130929792, | |
| "grad_norm": 2.7579660415649414, | |
| "learning_rate": 2.1826171875000002e-05, | |
| "loss": 0.0975, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 2.3074003795066416, | |
| "grad_norm": 2.9852662086486816, | |
| "learning_rate": 2.177734375e-05, | |
| "loss": 0.0724, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 2.3111954459203035, | |
| "grad_norm": 3.543381452560425, | |
| "learning_rate": 2.1728515625000002e-05, | |
| "loss": 0.1108, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 2.314990512333966, | |
| "grad_norm": 6.476046085357666, | |
| "learning_rate": 2.16796875e-05, | |
| "loss": 0.1231, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 2.318785578747628, | |
| "grad_norm": 3.2935097217559814, | |
| "learning_rate": 2.1630859375e-05, | |
| "loss": 0.1052, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 2.3225806451612905, | |
| "grad_norm": 1.1247642040252686, | |
| "learning_rate": 2.158203125e-05, | |
| "loss": 0.0817, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 2.3263757115749524, | |
| "grad_norm": 6.793920993804932, | |
| "learning_rate": 2.1533203125e-05, | |
| "loss": 0.0623, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 2.3301707779886147, | |
| "grad_norm": 0.12885475158691406, | |
| "learning_rate": 2.1484375000000002e-05, | |
| "loss": 0.0942, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 2.333965844402277, | |
| "grad_norm": 1.4963340759277344, | |
| "learning_rate": 2.1435546875e-05, | |
| "loss": 0.0549, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 2.3377609108159394, | |
| "grad_norm": 1.460093379020691, | |
| "learning_rate": 2.138671875e-05, | |
| "loss": 0.094, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 2.3415559772296017, | |
| "grad_norm": 4.440692901611328, | |
| "learning_rate": 2.1337890625e-05, | |
| "loss": 0.1673, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 2.3453510436432636, | |
| "grad_norm": 2.9689061641693115, | |
| "learning_rate": 2.12890625e-05, | |
| "loss": 0.0772, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 2.349146110056926, | |
| "grad_norm": 8.890856742858887, | |
| "learning_rate": 2.1240234375000002e-05, | |
| "loss": 0.0588, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 2.3529411764705883, | |
| "grad_norm": 0.12126415222883224, | |
| "learning_rate": 2.119140625e-05, | |
| "loss": 0.0624, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 2.3567362428842507, | |
| "grad_norm": 0.5167102217674255, | |
| "learning_rate": 2.1142578125000003e-05, | |
| "loss": 0.0732, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 2.3605313092979125, | |
| "grad_norm": 0.18846435844898224, | |
| "learning_rate": 2.109375e-05, | |
| "loss": 0.1007, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 2.364326375711575, | |
| "grad_norm": 1.9389616250991821, | |
| "learning_rate": 2.1044921875e-05, | |
| "loss": 0.0912, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 2.3681214421252372, | |
| "grad_norm": 5.2946457862854, | |
| "learning_rate": 2.0996093750000002e-05, | |
| "loss": 0.057, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 2.3719165085388996, | |
| "grad_norm": 0.13522082567214966, | |
| "learning_rate": 2.0947265625e-05, | |
| "loss": 0.0877, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 2.375711574952562, | |
| "grad_norm": 0.43759119510650635, | |
| "learning_rate": 2.0898437500000002e-05, | |
| "loss": 0.0791, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 2.379506641366224, | |
| "grad_norm": 4.369633197784424, | |
| "learning_rate": 2.0849609375e-05, | |
| "loss": 0.0793, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 2.383301707779886, | |
| "grad_norm": 3.1445748805999756, | |
| "learning_rate": 2.080078125e-05, | |
| "loss": 0.0994, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 2.3870967741935485, | |
| "grad_norm": 0.5459542274475098, | |
| "learning_rate": 2.0751953125e-05, | |
| "loss": 0.0493, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 2.3908918406072104, | |
| "grad_norm": 0.8807210326194763, | |
| "learning_rate": 2.0703125e-05, | |
| "loss": 0.0669, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 2.3946869070208727, | |
| "grad_norm": 2.931506872177124, | |
| "learning_rate": 2.0654296875000002e-05, | |
| "loss": 0.1014, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 2.398481973434535, | |
| "grad_norm": 1.1972861289978027, | |
| "learning_rate": 2.060546875e-05, | |
| "loss": 0.0643, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 2.4022770398481974, | |
| "grad_norm": 2.670483112335205, | |
| "learning_rate": 2.0556640625000003e-05, | |
| "loss": 0.0651, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 2.4060721062618597, | |
| "grad_norm": 2.790907382965088, | |
| "learning_rate": 2.05078125e-05, | |
| "loss": 0.0979, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 2.4098671726755216, | |
| "grad_norm": 1.7010408639907837, | |
| "learning_rate": 2.0458984375e-05, | |
| "loss": 0.0616, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 2.413662239089184, | |
| "grad_norm": 2.3590617179870605, | |
| "learning_rate": 2.0410156250000002e-05, | |
| "loss": 0.0877, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 2.4174573055028463, | |
| "grad_norm": 0.7550681829452515, | |
| "learning_rate": 2.0361328125e-05, | |
| "loss": 0.0351, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 2.4212523719165087, | |
| "grad_norm": 2.2927632331848145, | |
| "learning_rate": 2.0312500000000002e-05, | |
| "loss": 0.102, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 2.4250474383301706, | |
| "grad_norm": 8.239547729492188, | |
| "learning_rate": 2.0263671875e-05, | |
| "loss": 0.1315, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 2.428842504743833, | |
| "grad_norm": 0.12305755913257599, | |
| "learning_rate": 2.021484375e-05, | |
| "loss": 0.0508, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 2.4326375711574952, | |
| "grad_norm": 0.24204160273075104, | |
| "learning_rate": 2.0166015625e-05, | |
| "loss": 0.1154, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 2.4364326375711576, | |
| "grad_norm": 1.9680283069610596, | |
| "learning_rate": 2.01171875e-05, | |
| "loss": 0.0576, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 2.44022770398482, | |
| "grad_norm": 2.9172940254211426, | |
| "learning_rate": 2.0068359375000002e-05, | |
| "loss": 0.0457, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 2.444022770398482, | |
| "grad_norm": 4.63267707824707, | |
| "learning_rate": 2.001953125e-05, | |
| "loss": 0.0544, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 2.447817836812144, | |
| "grad_norm": 1.447266936302185, | |
| "learning_rate": 1.9970703125e-05, | |
| "loss": 0.0885, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 2.4516129032258065, | |
| "grad_norm": 2.839066505432129, | |
| "learning_rate": 1.9921875e-05, | |
| "loss": 0.1266, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 2.455407969639469, | |
| "grad_norm": 2.1036999225616455, | |
| "learning_rate": 1.9873046875e-05, | |
| "loss": 0.1107, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 2.4592030360531307, | |
| "grad_norm": 2.6435329914093018, | |
| "learning_rate": 1.9824218750000002e-05, | |
| "loss": 0.0539, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 2.462998102466793, | |
| "grad_norm": 0.2627769112586975, | |
| "learning_rate": 1.9775390625e-05, | |
| "loss": 0.0713, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 2.4667931688804554, | |
| "grad_norm": 3.5408475399017334, | |
| "learning_rate": 1.9726562500000003e-05, | |
| "loss": 0.1061, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 2.4705882352941178, | |
| "grad_norm": 2.456315279006958, | |
| "learning_rate": 1.9677734375e-05, | |
| "loss": 0.0782, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 2.47438330170778, | |
| "grad_norm": 5.217021942138672, | |
| "learning_rate": 1.962890625e-05, | |
| "loss": 0.1009, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 2.478178368121442, | |
| "grad_norm": 4.218019962310791, | |
| "learning_rate": 1.9580078125000002e-05, | |
| "loss": 0.0663, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 2.4819734345351043, | |
| "grad_norm": 2.7066123485565186, | |
| "learning_rate": 1.953125e-05, | |
| "loss": 0.0891, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 2.4857685009487667, | |
| "grad_norm": 0.1062941625714302, | |
| "learning_rate": 1.9482421875000002e-05, | |
| "loss": 0.1085, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 2.489563567362429, | |
| "grad_norm": 5.984579086303711, | |
| "learning_rate": 1.943359375e-05, | |
| "loss": 0.092, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 2.493358633776091, | |
| "grad_norm": 0.7308592796325684, | |
| "learning_rate": 1.9384765625e-05, | |
| "loss": 0.072, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 2.4971537001897532, | |
| "grad_norm": 0.8086015582084656, | |
| "learning_rate": 1.93359375e-05, | |
| "loss": 0.1052, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 2.5009487666034156, | |
| "grad_norm": 1.8991528749465942, | |
| "learning_rate": 1.9287109375e-05, | |
| "loss": 0.0737, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 2.504743833017078, | |
| "grad_norm": 6.63985013961792, | |
| "learning_rate": 1.9238281250000002e-05, | |
| "loss": 0.1096, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 2.5085388994307403, | |
| "grad_norm": 0.17855627834796906, | |
| "learning_rate": 1.9189453125e-05, | |
| "loss": 0.0624, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 2.512333965844402, | |
| "grad_norm": 4.877336502075195, | |
| "learning_rate": 1.9140625e-05, | |
| "loss": 0.1211, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 2.5161290322580645, | |
| "grad_norm": 0.27590852975845337, | |
| "learning_rate": 1.9091796875e-05, | |
| "loss": 0.0521, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 2.519924098671727, | |
| "grad_norm": 0.45393088459968567, | |
| "learning_rate": 1.904296875e-05, | |
| "loss": 0.0707, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 2.5237191650853887, | |
| "grad_norm": 2.1049611568450928, | |
| "learning_rate": 1.8994140625000002e-05, | |
| "loss": 0.1105, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 2.527514231499051, | |
| "grad_norm": 1.805330753326416, | |
| "learning_rate": 1.89453125e-05, | |
| "loss": 0.068, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 2.5313092979127134, | |
| "grad_norm": 1.1227184534072876, | |
| "learning_rate": 1.8896484375000003e-05, | |
| "loss": 0.0572, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 2.5351043643263758, | |
| "grad_norm": 2.483306646347046, | |
| "learning_rate": 1.884765625e-05, | |
| "loss": 0.1095, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 2.538899430740038, | |
| "grad_norm": 0.1452198177576065, | |
| "learning_rate": 1.8798828125e-05, | |
| "loss": 0.0401, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 2.5426944971537004, | |
| "grad_norm": 0.14945687353610992, | |
| "learning_rate": 1.8750000000000002e-05, | |
| "loss": 0.0796, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 2.5464895635673623, | |
| "grad_norm": 1.3936477899551392, | |
| "learning_rate": 1.8701171875e-05, | |
| "loss": 0.0688, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 2.5502846299810247, | |
| "grad_norm": 0.16819104552268982, | |
| "learning_rate": 1.8652343750000002e-05, | |
| "loss": 0.0454, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 2.554079696394687, | |
| "grad_norm": 1.2239612340927124, | |
| "learning_rate": 1.8603515625e-05, | |
| "loss": 0.0588, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 2.557874762808349, | |
| "grad_norm": 7.471010684967041, | |
| "learning_rate": 1.85546875e-05, | |
| "loss": 0.0528, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 2.5616698292220113, | |
| "grad_norm": 4.900544166564941, | |
| "learning_rate": 1.8505859375e-05, | |
| "loss": 0.0858, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 2.5654648956356736, | |
| "grad_norm": 3.8821702003479004, | |
| "learning_rate": 1.845703125e-05, | |
| "loss": 0.046, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 2.569259962049336, | |
| "grad_norm": 0.17730577290058136, | |
| "learning_rate": 1.8408203125000002e-05, | |
| "loss": 0.0673, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 2.5730550284629983, | |
| "grad_norm": 3.4757065773010254, | |
| "learning_rate": 1.8359375e-05, | |
| "loss": 0.094, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 2.5768500948766606, | |
| "grad_norm": 3.2091782093048096, | |
| "learning_rate": 1.8310546875e-05, | |
| "loss": 0.08, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 2.5806451612903225, | |
| "grad_norm": 5.548855304718018, | |
| "learning_rate": 1.826171875e-05, | |
| "loss": 0.0996, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 2.584440227703985, | |
| "grad_norm": 0.17017248272895813, | |
| "learning_rate": 1.8212890625e-05, | |
| "loss": 0.0828, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 2.588235294117647, | |
| "grad_norm": 9.512433052062988, | |
| "learning_rate": 1.8164062500000002e-05, | |
| "loss": 0.0696, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 2.592030360531309, | |
| "grad_norm": 0.9737806916236877, | |
| "learning_rate": 1.8115234375e-05, | |
| "loss": 0.0881, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 2.5958254269449714, | |
| "grad_norm": 7.027744293212891, | |
| "learning_rate": 1.8066406250000002e-05, | |
| "loss": 0.06, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 2.5996204933586338, | |
| "grad_norm": 2.162301778793335, | |
| "learning_rate": 1.8017578125e-05, | |
| "loss": 0.0833, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 2.603415559772296, | |
| "grad_norm": 0.30585893988609314, | |
| "learning_rate": 1.796875e-05, | |
| "loss": 0.0794, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 2.6072106261859584, | |
| "grad_norm": 0.22574108839035034, | |
| "learning_rate": 1.7919921875e-05, | |
| "loss": 0.0965, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 2.6110056925996203, | |
| "grad_norm": 0.6627634763717651, | |
| "learning_rate": 1.787109375e-05, | |
| "loss": 0.0622, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 2.6148007590132827, | |
| "grad_norm": 0.17045138776302338, | |
| "learning_rate": 1.7822265625000002e-05, | |
| "loss": 0.0471, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 2.618595825426945, | |
| "grad_norm": 0.31901392340660095, | |
| "learning_rate": 1.77734375e-05, | |
| "loss": 0.0607, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 2.6223908918406074, | |
| "grad_norm": 0.21171316504478455, | |
| "learning_rate": 1.7724609375e-05, | |
| "loss": 0.0789, | |
| "step": 6910 | |
| }, | |
| { | |
| "epoch": 2.6261859582542693, | |
| "grad_norm": 0.8109591007232666, | |
| "learning_rate": 1.767578125e-05, | |
| "loss": 0.0973, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 2.6299810246679316, | |
| "grad_norm": 2.583545446395874, | |
| "learning_rate": 1.7626953125e-05, | |
| "loss": 0.0512, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 2.633776091081594, | |
| "grad_norm": 1.5937598943710327, | |
| "learning_rate": 1.7578125000000002e-05, | |
| "loss": 0.0861, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 2.6375711574952563, | |
| "grad_norm": 1.3143688440322876, | |
| "learning_rate": 1.7529296875e-05, | |
| "loss": 0.098, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 2.6413662239089186, | |
| "grad_norm": 2.390667676925659, | |
| "learning_rate": 1.748046875e-05, | |
| "loss": 0.0621, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 2.6451612903225805, | |
| "grad_norm": 0.30924805998802185, | |
| "learning_rate": 1.7431640625e-05, | |
| "loss": 0.0807, | |
| "step": 6970 | |
| }, | |
| { | |
| "epoch": 2.648956356736243, | |
| "grad_norm": 1.6821314096450806, | |
| "learning_rate": 1.73828125e-05, | |
| "loss": 0.0598, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 2.652751423149905, | |
| "grad_norm": 1.8624871969223022, | |
| "learning_rate": 1.7333984375000002e-05, | |
| "loss": 0.0841, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 2.656546489563567, | |
| "grad_norm": 1.0055333375930786, | |
| "learning_rate": 1.728515625e-05, | |
| "loss": 0.0853, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 2.6603415559772294, | |
| "grad_norm": 0.11686267703771591, | |
| "learning_rate": 1.7236328125000002e-05, | |
| "loss": 0.0455, | |
| "step": 7010 | |
| }, | |
| { | |
| "epoch": 2.6641366223908918, | |
| "grad_norm": 5.000795841217041, | |
| "learning_rate": 1.71875e-05, | |
| "loss": 0.1102, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 2.667931688804554, | |
| "grad_norm": 5.362839221954346, | |
| "learning_rate": 1.7138671875e-05, | |
| "loss": 0.0864, | |
| "step": 7030 | |
| }, | |
| { | |
| "epoch": 2.6717267552182165, | |
| "grad_norm": 4.031505584716797, | |
| "learning_rate": 1.708984375e-05, | |
| "loss": 0.0753, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 2.675521821631879, | |
| "grad_norm": 3.553187608718872, | |
| "learning_rate": 1.7041015625e-05, | |
| "loss": 0.0802, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 2.6793168880455407, | |
| "grad_norm": 2.1504125595092773, | |
| "learning_rate": 1.6992187500000002e-05, | |
| "loss": 0.0798, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 2.683111954459203, | |
| "grad_norm": 0.17360809445381165, | |
| "learning_rate": 1.6943359375e-05, | |
| "loss": 0.1064, | |
| "step": 7070 | |
| }, | |
| { | |
| "epoch": 2.6869070208728654, | |
| "grad_norm": 0.16311465203762054, | |
| "learning_rate": 1.689453125e-05, | |
| "loss": 0.1194, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 2.6907020872865273, | |
| "grad_norm": 3.6088805198669434, | |
| "learning_rate": 1.6845703125e-05, | |
| "loss": 0.0586, | |
| "step": 7090 | |
| }, | |
| { | |
| "epoch": 2.6944971537001896, | |
| "grad_norm": 5.143406867980957, | |
| "learning_rate": 1.6796875e-05, | |
| "loss": 0.0892, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 2.698292220113852, | |
| "grad_norm": 27.002168655395508, | |
| "learning_rate": 1.6748046875000002e-05, | |
| "loss": 0.089, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 2.7020872865275143, | |
| "grad_norm": 1.443231225013733, | |
| "learning_rate": 1.669921875e-05, | |
| "loss": 0.1328, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 2.7058823529411766, | |
| "grad_norm": 7.007279396057129, | |
| "learning_rate": 1.6650390625e-05, | |
| "loss": 0.0652, | |
| "step": 7130 | |
| }, | |
| { | |
| "epoch": 2.709677419354839, | |
| "grad_norm": 0.25469958782196045, | |
| "learning_rate": 1.66015625e-05, | |
| "loss": 0.045, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 2.713472485768501, | |
| "grad_norm": 4.693950653076172, | |
| "learning_rate": 1.6552734375e-05, | |
| "loss": 0.1245, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 2.717267552182163, | |
| "grad_norm": 0.3287486732006073, | |
| "learning_rate": 1.6503906250000002e-05, | |
| "loss": 0.068, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 2.7210626185958255, | |
| "grad_norm": 9.82812786102295, | |
| "learning_rate": 1.6455078125e-05, | |
| "loss": 0.0909, | |
| "step": 7170 | |
| }, | |
| { | |
| "epoch": 2.7248576850094874, | |
| "grad_norm": 14.501320838928223, | |
| "learning_rate": 1.6406250000000002e-05, | |
| "loss": 0.0972, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 2.72865275142315, | |
| "grad_norm": 5.130281448364258, | |
| "learning_rate": 1.6357421875e-05, | |
| "loss": 0.1253, | |
| "step": 7190 | |
| }, | |
| { | |
| "epoch": 2.732447817836812, | |
| "grad_norm": 3.5541763305664062, | |
| "learning_rate": 1.630859375e-05, | |
| "loss": 0.0822, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 2.7362428842504745, | |
| "grad_norm": 0.9670690894126892, | |
| "learning_rate": 1.6259765625e-05, | |
| "loss": 0.0231, | |
| "step": 7210 | |
| }, | |
| { | |
| "epoch": 2.740037950664137, | |
| "grad_norm": 0.676513135433197, | |
| "learning_rate": 1.62109375e-05, | |
| "loss": 0.0972, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 2.7438330170777987, | |
| "grad_norm": 7.5943217277526855, | |
| "learning_rate": 1.6162109375000002e-05, | |
| "loss": 0.0989, | |
| "step": 7230 | |
| }, | |
| { | |
| "epoch": 2.747628083491461, | |
| "grad_norm": 0.20399871468544006, | |
| "learning_rate": 1.611328125e-05, | |
| "loss": 0.1036, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 2.7514231499051234, | |
| "grad_norm": 0.43629199266433716, | |
| "learning_rate": 1.6064453125e-05, | |
| "loss": 0.0311, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 2.7552182163187857, | |
| "grad_norm": 1.144394040107727, | |
| "learning_rate": 1.6015625e-05, | |
| "loss": 0.0815, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 2.7590132827324476, | |
| "grad_norm": 0.06812827289104462, | |
| "learning_rate": 1.5966796875e-05, | |
| "loss": 0.0539, | |
| "step": 7270 | |
| }, | |
| { | |
| "epoch": 2.76280834914611, | |
| "grad_norm": 2.913031578063965, | |
| "learning_rate": 1.5917968750000002e-05, | |
| "loss": 0.0443, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 2.7666034155597723, | |
| "grad_norm": 2.4026944637298584, | |
| "learning_rate": 1.5869140625e-05, | |
| "loss": 0.0957, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 2.7703984819734346, | |
| "grad_norm": 3.89658784866333, | |
| "learning_rate": 1.58203125e-05, | |
| "loss": 0.1125, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 2.774193548387097, | |
| "grad_norm": 0.4522351920604706, | |
| "learning_rate": 1.5771484375e-05, | |
| "loss": 0.0889, | |
| "step": 7310 | |
| }, | |
| { | |
| "epoch": 2.777988614800759, | |
| "grad_norm": 5.769268989562988, | |
| "learning_rate": 1.572265625e-05, | |
| "loss": 0.0631, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 2.781783681214421, | |
| "grad_norm": 1.7276089191436768, | |
| "learning_rate": 1.5673828125000002e-05, | |
| "loss": 0.091, | |
| "step": 7330 | |
| }, | |
| { | |
| "epoch": 2.7855787476280836, | |
| "grad_norm": 2.0759644508361816, | |
| "learning_rate": 1.5625e-05, | |
| "loss": 0.0655, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 2.789373814041746, | |
| "grad_norm": 0.7582204937934875, | |
| "learning_rate": 1.5576171875000002e-05, | |
| "loss": 0.0541, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 2.793168880455408, | |
| "grad_norm": 16.55638885498047, | |
| "learning_rate": 1.552734375e-05, | |
| "loss": 0.1178, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 2.79696394686907, | |
| "grad_norm": 0.7026536464691162, | |
| "learning_rate": 1.5478515625e-05, | |
| "loss": 0.0459, | |
| "step": 7370 | |
| }, | |
| { | |
| "epoch": 2.8007590132827325, | |
| "grad_norm": 4.089038372039795, | |
| "learning_rate": 1.54296875e-05, | |
| "loss": 0.0663, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 2.804554079696395, | |
| "grad_norm": 3.8286547660827637, | |
| "learning_rate": 1.5380859375e-05, | |
| "loss": 0.1096, | |
| "step": 7390 | |
| }, | |
| { | |
| "epoch": 2.808349146110057, | |
| "grad_norm": 2.5993642807006836, | |
| "learning_rate": 1.5332031250000002e-05, | |
| "loss": 0.0685, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 2.812144212523719, | |
| "grad_norm": 1.0880334377288818, | |
| "learning_rate": 1.5283203125e-05, | |
| "loss": 0.0631, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 2.8159392789373814, | |
| "grad_norm": 1.036834478378296, | |
| "learning_rate": 1.5234375000000001e-05, | |
| "loss": 0.086, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 2.8197343453510437, | |
| "grad_norm": 5.436180114746094, | |
| "learning_rate": 1.5185546875000001e-05, | |
| "loss": 0.1121, | |
| "step": 7430 | |
| }, | |
| { | |
| "epoch": 2.8235294117647056, | |
| "grad_norm": 3.7009427547454834, | |
| "learning_rate": 1.513671875e-05, | |
| "loss": 0.0764, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 2.827324478178368, | |
| "grad_norm": 2.5197298526763916, | |
| "learning_rate": 1.5087890625e-05, | |
| "loss": 0.082, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 2.8311195445920303, | |
| "grad_norm": 3.15004563331604, | |
| "learning_rate": 1.50390625e-05, | |
| "loss": 0.112, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 2.8349146110056926, | |
| "grad_norm": 2.9666614532470703, | |
| "learning_rate": 1.4990234375e-05, | |
| "loss": 0.0872, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 2.838709677419355, | |
| "grad_norm": 6.0326385498046875, | |
| "learning_rate": 1.4941406250000001e-05, | |
| "loss": 0.0817, | |
| "step": 7480 | |
| }, | |
| { | |
| "epoch": 2.8425047438330173, | |
| "grad_norm": 1.699873685836792, | |
| "learning_rate": 1.4892578125000001e-05, | |
| "loss": 0.0816, | |
| "step": 7490 | |
| }, | |
| { | |
| "epoch": 2.846299810246679, | |
| "grad_norm": 0.14119946956634521, | |
| "learning_rate": 1.484375e-05, | |
| "loss": 0.0725, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 2.8500948766603416, | |
| "grad_norm": 6.737262725830078, | |
| "learning_rate": 1.4794921875e-05, | |
| "loss": 0.1205, | |
| "step": 7510 | |
| }, | |
| { | |
| "epoch": 2.853889943074004, | |
| "grad_norm": 4.460575103759766, | |
| "learning_rate": 1.474609375e-05, | |
| "loss": 0.123, | |
| "step": 7520 | |
| }, | |
| { | |
| "epoch": 2.857685009487666, | |
| "grad_norm": 0.09714975953102112, | |
| "learning_rate": 1.4697265625000001e-05, | |
| "loss": 0.0687, | |
| "step": 7530 | |
| }, | |
| { | |
| "epoch": 2.861480075901328, | |
| "grad_norm": 3.972470760345459, | |
| "learning_rate": 1.4648437500000001e-05, | |
| "loss": 0.1089, | |
| "step": 7540 | |
| }, | |
| { | |
| "epoch": 2.8652751423149905, | |
| "grad_norm": 2.0776712894439697, | |
| "learning_rate": 1.4599609375000001e-05, | |
| "loss": 0.1318, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 2.869070208728653, | |
| "grad_norm": 0.21448436379432678, | |
| "learning_rate": 1.455078125e-05, | |
| "loss": 0.0639, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 2.872865275142315, | |
| "grad_norm": 0.19727276265621185, | |
| "learning_rate": 1.4501953125e-05, | |
| "loss": 0.0464, | |
| "step": 7570 | |
| }, | |
| { | |
| "epoch": 2.8766603415559775, | |
| "grad_norm": 2.9958267211914062, | |
| "learning_rate": 1.4453125e-05, | |
| "loss": 0.0715, | |
| "step": 7580 | |
| }, | |
| { | |
| "epoch": 2.8804554079696394, | |
| "grad_norm": 1.823538064956665, | |
| "learning_rate": 1.4404296875000001e-05, | |
| "loss": 0.0781, | |
| "step": 7590 | |
| }, | |
| { | |
| "epoch": 2.8842504743833017, | |
| "grad_norm": 2.5351407527923584, | |
| "learning_rate": 1.4355468750000001e-05, | |
| "loss": 0.0888, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 2.888045540796964, | |
| "grad_norm": 4.274851322174072, | |
| "learning_rate": 1.4306640625000002e-05, | |
| "loss": 0.0228, | |
| "step": 7610 | |
| }, | |
| { | |
| "epoch": 2.891840607210626, | |
| "grad_norm": 4.665604591369629, | |
| "learning_rate": 1.42578125e-05, | |
| "loss": 0.083, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 2.8956356736242883, | |
| "grad_norm": 4.373048782348633, | |
| "learning_rate": 1.4208984375e-05, | |
| "loss": 0.0936, | |
| "step": 7630 | |
| }, | |
| { | |
| "epoch": 2.8994307400379506, | |
| "grad_norm": 1.5743074417114258, | |
| "learning_rate": 1.416015625e-05, | |
| "loss": 0.0414, | |
| "step": 7640 | |
| }, | |
| { | |
| "epoch": 2.903225806451613, | |
| "grad_norm": 2.3043341636657715, | |
| "learning_rate": 1.4111328125000001e-05, | |
| "loss": 0.0739, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 2.9070208728652753, | |
| "grad_norm": 2.980686902999878, | |
| "learning_rate": 1.4062500000000001e-05, | |
| "loss": 0.0755, | |
| "step": 7660 | |
| }, | |
| { | |
| "epoch": 2.9108159392789372, | |
| "grad_norm": 0.5928072929382324, | |
| "learning_rate": 1.4013671875e-05, | |
| "loss": 0.116, | |
| "step": 7670 | |
| }, | |
| { | |
| "epoch": 2.9146110056925996, | |
| "grad_norm": 0.14647921919822693, | |
| "learning_rate": 1.396484375e-05, | |
| "loss": 0.0367, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 2.918406072106262, | |
| "grad_norm": 6.466022968292236, | |
| "learning_rate": 1.3916015625e-05, | |
| "loss": 0.0365, | |
| "step": 7690 | |
| }, | |
| { | |
| "epoch": 2.9222011385199242, | |
| "grad_norm": 13.139077186584473, | |
| "learning_rate": 1.38671875e-05, | |
| "loss": 0.1295, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 2.925996204933586, | |
| "grad_norm": 0.3945586383342743, | |
| "learning_rate": 1.3818359375000001e-05, | |
| "loss": 0.0559, | |
| "step": 7710 | |
| }, | |
| { | |
| "epoch": 2.9297912713472485, | |
| "grad_norm": 0.04980861395597458, | |
| "learning_rate": 1.3769531250000001e-05, | |
| "loss": 0.0485, | |
| "step": 7720 | |
| }, | |
| { | |
| "epoch": 2.933586337760911, | |
| "grad_norm": 2.388545513153076, | |
| "learning_rate": 1.3720703125e-05, | |
| "loss": 0.0542, | |
| "step": 7730 | |
| }, | |
| { | |
| "epoch": 2.937381404174573, | |
| "grad_norm": 2.4082882404327393, | |
| "learning_rate": 1.3671875e-05, | |
| "loss": 0.0939, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 2.9411764705882355, | |
| "grad_norm": 4.933741569519043, | |
| "learning_rate": 1.3623046875e-05, | |
| "loss": 0.1409, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 2.9449715370018974, | |
| "grad_norm": 5.57550573348999, | |
| "learning_rate": 1.3574218750000001e-05, | |
| "loss": 0.0646, | |
| "step": 7760 | |
| }, | |
| { | |
| "epoch": 2.9487666034155597, | |
| "grad_norm": 1.8403911590576172, | |
| "learning_rate": 1.3525390625000001e-05, | |
| "loss": 0.0694, | |
| "step": 7770 | |
| }, | |
| { | |
| "epoch": 2.952561669829222, | |
| "grad_norm": 6.1294331550598145, | |
| "learning_rate": 1.3476562500000001e-05, | |
| "loss": 0.0476, | |
| "step": 7780 | |
| }, | |
| { | |
| "epoch": 2.956356736242884, | |
| "grad_norm": 0.0652192234992981, | |
| "learning_rate": 1.3427734375e-05, | |
| "loss": 0.0634, | |
| "step": 7790 | |
| }, | |
| { | |
| "epoch": 2.9601518026565463, | |
| "grad_norm": 2.2705845832824707, | |
| "learning_rate": 1.337890625e-05, | |
| "loss": 0.0577, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 2.9639468690702087, | |
| "grad_norm": 0.12686532735824585, | |
| "learning_rate": 1.3330078125e-05, | |
| "loss": 0.0948, | |
| "step": 7810 | |
| }, | |
| { | |
| "epoch": 2.967741935483871, | |
| "grad_norm": 3.2810075283050537, | |
| "learning_rate": 1.3281250000000001e-05, | |
| "loss": 0.0813, | |
| "step": 7820 | |
| }, | |
| { | |
| "epoch": 2.9715370018975333, | |
| "grad_norm": 2.2181339263916016, | |
| "learning_rate": 1.3232421875000001e-05, | |
| "loss": 0.1022, | |
| "step": 7830 | |
| }, | |
| { | |
| "epoch": 2.9753320683111957, | |
| "grad_norm": 1.6737946271896362, | |
| "learning_rate": 1.318359375e-05, | |
| "loss": 0.0557, | |
| "step": 7840 | |
| }, | |
| { | |
| "epoch": 2.9791271347248576, | |
| "grad_norm": 7.780960559844971, | |
| "learning_rate": 1.3134765625e-05, | |
| "loss": 0.0978, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 2.98292220113852, | |
| "grad_norm": 8.983189582824707, | |
| "learning_rate": 1.30859375e-05, | |
| "loss": 0.0601, | |
| "step": 7860 | |
| }, | |
| { | |
| "epoch": 2.9867172675521823, | |
| "grad_norm": 4.744899272918701, | |
| "learning_rate": 1.3037109375e-05, | |
| "loss": 0.0418, | |
| "step": 7870 | |
| }, | |
| { | |
| "epoch": 2.990512333965844, | |
| "grad_norm": 2.1875483989715576, | |
| "learning_rate": 1.2988281250000001e-05, | |
| "loss": 0.0746, | |
| "step": 7880 | |
| }, | |
| { | |
| "epoch": 2.9943074003795065, | |
| "grad_norm": 1.506842017173767, | |
| "learning_rate": 1.2939453125000001e-05, | |
| "loss": 0.0868, | |
| "step": 7890 | |
| }, | |
| { | |
| "epoch": 2.998102466793169, | |
| "grad_norm": 2.1302731037139893, | |
| "learning_rate": 1.2890625e-05, | |
| "loss": 0.0687, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 3.001897533206831, | |
| "grad_norm": 2.632828950881958, | |
| "learning_rate": 1.2841796875e-05, | |
| "loss": 0.0705, | |
| "step": 7910 | |
| }, | |
| { | |
| "epoch": 3.0056925996204935, | |
| "grad_norm": 0.15800461173057556, | |
| "learning_rate": 1.279296875e-05, | |
| "loss": 0.0522, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 3.0094876660341554, | |
| "grad_norm": 0.13846412301063538, | |
| "learning_rate": 1.2744140625e-05, | |
| "loss": 0.0363, | |
| "step": 7930 | |
| }, | |
| { | |
| "epoch": 3.0132827324478177, | |
| "grad_norm": 4.117944717407227, | |
| "learning_rate": 1.2695312500000001e-05, | |
| "loss": 0.0605, | |
| "step": 7940 | |
| }, | |
| { | |
| "epoch": 3.01707779886148, | |
| "grad_norm": 1.4927798509597778, | |
| "learning_rate": 1.2646484375000001e-05, | |
| "loss": 0.0346, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 3.0208728652751424, | |
| "grad_norm": 4.367966175079346, | |
| "learning_rate": 1.259765625e-05, | |
| "loss": 0.0458, | |
| "step": 7960 | |
| }, | |
| { | |
| "epoch": 3.0246679316888048, | |
| "grad_norm": 2.0026087760925293, | |
| "learning_rate": 1.2548828125e-05, | |
| "loss": 0.0749, | |
| "step": 7970 | |
| }, | |
| { | |
| "epoch": 3.0284629981024667, | |
| "grad_norm": 2.106546640396118, | |
| "learning_rate": 1.25e-05, | |
| "loss": 0.065, | |
| "step": 7980 | |
| }, | |
| { | |
| "epoch": 3.032258064516129, | |
| "grad_norm": 4.122467994689941, | |
| "learning_rate": 1.2451171875000001e-05, | |
| "loss": 0.0475, | |
| "step": 7990 | |
| }, | |
| { | |
| "epoch": 3.0360531309297913, | |
| "grad_norm": 0.08205808699131012, | |
| "learning_rate": 1.2402343750000001e-05, | |
| "loss": 0.0692, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 3.0398481973434537, | |
| "grad_norm": 1.0389831066131592, | |
| "learning_rate": 1.2353515625e-05, | |
| "loss": 0.0514, | |
| "step": 8010 | |
| }, | |
| { | |
| "epoch": 3.0436432637571156, | |
| "grad_norm": 0.1080293357372284, | |
| "learning_rate": 1.23046875e-05, | |
| "loss": 0.0385, | |
| "step": 8020 | |
| }, | |
| { | |
| "epoch": 3.047438330170778, | |
| "grad_norm": 0.2515338361263275, | |
| "learning_rate": 1.2255859375e-05, | |
| "loss": 0.0835, | |
| "step": 8030 | |
| }, | |
| { | |
| "epoch": 3.0512333965844403, | |
| "grad_norm": 1.1087881326675415, | |
| "learning_rate": 1.220703125e-05, | |
| "loss": 0.0559, | |
| "step": 8040 | |
| }, | |
| { | |
| "epoch": 3.0550284629981026, | |
| "grad_norm": 1.1088217496871948, | |
| "learning_rate": 1.2158203125000001e-05, | |
| "loss": 0.075, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 3.0588235294117645, | |
| "grad_norm": 3.310959577560425, | |
| "learning_rate": 1.2109375000000001e-05, | |
| "loss": 0.0596, | |
| "step": 8060 | |
| }, | |
| { | |
| "epoch": 3.062618595825427, | |
| "grad_norm": 1.186274766921997, | |
| "learning_rate": 1.2060546875e-05, | |
| "loss": 0.0399, | |
| "step": 8070 | |
| }, | |
| { | |
| "epoch": 3.066413662239089, | |
| "grad_norm": 3.054225444793701, | |
| "learning_rate": 1.201171875e-05, | |
| "loss": 0.0352, | |
| "step": 8080 | |
| }, | |
| { | |
| "epoch": 3.0702087286527515, | |
| "grad_norm": 0.3610187768936157, | |
| "learning_rate": 1.1962890625e-05, | |
| "loss": 0.0519, | |
| "step": 8090 | |
| }, | |
| { | |
| "epoch": 3.074003795066414, | |
| "grad_norm": 1.7858855724334717, | |
| "learning_rate": 1.19140625e-05, | |
| "loss": 0.0712, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 3.0777988614800758, | |
| "grad_norm": 3.144697666168213, | |
| "learning_rate": 1.1865234375000001e-05, | |
| "loss": 0.0343, | |
| "step": 8110 | |
| }, | |
| { | |
| "epoch": 3.081593927893738, | |
| "grad_norm": 1.743668556213379, | |
| "learning_rate": 1.1816406250000001e-05, | |
| "loss": 0.0611, | |
| "step": 8120 | |
| }, | |
| { | |
| "epoch": 3.0853889943074004, | |
| "grad_norm": 0.6149533987045288, | |
| "learning_rate": 1.1767578125e-05, | |
| "loss": 0.0512, | |
| "step": 8130 | |
| }, | |
| { | |
| "epoch": 3.0891840607210628, | |
| "grad_norm": 6.247795581817627, | |
| "learning_rate": 1.171875e-05, | |
| "loss": 0.0741, | |
| "step": 8140 | |
| }, | |
| { | |
| "epoch": 3.0929791271347247, | |
| "grad_norm": 0.8566815853118896, | |
| "learning_rate": 1.1669921875e-05, | |
| "loss": 0.0699, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 3.096774193548387, | |
| "grad_norm": 3.2794229984283447, | |
| "learning_rate": 1.162109375e-05, | |
| "loss": 0.0296, | |
| "step": 8160 | |
| }, | |
| { | |
| "epoch": 3.1005692599620494, | |
| "grad_norm": 0.10005365312099457, | |
| "learning_rate": 1.1572265625000001e-05, | |
| "loss": 0.0645, | |
| "step": 8170 | |
| }, | |
| { | |
| "epoch": 3.1043643263757117, | |
| "grad_norm": 2.8992691040039062, | |
| "learning_rate": 1.15234375e-05, | |
| "loss": 0.0456, | |
| "step": 8180 | |
| }, | |
| { | |
| "epoch": 3.108159392789374, | |
| "grad_norm": 3.6778674125671387, | |
| "learning_rate": 1.1474609375e-05, | |
| "loss": 0.0351, | |
| "step": 8190 | |
| }, | |
| { | |
| "epoch": 3.111954459203036, | |
| "grad_norm": 1.5398664474487305, | |
| "learning_rate": 1.142578125e-05, | |
| "loss": 0.042, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 3.1157495256166983, | |
| "grad_norm": 0.05135444924235344, | |
| "learning_rate": 1.1376953125e-05, | |
| "loss": 0.0478, | |
| "step": 8210 | |
| }, | |
| { | |
| "epoch": 3.1195445920303606, | |
| "grad_norm": 0.6804483532905579, | |
| "learning_rate": 1.1328125000000001e-05, | |
| "loss": 0.058, | |
| "step": 8220 | |
| }, | |
| { | |
| "epoch": 3.123339658444023, | |
| "grad_norm": 0.10011663287878036, | |
| "learning_rate": 1.1279296875000001e-05, | |
| "loss": 0.0456, | |
| "step": 8230 | |
| }, | |
| { | |
| "epoch": 3.127134724857685, | |
| "grad_norm": 0.466981440782547, | |
| "learning_rate": 1.123046875e-05, | |
| "loss": 0.0449, | |
| "step": 8240 | |
| }, | |
| { | |
| "epoch": 3.130929791271347, | |
| "grad_norm": 2.163849353790283, | |
| "learning_rate": 1.1181640625e-05, | |
| "loss": 0.0595, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 3.1347248576850095, | |
| "grad_norm": 1.1013680696487427, | |
| "learning_rate": 1.11328125e-05, | |
| "loss": 0.0708, | |
| "step": 8260 | |
| }, | |
| { | |
| "epoch": 3.138519924098672, | |
| "grad_norm": 8.969820022583008, | |
| "learning_rate": 1.1083984375e-05, | |
| "loss": 0.064, | |
| "step": 8270 | |
| }, | |
| { | |
| "epoch": 3.1423149905123338, | |
| "grad_norm": 1.1106621026992798, | |
| "learning_rate": 1.1035156250000001e-05, | |
| "loss": 0.1007, | |
| "step": 8280 | |
| }, | |
| { | |
| "epoch": 3.146110056925996, | |
| "grad_norm": 0.1508377343416214, | |
| "learning_rate": 1.0986328125000001e-05, | |
| "loss": 0.0464, | |
| "step": 8290 | |
| }, | |
| { | |
| "epoch": 3.1499051233396584, | |
| "grad_norm": 0.07330877333879471, | |
| "learning_rate": 1.09375e-05, | |
| "loss": 0.0797, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 3.153700189753321, | |
| "grad_norm": 1.6159915924072266, | |
| "learning_rate": 1.0888671875e-05, | |
| "loss": 0.0527, | |
| "step": 8310 | |
| }, | |
| { | |
| "epoch": 3.157495256166983, | |
| "grad_norm": 0.5196408629417419, | |
| "learning_rate": 1.083984375e-05, | |
| "loss": 0.0433, | |
| "step": 8320 | |
| }, | |
| { | |
| "epoch": 3.161290322580645, | |
| "grad_norm": 2.486041307449341, | |
| "learning_rate": 1.0791015625e-05, | |
| "loss": 0.0651, | |
| "step": 8330 | |
| }, | |
| { | |
| "epoch": 3.1650853889943074, | |
| "grad_norm": 1.0713788270950317, | |
| "learning_rate": 1.0742187500000001e-05, | |
| "loss": 0.0695, | |
| "step": 8340 | |
| }, | |
| { | |
| "epoch": 3.1688804554079697, | |
| "grad_norm": 0.19154168665409088, | |
| "learning_rate": 1.0693359375e-05, | |
| "loss": 0.0364, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 3.172675521821632, | |
| "grad_norm": 0.31223466992378235, | |
| "learning_rate": 1.064453125e-05, | |
| "loss": 0.0267, | |
| "step": 8360 | |
| }, | |
| { | |
| "epoch": 3.176470588235294, | |
| "grad_norm": 0.7767817378044128, | |
| "learning_rate": 1.0595703125e-05, | |
| "loss": 0.0635, | |
| "step": 8370 | |
| }, | |
| { | |
| "epoch": 3.1802656546489563, | |
| "grad_norm": 2.4257445335388184, | |
| "learning_rate": 1.0546875e-05, | |
| "loss": 0.0588, | |
| "step": 8380 | |
| }, | |
| { | |
| "epoch": 3.1840607210626186, | |
| "grad_norm": 1.2349954843521118, | |
| "learning_rate": 1.0498046875000001e-05, | |
| "loss": 0.0557, | |
| "step": 8390 | |
| }, | |
| { | |
| "epoch": 3.187855787476281, | |
| "grad_norm": 3.209284543991089, | |
| "learning_rate": 1.0449218750000001e-05, | |
| "loss": 0.0459, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 3.191650853889943, | |
| "grad_norm": 0.16265904903411865, | |
| "learning_rate": 1.0400390625e-05, | |
| "loss": 0.0525, | |
| "step": 8410 | |
| }, | |
| { | |
| "epoch": 3.195445920303605, | |
| "grad_norm": 0.6664568781852722, | |
| "learning_rate": 1.03515625e-05, | |
| "loss": 0.0727, | |
| "step": 8420 | |
| }, | |
| { | |
| "epoch": 3.1992409867172675, | |
| "grad_norm": 0.9481377005577087, | |
| "learning_rate": 1.0302734375e-05, | |
| "loss": 0.0215, | |
| "step": 8430 | |
| }, | |
| { | |
| "epoch": 3.20303605313093, | |
| "grad_norm": 5.600297451019287, | |
| "learning_rate": 1.025390625e-05, | |
| "loss": 0.0385, | |
| "step": 8440 | |
| }, | |
| { | |
| "epoch": 3.206831119544592, | |
| "grad_norm": 0.15000663697719574, | |
| "learning_rate": 1.0205078125000001e-05, | |
| "loss": 0.0659, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 3.210626185958254, | |
| "grad_norm": 0.6691407561302185, | |
| "learning_rate": 1.0156250000000001e-05, | |
| "loss": 0.0666, | |
| "step": 8460 | |
| }, | |
| { | |
| "epoch": 3.2144212523719164, | |
| "grad_norm": 1.3882899284362793, | |
| "learning_rate": 1.0107421875e-05, | |
| "loss": 0.0815, | |
| "step": 8470 | |
| }, | |
| { | |
| "epoch": 3.218216318785579, | |
| "grad_norm": 1.0314580202102661, | |
| "learning_rate": 1.005859375e-05, | |
| "loss": 0.0178, | |
| "step": 8480 | |
| }, | |
| { | |
| "epoch": 3.222011385199241, | |
| "grad_norm": 3.9537134170532227, | |
| "learning_rate": 1.0009765625e-05, | |
| "loss": 0.0631, | |
| "step": 8490 | |
| }, | |
| { | |
| "epoch": 3.225806451612903, | |
| "grad_norm": 5.446588039398193, | |
| "learning_rate": 9.9609375e-06, | |
| "loss": 0.0548, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 3.2296015180265654, | |
| "grad_norm": 8.026607513427734, | |
| "learning_rate": 9.912109375000001e-06, | |
| "loss": 0.0353, | |
| "step": 8510 | |
| }, | |
| { | |
| "epoch": 3.2333965844402277, | |
| "grad_norm": 0.1389143019914627, | |
| "learning_rate": 9.863281250000001e-06, | |
| "loss": 0.0419, | |
| "step": 8520 | |
| }, | |
| { | |
| "epoch": 3.23719165085389, | |
| "grad_norm": 1.255216121673584, | |
| "learning_rate": 9.814453125e-06, | |
| "loss": 0.0697, | |
| "step": 8530 | |
| }, | |
| { | |
| "epoch": 3.2409867172675524, | |
| "grad_norm": 4.600146770477295, | |
| "learning_rate": 9.765625e-06, | |
| "loss": 0.0879, | |
| "step": 8540 | |
| }, | |
| { | |
| "epoch": 3.2447817836812143, | |
| "grad_norm": 0.09613824635744095, | |
| "learning_rate": 9.716796875e-06, | |
| "loss": 0.0122, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 3.2485768500948766, | |
| "grad_norm": 1.0265446901321411, | |
| "learning_rate": 9.66796875e-06, | |
| "loss": 0.0227, | |
| "step": 8560 | |
| }, | |
| { | |
| "epoch": 3.252371916508539, | |
| "grad_norm": 2.185931444168091, | |
| "learning_rate": 9.619140625000001e-06, | |
| "loss": 0.1162, | |
| "step": 8570 | |
| }, | |
| { | |
| "epoch": 3.2561669829222013, | |
| "grad_norm": 0.1482323259115219, | |
| "learning_rate": 9.5703125e-06, | |
| "loss": 0.0581, | |
| "step": 8580 | |
| }, | |
| { | |
| "epoch": 3.259962049335863, | |
| "grad_norm": 0.17460452020168304, | |
| "learning_rate": 9.521484375e-06, | |
| "loss": 0.0399, | |
| "step": 8590 | |
| }, | |
| { | |
| "epoch": 3.2637571157495255, | |
| "grad_norm": 1.6274187564849854, | |
| "learning_rate": 9.47265625e-06, | |
| "loss": 0.0537, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 3.267552182163188, | |
| "grad_norm": 8.227033615112305, | |
| "learning_rate": 9.423828125e-06, | |
| "loss": 0.0646, | |
| "step": 8610 | |
| }, | |
| { | |
| "epoch": 3.27134724857685, | |
| "grad_norm": 0.08734069019556046, | |
| "learning_rate": 9.375000000000001e-06, | |
| "loss": 0.0675, | |
| "step": 8620 | |
| }, | |
| { | |
| "epoch": 3.2751423149905126, | |
| "grad_norm": 0.5700662732124329, | |
| "learning_rate": 9.326171875000001e-06, | |
| "loss": 0.0744, | |
| "step": 8630 | |
| }, | |
| { | |
| "epoch": 3.2789373814041745, | |
| "grad_norm": 2.089008092880249, | |
| "learning_rate": 9.27734375e-06, | |
| "loss": 0.0812, | |
| "step": 8640 | |
| }, | |
| { | |
| "epoch": 3.282732447817837, | |
| "grad_norm": 0.11990799009799957, | |
| "learning_rate": 9.228515625e-06, | |
| "loss": 0.071, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 3.286527514231499, | |
| "grad_norm": 0.5663464665412903, | |
| "learning_rate": 9.1796875e-06, | |
| "loss": 0.0279, | |
| "step": 8660 | |
| }, | |
| { | |
| "epoch": 3.2903225806451615, | |
| "grad_norm": 0.8847103118896484, | |
| "learning_rate": 9.130859375e-06, | |
| "loss": 0.0473, | |
| "step": 8670 | |
| }, | |
| { | |
| "epoch": 3.2941176470588234, | |
| "grad_norm": 0.08891147375106812, | |
| "learning_rate": 9.082031250000001e-06, | |
| "loss": 0.041, | |
| "step": 8680 | |
| }, | |
| { | |
| "epoch": 3.2979127134724857, | |
| "grad_norm": 0.0875004231929779, | |
| "learning_rate": 9.033203125000001e-06, | |
| "loss": 0.0284, | |
| "step": 8690 | |
| }, | |
| { | |
| "epoch": 3.301707779886148, | |
| "grad_norm": 0.353773832321167, | |
| "learning_rate": 8.984375e-06, | |
| "loss": 0.0451, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 3.3055028462998104, | |
| "grad_norm": 0.03987530991435051, | |
| "learning_rate": 8.935546875e-06, | |
| "loss": 0.0803, | |
| "step": 8710 | |
| }, | |
| { | |
| "epoch": 3.3092979127134727, | |
| "grad_norm": 2.087677001953125, | |
| "learning_rate": 8.88671875e-06, | |
| "loss": 0.0257, | |
| "step": 8720 | |
| }, | |
| { | |
| "epoch": 3.3130929791271346, | |
| "grad_norm": 4.051992893218994, | |
| "learning_rate": 8.837890625e-06, | |
| "loss": 0.0345, | |
| "step": 8730 | |
| }, | |
| { | |
| "epoch": 3.316888045540797, | |
| "grad_norm": 3.694368362426758, | |
| "learning_rate": 8.789062500000001e-06, | |
| "loss": 0.0824, | |
| "step": 8740 | |
| }, | |
| { | |
| "epoch": 3.3206831119544593, | |
| "grad_norm": 0.09131748974323273, | |
| "learning_rate": 8.740234375e-06, | |
| "loss": 0.0295, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 3.324478178368121, | |
| "grad_norm": 0.05908443033695221, | |
| "learning_rate": 8.69140625e-06, | |
| "loss": 0.0282, | |
| "step": 8760 | |
| }, | |
| { | |
| "epoch": 3.3282732447817835, | |
| "grad_norm": 1.863980770111084, | |
| "learning_rate": 8.642578125e-06, | |
| "loss": 0.0442, | |
| "step": 8770 | |
| }, | |
| { | |
| "epoch": 3.332068311195446, | |
| "grad_norm": 1.2207703590393066, | |
| "learning_rate": 8.59375e-06, | |
| "loss": 0.0316, | |
| "step": 8780 | |
| }, | |
| { | |
| "epoch": 3.3358633776091082, | |
| "grad_norm": 2.562156915664673, | |
| "learning_rate": 8.544921875e-06, | |
| "loss": 0.0598, | |
| "step": 8790 | |
| }, | |
| { | |
| "epoch": 3.3396584440227706, | |
| "grad_norm": 5.533409595489502, | |
| "learning_rate": 8.496093750000001e-06, | |
| "loss": 0.0432, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 3.3434535104364325, | |
| "grad_norm": 0.47492659091949463, | |
| "learning_rate": 8.447265625e-06, | |
| "loss": 0.0528, | |
| "step": 8810 | |
| }, | |
| { | |
| "epoch": 3.347248576850095, | |
| "grad_norm": 1.0108855962753296, | |
| "learning_rate": 8.3984375e-06, | |
| "loss": 0.0552, | |
| "step": 8820 | |
| }, | |
| { | |
| "epoch": 3.351043643263757, | |
| "grad_norm": 1.780705451965332, | |
| "learning_rate": 8.349609375e-06, | |
| "loss": 0.0252, | |
| "step": 8830 | |
| }, | |
| { | |
| "epoch": 3.3548387096774195, | |
| "grad_norm": 0.3152208924293518, | |
| "learning_rate": 8.30078125e-06, | |
| "loss": 0.0915, | |
| "step": 8840 | |
| }, | |
| { | |
| "epoch": 3.3586337760910814, | |
| "grad_norm": 1.9720813035964966, | |
| "learning_rate": 8.251953125000001e-06, | |
| "loss": 0.0571, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 3.3624288425047437, | |
| "grad_norm": 0.5636972784996033, | |
| "learning_rate": 8.203125000000001e-06, | |
| "loss": 0.0716, | |
| "step": 8860 | |
| }, | |
| { | |
| "epoch": 3.366223908918406, | |
| "grad_norm": 9.523944854736328, | |
| "learning_rate": 8.154296875e-06, | |
| "loss": 0.0649, | |
| "step": 8870 | |
| }, | |
| { | |
| "epoch": 3.3700189753320684, | |
| "grad_norm": 1.868201732635498, | |
| "learning_rate": 8.10546875e-06, | |
| "loss": 0.1055, | |
| "step": 8880 | |
| }, | |
| { | |
| "epoch": 3.3738140417457307, | |
| "grad_norm": 4.064790725708008, | |
| "learning_rate": 8.056640625e-06, | |
| "loss": 0.0681, | |
| "step": 8890 | |
| }, | |
| { | |
| "epoch": 3.3776091081593926, | |
| "grad_norm": 5.854636192321777, | |
| "learning_rate": 8.0078125e-06, | |
| "loss": 0.0755, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 3.381404174573055, | |
| "grad_norm": 0.47955596446990967, | |
| "learning_rate": 7.958984375000001e-06, | |
| "loss": 0.0832, | |
| "step": 8910 | |
| }, | |
| { | |
| "epoch": 3.3851992409867173, | |
| "grad_norm": 0.48627012968063354, | |
| "learning_rate": 7.91015625e-06, | |
| "loss": 0.0487, | |
| "step": 8920 | |
| }, | |
| { | |
| "epoch": 3.3889943074003797, | |
| "grad_norm": 1.4986870288848877, | |
| "learning_rate": 7.861328125e-06, | |
| "loss": 0.0769, | |
| "step": 8930 | |
| }, | |
| { | |
| "epoch": 3.3927893738140416, | |
| "grad_norm": 1.139615774154663, | |
| "learning_rate": 7.8125e-06, | |
| "loss": 0.0238, | |
| "step": 8940 | |
| }, | |
| { | |
| "epoch": 3.396584440227704, | |
| "grad_norm": 0.17134952545166016, | |
| "learning_rate": 7.763671875e-06, | |
| "loss": 0.072, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 3.4003795066413662, | |
| "grad_norm": 0.15060165524482727, | |
| "learning_rate": 7.71484375e-06, | |
| "loss": 0.0607, | |
| "step": 8960 | |
| }, | |
| { | |
| "epoch": 3.4041745730550286, | |
| "grad_norm": 1.0973819494247437, | |
| "learning_rate": 7.666015625000001e-06, | |
| "loss": 0.0914, | |
| "step": 8970 | |
| }, | |
| { | |
| "epoch": 3.407969639468691, | |
| "grad_norm": 4.7881951332092285, | |
| "learning_rate": 7.6171875000000005e-06, | |
| "loss": 0.0515, | |
| "step": 8980 | |
| }, | |
| { | |
| "epoch": 3.411764705882353, | |
| "grad_norm": 2.9025986194610596, | |
| "learning_rate": 7.568359375e-06, | |
| "loss": 0.0576, | |
| "step": 8990 | |
| }, | |
| { | |
| "epoch": 3.415559772296015, | |
| "grad_norm": 0.07781478762626648, | |
| "learning_rate": 7.51953125e-06, | |
| "loss": 0.0318, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 3.4193548387096775, | |
| "grad_norm": 2.8141448497772217, | |
| "learning_rate": 7.4707031250000005e-06, | |
| "loss": 0.0598, | |
| "step": 9010 | |
| }, | |
| { | |
| "epoch": 3.42314990512334, | |
| "grad_norm": 1.2371045351028442, | |
| "learning_rate": 7.421875e-06, | |
| "loss": 0.1014, | |
| "step": 9020 | |
| }, | |
| { | |
| "epoch": 3.4269449715370017, | |
| "grad_norm": 0.11280115693807602, | |
| "learning_rate": 7.373046875e-06, | |
| "loss": 0.0571, | |
| "step": 9030 | |
| }, | |
| { | |
| "epoch": 3.430740037950664, | |
| "grad_norm": 0.07071410119533539, | |
| "learning_rate": 7.3242187500000006e-06, | |
| "loss": 0.0289, | |
| "step": 9040 | |
| }, | |
| { | |
| "epoch": 3.4345351043643264, | |
| "grad_norm": 0.07948953658342361, | |
| "learning_rate": 7.275390625e-06, | |
| "loss": 0.0328, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 3.4383301707779887, | |
| "grad_norm": 6.166849613189697, | |
| "learning_rate": 7.2265625e-06, | |
| "loss": 0.0501, | |
| "step": 9060 | |
| }, | |
| { | |
| "epoch": 3.442125237191651, | |
| "grad_norm": 0.3815774619579315, | |
| "learning_rate": 7.177734375000001e-06, | |
| "loss": 0.0449, | |
| "step": 9070 | |
| }, | |
| { | |
| "epoch": 3.445920303605313, | |
| "grad_norm": 0.21274378895759583, | |
| "learning_rate": 7.12890625e-06, | |
| "loss": 0.0871, | |
| "step": 9080 | |
| }, | |
| { | |
| "epoch": 3.4497153700189753, | |
| "grad_norm": 0.5041061043739319, | |
| "learning_rate": 7.080078125e-06, | |
| "loss": 0.0451, | |
| "step": 9090 | |
| }, | |
| { | |
| "epoch": 3.4535104364326377, | |
| "grad_norm": 2.4566073417663574, | |
| "learning_rate": 7.031250000000001e-06, | |
| "loss": 0.0622, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 3.4573055028462996, | |
| "grad_norm": 5.31998872756958, | |
| "learning_rate": 6.982421875e-06, | |
| "loss": 0.0545, | |
| "step": 9110 | |
| }, | |
| { | |
| "epoch": 3.461100569259962, | |
| "grad_norm": 0.2531034052371979, | |
| "learning_rate": 6.93359375e-06, | |
| "loss": 0.0449, | |
| "step": 9120 | |
| }, | |
| { | |
| "epoch": 3.4648956356736242, | |
| "grad_norm": 0.03640067204833031, | |
| "learning_rate": 6.884765625000001e-06, | |
| "loss": 0.0944, | |
| "step": 9130 | |
| }, | |
| { | |
| "epoch": 3.4686907020872866, | |
| "grad_norm": 0.9717852473258972, | |
| "learning_rate": 6.8359375e-06, | |
| "loss": 0.0165, | |
| "step": 9140 | |
| }, | |
| { | |
| "epoch": 3.472485768500949, | |
| "grad_norm": 1.4924548864364624, | |
| "learning_rate": 6.7871093750000004e-06, | |
| "loss": 0.069, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 3.476280834914611, | |
| "grad_norm": 2.620271682739258, | |
| "learning_rate": 6.738281250000001e-06, | |
| "loss": 0.0967, | |
| "step": 9160 | |
| }, | |
| { | |
| "epoch": 3.480075901328273, | |
| "grad_norm": 2.279548406600952, | |
| "learning_rate": 6.689453125e-06, | |
| "loss": 0.0257, | |
| "step": 9170 | |
| }, | |
| { | |
| "epoch": 3.4838709677419355, | |
| "grad_norm": 0.08608423173427582, | |
| "learning_rate": 6.6406250000000005e-06, | |
| "loss": 0.0359, | |
| "step": 9180 | |
| }, | |
| { | |
| "epoch": 3.487666034155598, | |
| "grad_norm": 5.201995849609375, | |
| "learning_rate": 6.591796875e-06, | |
| "loss": 0.0349, | |
| "step": 9190 | |
| }, | |
| { | |
| "epoch": 3.4914611005692597, | |
| "grad_norm": 0.6848796606063843, | |
| "learning_rate": 6.54296875e-06, | |
| "loss": 0.0473, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 3.495256166982922, | |
| "grad_norm": 1.0673704147338867, | |
| "learning_rate": 6.4941406250000005e-06, | |
| "loss": 0.0751, | |
| "step": 9210 | |
| }, | |
| { | |
| "epoch": 3.4990512333965844, | |
| "grad_norm": 6.374655723571777, | |
| "learning_rate": 6.4453125e-06, | |
| "loss": 0.0672, | |
| "step": 9220 | |
| }, | |
| { | |
| "epoch": 3.5028462998102468, | |
| "grad_norm": 3.0670387744903564, | |
| "learning_rate": 6.396484375e-06, | |
| "loss": 0.1047, | |
| "step": 9230 | |
| }, | |
| { | |
| "epoch": 3.506641366223909, | |
| "grad_norm": 2.0058538913726807, | |
| "learning_rate": 6.3476562500000006e-06, | |
| "loss": 0.0571, | |
| "step": 9240 | |
| }, | |
| { | |
| "epoch": 3.510436432637571, | |
| "grad_norm": 0.8808121681213379, | |
| "learning_rate": 6.298828125e-06, | |
| "loss": 0.0742, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 3.5142314990512333, | |
| "grad_norm": 0.1013035699725151, | |
| "learning_rate": 6.25e-06, | |
| "loss": 0.0506, | |
| "step": 9260 | |
| }, | |
| { | |
| "epoch": 3.5180265654648957, | |
| "grad_norm": 1.1379400491714478, | |
| "learning_rate": 6.201171875000001e-06, | |
| "loss": 0.0466, | |
| "step": 9270 | |
| }, | |
| { | |
| "epoch": 3.521821631878558, | |
| "grad_norm": 0.44777366518974304, | |
| "learning_rate": 6.15234375e-06, | |
| "loss": 0.0425, | |
| "step": 9280 | |
| }, | |
| { | |
| "epoch": 3.52561669829222, | |
| "grad_norm": 0.6099011301994324, | |
| "learning_rate": 6.103515625e-06, | |
| "loss": 0.0368, | |
| "step": 9290 | |
| }, | |
| { | |
| "epoch": 3.5294117647058822, | |
| "grad_norm": 10.134333610534668, | |
| "learning_rate": 6.054687500000001e-06, | |
| "loss": 0.0459, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 3.5332068311195446, | |
| "grad_norm": 10.301962852478027, | |
| "learning_rate": 6.005859375e-06, | |
| "loss": 0.0712, | |
| "step": 9310 | |
| }, | |
| { | |
| "epoch": 3.537001897533207, | |
| "grad_norm": 2.240419864654541, | |
| "learning_rate": 5.95703125e-06, | |
| "loss": 0.0496, | |
| "step": 9320 | |
| }, | |
| { | |
| "epoch": 3.5407969639468693, | |
| "grad_norm": 9.403803825378418, | |
| "learning_rate": 5.908203125000001e-06, | |
| "loss": 0.0551, | |
| "step": 9330 | |
| }, | |
| { | |
| "epoch": 3.544592030360531, | |
| "grad_norm": 0.0765363797545433, | |
| "learning_rate": 5.859375e-06, | |
| "loss": 0.0382, | |
| "step": 9340 | |
| }, | |
| { | |
| "epoch": 3.5483870967741935, | |
| "grad_norm": 0.6216185688972473, | |
| "learning_rate": 5.810546875e-06, | |
| "loss": 0.0723, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 3.552182163187856, | |
| "grad_norm": 6.577167987823486, | |
| "learning_rate": 5.76171875e-06, | |
| "loss": 0.0626, | |
| "step": 9360 | |
| }, | |
| { | |
| "epoch": 3.555977229601518, | |
| "grad_norm": 0.15332098305225372, | |
| "learning_rate": 5.712890625e-06, | |
| "loss": 0.0419, | |
| "step": 9370 | |
| }, | |
| { | |
| "epoch": 3.55977229601518, | |
| "grad_norm": 3.2923789024353027, | |
| "learning_rate": 5.6640625000000005e-06, | |
| "loss": 0.0894, | |
| "step": 9380 | |
| }, | |
| { | |
| "epoch": 3.5635673624288424, | |
| "grad_norm": 1.0206191539764404, | |
| "learning_rate": 5.615234375e-06, | |
| "loss": 0.0477, | |
| "step": 9390 | |
| }, | |
| { | |
| "epoch": 3.5673624288425048, | |
| "grad_norm": 5.454959869384766, | |
| "learning_rate": 5.56640625e-06, | |
| "loss": 0.0315, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 3.571157495256167, | |
| "grad_norm": 0.3191007673740387, | |
| "learning_rate": 5.5175781250000005e-06, | |
| "loss": 0.068, | |
| "step": 9410 | |
| }, | |
| { | |
| "epoch": 3.5749525616698294, | |
| "grad_norm": 12.383304595947266, | |
| "learning_rate": 5.46875e-06, | |
| "loss": 0.0444, | |
| "step": 9420 | |
| }, | |
| { | |
| "epoch": 3.5787476280834913, | |
| "grad_norm": 1.9023758172988892, | |
| "learning_rate": 5.419921875e-06, | |
| "loss": 0.0942, | |
| "step": 9430 | |
| }, | |
| { | |
| "epoch": 3.5825426944971537, | |
| "grad_norm": 0.06706677377223969, | |
| "learning_rate": 5.3710937500000005e-06, | |
| "loss": 0.0512, | |
| "step": 9440 | |
| }, | |
| { | |
| "epoch": 3.586337760910816, | |
| "grad_norm": 0.32390040159225464, | |
| "learning_rate": 5.322265625e-06, | |
| "loss": 0.0603, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 3.590132827324478, | |
| "grad_norm": 1.5318775177001953, | |
| "learning_rate": 5.2734375e-06, | |
| "loss": 0.0491, | |
| "step": 9460 | |
| }, | |
| { | |
| "epoch": 3.5939278937381403, | |
| "grad_norm": 0.5909900665283203, | |
| "learning_rate": 5.2246093750000006e-06, | |
| "loss": 0.0294, | |
| "step": 9470 | |
| }, | |
| { | |
| "epoch": 3.5977229601518026, | |
| "grad_norm": 1.5226948261260986, | |
| "learning_rate": 5.17578125e-06, | |
| "loss": 0.0621, | |
| "step": 9480 | |
| }, | |
| { | |
| "epoch": 3.601518026565465, | |
| "grad_norm": 0.24643893539905548, | |
| "learning_rate": 5.126953125e-06, | |
| "loss": 0.0293, | |
| "step": 9490 | |
| }, | |
| { | |
| "epoch": 3.6053130929791273, | |
| "grad_norm": 7.143110752105713, | |
| "learning_rate": 5.078125000000001e-06, | |
| "loss": 0.0592, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 3.6091081593927896, | |
| "grad_norm": 3.5135350227355957, | |
| "learning_rate": 5.029296875e-06, | |
| "loss": 0.0705, | |
| "step": 9510 | |
| }, | |
| { | |
| "epoch": 3.6129032258064515, | |
| "grad_norm": 4.653140544891357, | |
| "learning_rate": 4.98046875e-06, | |
| "loss": 0.0624, | |
| "step": 9520 | |
| }, | |
| { | |
| "epoch": 3.616698292220114, | |
| "grad_norm": 0.044525645673274994, | |
| "learning_rate": 4.931640625000001e-06, | |
| "loss": 0.0449, | |
| "step": 9530 | |
| }, | |
| { | |
| "epoch": 3.620493358633776, | |
| "grad_norm": 7.338439464569092, | |
| "learning_rate": 4.8828125e-06, | |
| "loss": 0.0536, | |
| "step": 9540 | |
| }, | |
| { | |
| "epoch": 3.624288425047438, | |
| "grad_norm": 0.4086396396160126, | |
| "learning_rate": 4.833984375e-06, | |
| "loss": 0.038, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 3.6280834914611004, | |
| "grad_norm": 0.05038388445973396, | |
| "learning_rate": 4.78515625e-06, | |
| "loss": 0.0458, | |
| "step": 9560 | |
| }, | |
| { | |
| "epoch": 3.6318785578747628, | |
| "grad_norm": 0.09961717575788498, | |
| "learning_rate": 4.736328125e-06, | |
| "loss": 0.0468, | |
| "step": 9570 | |
| }, | |
| { | |
| "epoch": 3.635673624288425, | |
| "grad_norm": 0.27485185861587524, | |
| "learning_rate": 4.6875000000000004e-06, | |
| "loss": 0.0675, | |
| "step": 9580 | |
| }, | |
| { | |
| "epoch": 3.6394686907020875, | |
| "grad_norm": 4.295794486999512, | |
| "learning_rate": 4.638671875e-06, | |
| "loss": 0.0519, | |
| "step": 9590 | |
| }, | |
| { | |
| "epoch": 3.64326375711575, | |
| "grad_norm": 1.9907684326171875, | |
| "learning_rate": 4.58984375e-06, | |
| "loss": 0.0422, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 3.6470588235294117, | |
| "grad_norm": 0.12039614468812943, | |
| "learning_rate": 4.5410156250000005e-06, | |
| "loss": 0.044, | |
| "step": 9610 | |
| }, | |
| { | |
| "epoch": 3.650853889943074, | |
| "grad_norm": 0.4942443072795868, | |
| "learning_rate": 4.4921875e-06, | |
| "loss": 0.0828, | |
| "step": 9620 | |
| }, | |
| { | |
| "epoch": 3.6546489563567364, | |
| "grad_norm": 0.8744149804115295, | |
| "learning_rate": 4.443359375e-06, | |
| "loss": 0.0514, | |
| "step": 9630 | |
| }, | |
| { | |
| "epoch": 3.6584440227703983, | |
| "grad_norm": 1.8012325763702393, | |
| "learning_rate": 4.3945312500000005e-06, | |
| "loss": 0.0389, | |
| "step": 9640 | |
| }, | |
| { | |
| "epoch": 3.6622390891840606, | |
| "grad_norm": 0.09957607835531235, | |
| "learning_rate": 4.345703125e-06, | |
| "loss": 0.0512, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 3.666034155597723, | |
| "grad_norm": 0.0749269425868988, | |
| "learning_rate": 4.296875e-06, | |
| "loss": 0.0278, | |
| "step": 9660 | |
| }, | |
| { | |
| "epoch": 3.6698292220113853, | |
| "grad_norm": 0.04859253391623497, | |
| "learning_rate": 4.2480468750000006e-06, | |
| "loss": 0.0813, | |
| "step": 9670 | |
| }, | |
| { | |
| "epoch": 3.6736242884250476, | |
| "grad_norm": 3.236546277999878, | |
| "learning_rate": 4.19921875e-06, | |
| "loss": 0.0408, | |
| "step": 9680 | |
| }, | |
| { | |
| "epoch": 3.6774193548387095, | |
| "grad_norm": 2.782500743865967, | |
| "learning_rate": 4.150390625e-06, | |
| "loss": 0.0365, | |
| "step": 9690 | |
| }, | |
| { | |
| "epoch": 3.681214421252372, | |
| "grad_norm": 0.2516065835952759, | |
| "learning_rate": 4.101562500000001e-06, | |
| "loss": 0.0541, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 3.685009487666034, | |
| "grad_norm": 0.0802445337176323, | |
| "learning_rate": 4.052734375e-06, | |
| "loss": 0.0296, | |
| "step": 9710 | |
| }, | |
| { | |
| "epoch": 3.6888045540796965, | |
| "grad_norm": 0.7485657930374146, | |
| "learning_rate": 4.00390625e-06, | |
| "loss": 0.0194, | |
| "step": 9720 | |
| }, | |
| { | |
| "epoch": 3.6925996204933584, | |
| "grad_norm": 0.05877687409520149, | |
| "learning_rate": 3.955078125e-06, | |
| "loss": 0.0547, | |
| "step": 9730 | |
| }, | |
| { | |
| "epoch": 3.6963946869070208, | |
| "grad_norm": 3.6818785667419434, | |
| "learning_rate": 3.90625e-06, | |
| "loss": 0.0801, | |
| "step": 9740 | |
| }, | |
| { | |
| "epoch": 3.700189753320683, | |
| "grad_norm": 0.22303463518619537, | |
| "learning_rate": 3.857421875e-06, | |
| "loss": 0.0326, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 3.7039848197343455, | |
| "grad_norm": 0.16665808856487274, | |
| "learning_rate": 3.8085937500000002e-06, | |
| "loss": 0.0664, | |
| "step": 9760 | |
| }, | |
| { | |
| "epoch": 3.707779886148008, | |
| "grad_norm": 0.2113623172044754, | |
| "learning_rate": 3.759765625e-06, | |
| "loss": 0.0495, | |
| "step": 9770 | |
| }, | |
| { | |
| "epoch": 3.7115749525616697, | |
| "grad_norm": 1.9400161504745483, | |
| "learning_rate": 3.7109375e-06, | |
| "loss": 0.062, | |
| "step": 9780 | |
| }, | |
| { | |
| "epoch": 3.715370018975332, | |
| "grad_norm": 2.147211790084839, | |
| "learning_rate": 3.6621093750000003e-06, | |
| "loss": 0.0408, | |
| "step": 9790 | |
| }, | |
| { | |
| "epoch": 3.7191650853889944, | |
| "grad_norm": 0.17818136513233185, | |
| "learning_rate": 3.61328125e-06, | |
| "loss": 0.0376, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 3.7229601518026563, | |
| "grad_norm": 0.2646294832229614, | |
| "learning_rate": 3.564453125e-06, | |
| "loss": 0.0488, | |
| "step": 9810 | |
| }, | |
| { | |
| "epoch": 3.7267552182163186, | |
| "grad_norm": 0.07648167759180069, | |
| "learning_rate": 3.5156250000000003e-06, | |
| "loss": 0.0618, | |
| "step": 9820 | |
| }, | |
| { | |
| "epoch": 3.730550284629981, | |
| "grad_norm": 4.988431930541992, | |
| "learning_rate": 3.466796875e-06, | |
| "loss": 0.0438, | |
| "step": 9830 | |
| }, | |
| { | |
| "epoch": 3.7343453510436433, | |
| "grad_norm": 4.025431156158447, | |
| "learning_rate": 3.41796875e-06, | |
| "loss": 0.0663, | |
| "step": 9840 | |
| }, | |
| { | |
| "epoch": 3.7381404174573056, | |
| "grad_norm": 0.7877894043922424, | |
| "learning_rate": 3.3691406250000004e-06, | |
| "loss": 0.0261, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 3.741935483870968, | |
| "grad_norm": 1.7883660793304443, | |
| "learning_rate": 3.3203125000000002e-06, | |
| "loss": 0.0481, | |
| "step": 9860 | |
| }, | |
| { | |
| "epoch": 3.74573055028463, | |
| "grad_norm": 2.136960029602051, | |
| "learning_rate": 3.271484375e-06, | |
| "loss": 0.052, | |
| "step": 9870 | |
| }, | |
| { | |
| "epoch": 3.749525616698292, | |
| "grad_norm": 0.9067153930664062, | |
| "learning_rate": 3.22265625e-06, | |
| "loss": 0.0567, | |
| "step": 9880 | |
| }, | |
| { | |
| "epoch": 3.7533206831119545, | |
| "grad_norm": 1.2437059879302979, | |
| "learning_rate": 3.1738281250000003e-06, | |
| "loss": 0.053, | |
| "step": 9890 | |
| }, | |
| { | |
| "epoch": 3.7571157495256164, | |
| "grad_norm": 2.1223294734954834, | |
| "learning_rate": 3.125e-06, | |
| "loss": 0.0484, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 3.760910815939279, | |
| "grad_norm": 8.40434455871582, | |
| "learning_rate": 3.076171875e-06, | |
| "loss": 0.0451, | |
| "step": 9910 | |
| }, | |
| { | |
| "epoch": 3.764705882352941, | |
| "grad_norm": 2.565584421157837, | |
| "learning_rate": 3.0273437500000003e-06, | |
| "loss": 0.0589, | |
| "step": 9920 | |
| }, | |
| { | |
| "epoch": 3.7685009487666035, | |
| "grad_norm": 5.559597492218018, | |
| "learning_rate": 2.978515625e-06, | |
| "loss": 0.0396, | |
| "step": 9930 | |
| }, | |
| { | |
| "epoch": 3.772296015180266, | |
| "grad_norm": 0.5843867659568787, | |
| "learning_rate": 2.9296875e-06, | |
| "loss": 0.0682, | |
| "step": 9940 | |
| }, | |
| { | |
| "epoch": 3.776091081593928, | |
| "grad_norm": 1.6344566345214844, | |
| "learning_rate": 2.880859375e-06, | |
| "loss": 0.0892, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 3.77988614800759, | |
| "grad_norm": 5.6130051612854, | |
| "learning_rate": 2.8320312500000002e-06, | |
| "loss": 0.0439, | |
| "step": 9960 | |
| }, | |
| { | |
| "epoch": 3.7836812144212524, | |
| "grad_norm": 3.700528144836426, | |
| "learning_rate": 2.783203125e-06, | |
| "loss": 0.0228, | |
| "step": 9970 | |
| }, | |
| { | |
| "epoch": 3.7874762808349147, | |
| "grad_norm": 2.797687530517578, | |
| "learning_rate": 2.734375e-06, | |
| "loss": 0.0247, | |
| "step": 9980 | |
| }, | |
| { | |
| "epoch": 3.7912713472485766, | |
| "grad_norm": 1.7192658185958862, | |
| "learning_rate": 2.6855468750000003e-06, | |
| "loss": 0.0792, | |
| "step": 9990 | |
| }, | |
| { | |
| "epoch": 3.795066413662239, | |
| "grad_norm": 0.0573776513338089, | |
| "learning_rate": 2.63671875e-06, | |
| "loss": 0.0136, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 3.7988614800759013, | |
| "grad_norm": 0.07321004569530487, | |
| "learning_rate": 2.587890625e-06, | |
| "loss": 0.0461, | |
| "step": 10010 | |
| }, | |
| { | |
| "epoch": 3.8026565464895636, | |
| "grad_norm": 0.045114945620298386, | |
| "learning_rate": 2.5390625000000003e-06, | |
| "loss": 0.0658, | |
| "step": 10020 | |
| }, | |
| { | |
| "epoch": 3.806451612903226, | |
| "grad_norm": 0.3899228870868683, | |
| "learning_rate": 2.490234375e-06, | |
| "loss": 0.0389, | |
| "step": 10030 | |
| }, | |
| { | |
| "epoch": 3.8102466793168883, | |
| "grad_norm": 0.6319021582603455, | |
| "learning_rate": 2.44140625e-06, | |
| "loss": 0.0247, | |
| "step": 10040 | |
| }, | |
| { | |
| "epoch": 3.81404174573055, | |
| "grad_norm": 1.4026541709899902, | |
| "learning_rate": 2.392578125e-06, | |
| "loss": 0.0161, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 3.8178368121442126, | |
| "grad_norm": 4.106344699859619, | |
| "learning_rate": 2.3437500000000002e-06, | |
| "loss": 0.042, | |
| "step": 10060 | |
| }, | |
| { | |
| "epoch": 3.821631878557875, | |
| "grad_norm": 0.5673054456710815, | |
| "learning_rate": 2.294921875e-06, | |
| "loss": 0.0589, | |
| "step": 10070 | |
| }, | |
| { | |
| "epoch": 3.825426944971537, | |
| "grad_norm": 0.057744644582271576, | |
| "learning_rate": 2.24609375e-06, | |
| "loss": 0.0305, | |
| "step": 10080 | |
| }, | |
| { | |
| "epoch": 3.829222011385199, | |
| "grad_norm": 3.3453450202941895, | |
| "learning_rate": 2.1972656250000003e-06, | |
| "loss": 0.0317, | |
| "step": 10090 | |
| }, | |
| { | |
| "epoch": 3.8330170777988615, | |
| "grad_norm": 0.08820886164903641, | |
| "learning_rate": 2.1484375e-06, | |
| "loss": 0.0355, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 3.836812144212524, | |
| "grad_norm": 1.522764801979065, | |
| "learning_rate": 2.099609375e-06, | |
| "loss": 0.0496, | |
| "step": 10110 | |
| }, | |
| { | |
| "epoch": 3.840607210626186, | |
| "grad_norm": 0.9732184410095215, | |
| "learning_rate": 2.0507812500000003e-06, | |
| "loss": 0.0303, | |
| "step": 10120 | |
| }, | |
| { | |
| "epoch": 3.844402277039848, | |
| "grad_norm": 0.1131846010684967, | |
| "learning_rate": 2.001953125e-06, | |
| "loss": 0.0359, | |
| "step": 10130 | |
| }, | |
| { | |
| "epoch": 3.8481973434535104, | |
| "grad_norm": 3.4666688442230225, | |
| "learning_rate": 1.953125e-06, | |
| "loss": 0.0542, | |
| "step": 10140 | |
| }, | |
| { | |
| "epoch": 3.8519924098671727, | |
| "grad_norm": 3.6389381885528564, | |
| "learning_rate": 1.9042968750000001e-06, | |
| "loss": 0.0328, | |
| "step": 10150 | |
| }, | |
| { | |
| "epoch": 3.855787476280835, | |
| "grad_norm": 0.7695565819740295, | |
| "learning_rate": 1.85546875e-06, | |
| "loss": 0.0294, | |
| "step": 10160 | |
| }, | |
| { | |
| "epoch": 3.859582542694497, | |
| "grad_norm": 5.1775593757629395, | |
| "learning_rate": 1.806640625e-06, | |
| "loss": 0.055, | |
| "step": 10170 | |
| }, | |
| { | |
| "epoch": 3.8633776091081593, | |
| "grad_norm": 0.46061795949935913, | |
| "learning_rate": 1.7578125000000002e-06, | |
| "loss": 0.0376, | |
| "step": 10180 | |
| }, | |
| { | |
| "epoch": 3.8671726755218216, | |
| "grad_norm": 0.16866852343082428, | |
| "learning_rate": 1.708984375e-06, | |
| "loss": 0.092, | |
| "step": 10190 | |
| }, | |
| { | |
| "epoch": 3.870967741935484, | |
| "grad_norm": 2.495349168777466, | |
| "learning_rate": 1.6601562500000001e-06, | |
| "loss": 0.0205, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 3.8747628083491463, | |
| "grad_norm": 4.127594470977783, | |
| "learning_rate": 1.611328125e-06, | |
| "loss": 0.0376, | |
| "step": 10210 | |
| }, | |
| { | |
| "epoch": 3.878557874762808, | |
| "grad_norm": 0.0868837833404541, | |
| "learning_rate": 1.5625e-06, | |
| "loss": 0.0715, | |
| "step": 10220 | |
| }, | |
| { | |
| "epoch": 3.8823529411764706, | |
| "grad_norm": 2.7866268157958984, | |
| "learning_rate": 1.5136718750000002e-06, | |
| "loss": 0.0623, | |
| "step": 10230 | |
| }, | |
| { | |
| "epoch": 3.886148007590133, | |
| "grad_norm": 0.5652477741241455, | |
| "learning_rate": 1.46484375e-06, | |
| "loss": 0.0521, | |
| "step": 10240 | |
| }, | |
| { | |
| "epoch": 3.889943074003795, | |
| "grad_norm": 0.13568060100078583, | |
| "learning_rate": 1.4160156250000001e-06, | |
| "loss": 0.0373, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 3.893738140417457, | |
| "grad_norm": 7.213637828826904, | |
| "learning_rate": 1.3671875e-06, | |
| "loss": 0.1189, | |
| "step": 10260 | |
| }, | |
| { | |
| "epoch": 3.8975332068311195, | |
| "grad_norm": 4.795431613922119, | |
| "learning_rate": 1.318359375e-06, | |
| "loss": 0.0368, | |
| "step": 10270 | |
| }, | |
| { | |
| "epoch": 3.901328273244782, | |
| "grad_norm": 4.8751220703125, | |
| "learning_rate": 1.2695312500000002e-06, | |
| "loss": 0.0972, | |
| "step": 10280 | |
| }, | |
| { | |
| "epoch": 3.905123339658444, | |
| "grad_norm": 0.5513148307800293, | |
| "learning_rate": 1.220703125e-06, | |
| "loss": 0.0287, | |
| "step": 10290 | |
| }, | |
| { | |
| "epoch": 3.9089184060721065, | |
| "grad_norm": 0.16232678294181824, | |
| "learning_rate": 1.1718750000000001e-06, | |
| "loss": 0.0651, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 3.9127134724857684, | |
| "grad_norm": 3.053624391555786, | |
| "learning_rate": 1.123046875e-06, | |
| "loss": 0.0358, | |
| "step": 10310 | |
| }, | |
| { | |
| "epoch": 3.9165085388994307, | |
| "grad_norm": 0.1307297945022583, | |
| "learning_rate": 1.07421875e-06, | |
| "loss": 0.0171, | |
| "step": 10320 | |
| }, | |
| { | |
| "epoch": 3.920303605313093, | |
| "grad_norm": 5.61918306350708, | |
| "learning_rate": 1.0253906250000001e-06, | |
| "loss": 0.0383, | |
| "step": 10330 | |
| }, | |
| { | |
| "epoch": 3.924098671726755, | |
| "grad_norm": 4.017998695373535, | |
| "learning_rate": 9.765625e-07, | |
| "loss": 0.0547, | |
| "step": 10340 | |
| }, | |
| { | |
| "epoch": 3.9278937381404173, | |
| "grad_norm": 8.339895248413086, | |
| "learning_rate": 9.27734375e-07, | |
| "loss": 0.059, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 3.9316888045540797, | |
| "grad_norm": 0.5986772179603577, | |
| "learning_rate": 8.789062500000001e-07, | |
| "loss": 0.0773, | |
| "step": 10360 | |
| }, | |
| { | |
| "epoch": 3.935483870967742, | |
| "grad_norm": 0.0516970194876194, | |
| "learning_rate": 8.300781250000001e-07, | |
| "loss": 0.0697, | |
| "step": 10370 | |
| }, | |
| { | |
| "epoch": 3.9392789373814043, | |
| "grad_norm": 1.0691931247711182, | |
| "learning_rate": 7.8125e-07, | |
| "loss": 0.0382, | |
| "step": 10380 | |
| }, | |
| { | |
| "epoch": 3.9430740037950667, | |
| "grad_norm": 1.0503530502319336, | |
| "learning_rate": 7.32421875e-07, | |
| "loss": 0.0781, | |
| "step": 10390 | |
| }, | |
| { | |
| "epoch": 3.9468690702087286, | |
| "grad_norm": 4.003793239593506, | |
| "learning_rate": 6.8359375e-07, | |
| "loss": 0.1007, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 3.950664136622391, | |
| "grad_norm": 0.04315977543592453, | |
| "learning_rate": 6.347656250000001e-07, | |
| "loss": 0.0553, | |
| "step": 10410 | |
| }, | |
| { | |
| "epoch": 3.9544592030360532, | |
| "grad_norm": 4.378900051116943, | |
| "learning_rate": 5.859375000000001e-07, | |
| "loss": 0.0239, | |
| "step": 10420 | |
| }, | |
| { | |
| "epoch": 3.958254269449715, | |
| "grad_norm": 0.17604303359985352, | |
| "learning_rate": 5.37109375e-07, | |
| "loss": 0.0338, | |
| "step": 10430 | |
| }, | |
| { | |
| "epoch": 3.9620493358633775, | |
| "grad_norm": 0.040019456297159195, | |
| "learning_rate": 4.8828125e-07, | |
| "loss": 0.0088, | |
| "step": 10440 | |
| }, | |
| { | |
| "epoch": 3.96584440227704, | |
| "grad_norm": 4.001920700073242, | |
| "learning_rate": 4.3945312500000004e-07, | |
| "loss": 0.0395, | |
| "step": 10450 | |
| }, | |
| { | |
| "epoch": 3.969639468690702, | |
| "grad_norm": 4.805160999298096, | |
| "learning_rate": 3.90625e-07, | |
| "loss": 0.0713, | |
| "step": 10460 | |
| }, | |
| { | |
| "epoch": 3.9734345351043645, | |
| "grad_norm": 0.0865137130022049, | |
| "learning_rate": 3.41796875e-07, | |
| "loss": 0.0394, | |
| "step": 10470 | |
| }, | |
| { | |
| "epoch": 3.9772296015180264, | |
| "grad_norm": 2.695357322692871, | |
| "learning_rate": 2.9296875000000003e-07, | |
| "loss": 0.0582, | |
| "step": 10480 | |
| }, | |
| { | |
| "epoch": 3.9810246679316887, | |
| "grad_norm": 0.9629122018814087, | |
| "learning_rate": 2.44140625e-07, | |
| "loss": 0.0201, | |
| "step": 10490 | |
| }, | |
| { | |
| "epoch": 3.984819734345351, | |
| "grad_norm": 0.8045425415039062, | |
| "learning_rate": 1.953125e-07, | |
| "loss": 0.0653, | |
| "step": 10500 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 10540, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2762272477794816.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
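
For reference, a minimal sketch of how the `log_history` entries above can be read back and plotted. This assumes the state is saved under the conventional name `trainer_state.json` inside a checkpoint directory (the exact path is an assumption, not something stated in this file), and it uses only the standard `json` module and `matplotlib`:

```python
import json
import matplotlib.pyplot as plt

# Path is an assumption: trainer_state.json is the conventional filename
# written by the Hugging Face Trainer inside each checkpoint directory,
# e.g. checkpoint-10500/trainer_state.json. Adjust as needed.
with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only training-log entries (those carrying a "loss" value).
# In this run one entry is written every logging_steps = 10 optimizer steps.
train_logs = [e for e in state["log_history"] if "loss" in e]
steps = [e["step"] for e in train_logs]
losses = [e["loss"] for e in train_logs]
lrs = [e["learning_rate"] for e in train_logs]

# Plot loss and learning rate against the global step on twin axes.
fig, ax_loss = plt.subplots()
ax_loss.plot(steps, losses, label="training loss")
ax_loss.set_xlabel("global step")
ax_loss.set_ylabel("loss")

ax_lr = ax_loss.twinx()
ax_lr.plot(steps, lrs, color="tab:orange", label="learning rate")
ax_lr.set_ylabel("learning rate")

fig.tight_layout()
plt.show()
```

With this run's data, the loss curve would show the drop from roughly 0.68 at step 10 to the 0.02–0.09 range near step 10500, alongside the learning rate decaying toward zero as training approaches `max_steps` = 10540.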