Note: this trainer_state.json fails strict JSON parsing (`Invalid JSON: Unexpected token 'N', ..."ad_norm": NaN,"... is not valid JSON`) because `grad_norm` is logged as the literal `NaN` (e.g. at step 50), and the JSON specification does not allow `NaN` as a value.
| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.7578947368421053, | |
| "eval_steps": 500, | |
| "global_step": 9000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0008421052631578948, | |
| "grad_norm": 0.2948727011680603, | |
| "learning_rate": 1.8e-05, | |
| "loss": 0.6064, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0016842105263157896, | |
| "grad_norm": 0.3294071853160858, | |
| "learning_rate": 3.8e-05, | |
| "loss": 0.6688, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.0025263157894736842, | |
| "grad_norm": 0.6131762266159058, | |
| "learning_rate": 5.8e-05, | |
| "loss": 0.5587, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.003368421052631579, | |
| "grad_norm": 0.8525914549827576, | |
| "learning_rate": 7.800000000000001e-05, | |
| "loss": 0.566, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.004210526315789474, | |
| "grad_norm": NaN, | |
| "learning_rate": 9.6e-05, | |
| "loss": 1.4335, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.0050526315789473685, | |
| "grad_norm": 0.3046252429485321, | |
| "learning_rate": 0.00011399999999999999, | |
| "loss": 0.551, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.005894736842105263, | |
| "grad_norm": 0.4604945182800293, | |
| "learning_rate": 0.000134, | |
| "loss": 0.526, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.006736842105263158, | |
| "grad_norm": 0.43640658259391785, | |
| "learning_rate": 0.000154, | |
| "loss": 0.51, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.007578947368421052, | |
| "grad_norm": 0.6126458644866943, | |
| "learning_rate": 0.000174, | |
| "loss": 0.4569, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.008421052631578947, | |
| "grad_norm": 3.4451630115509033, | |
| "learning_rate": 0.000194, | |
| "loss": 1.1195, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.009263157894736843, | |
| "grad_norm": 0.3751116096973419, | |
| "learning_rate": 0.0001998811040339703, | |
| "loss": 0.5587, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.010105263157894737, | |
| "grad_norm": 0.5739931464195251, | |
| "learning_rate": 0.00019971125265392782, | |
| "loss": 0.4409, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.010947368421052631, | |
| "grad_norm": 0.43667858839035034, | |
| "learning_rate": 0.00019954140127388537, | |
| "loss": 0.4715, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.011789473684210527, | |
| "grad_norm": 0.8669484257698059, | |
| "learning_rate": 0.0001993715498938429, | |
| "loss": 0.5422, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.01263157894736842, | |
| "grad_norm": 1.5785276889801025, | |
| "learning_rate": 0.00019920169851380044, | |
| "loss": 1.0571, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.013473684210526317, | |
| "grad_norm": 0.4289887249469757, | |
| "learning_rate": 0.00019903184713375798, | |
| "loss": 0.6397, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.01431578947368421, | |
| "grad_norm": 0.3432268500328064, | |
| "learning_rate": 0.0001988619957537155, | |
| "loss": 0.4, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.015157894736842105, | |
| "grad_norm": 0.4807721972465515, | |
| "learning_rate": 0.00019869214437367305, | |
| "loss": 0.4532, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.016, | |
| "grad_norm": 0.7666722536087036, | |
| "learning_rate": 0.0001985222929936306, | |
| "loss": 0.58, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.016842105263157894, | |
| "grad_norm": 41.56742477416992, | |
| "learning_rate": 0.00019840339702760086, | |
| "loss": 2.3371, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.01768421052631579, | |
| "grad_norm": 0.27794915437698364, | |
| "learning_rate": 0.0001982335456475584, | |
| "loss": 0.631, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.018526315789473686, | |
| "grad_norm": 0.45571169257164, | |
| "learning_rate": 0.00019806369426751593, | |
| "loss": 0.4544, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.01936842105263158, | |
| "grad_norm": 0.9907029867172241, | |
| "learning_rate": 0.00019789384288747348, | |
| "loss": 0.4042, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.020210526315789474, | |
| "grad_norm": 0.7444189786911011, | |
| "learning_rate": 0.00019772399150743103, | |
| "loss": 0.469, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.021052631578947368, | |
| "grad_norm": 2.981804132461548, | |
| "learning_rate": 0.00019755414012738855, | |
| "loss": 1.1646, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.021894736842105262, | |
| "grad_norm": 0.2933043837547302, | |
| "learning_rate": 0.0001973842887473461, | |
| "loss": 0.5745, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.02273684210526316, | |
| "grad_norm": 0.329301118850708, | |
| "learning_rate": 0.00019721443736730364, | |
| "loss": 0.3978, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.023578947368421053, | |
| "grad_norm": 0.42804253101348877, | |
| "learning_rate": 0.00019704458598726116, | |
| "loss": 0.4352, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.024421052631578948, | |
| "grad_norm": 0.4651006758213043, | |
| "learning_rate": 0.0001968747346072187, | |
| "loss": 0.3708, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.02526315789473684, | |
| "grad_norm": 1.5117040872573853, | |
| "learning_rate": 0.00019670488322717623, | |
| "loss": 0.7743, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.026105263157894736, | |
| "grad_norm": 0.37924298644065857, | |
| "learning_rate": 0.00019653503184713378, | |
| "loss": 0.5297, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.026947368421052633, | |
| "grad_norm": 0.40597257018089294, | |
| "learning_rate": 0.00019636518046709132, | |
| "loss": 0.4548, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.027789473684210527, | |
| "grad_norm": 0.5641767382621765, | |
| "learning_rate": 0.00019619532908704885, | |
| "loss": 0.5121, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.02863157894736842, | |
| "grad_norm": 0.48890987038612366, | |
| "learning_rate": 0.0001960254777070064, | |
| "loss": 0.4627, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.029473684210526315, | |
| "grad_norm": 2.158003330230713, | |
| "learning_rate": 0.0001958556263269639, | |
| "loss": 1.0156, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.03031578947368421, | |
| "grad_norm": 0.31984683871269226, | |
| "learning_rate": 0.00019568577494692146, | |
| "loss": 0.6703, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.031157894736842107, | |
| "grad_norm": 0.3384983539581299, | |
| "learning_rate": 0.000195515923566879, | |
| "loss": 0.437, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.032, | |
| "grad_norm": 0.5063002109527588, | |
| "learning_rate": 0.00019534607218683653, | |
| "loss": 0.4519, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.032842105263157895, | |
| "grad_norm": 1.0176048278808594, | |
| "learning_rate": 0.00019517622080679408, | |
| "loss": 0.5687, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.03368421052631579, | |
| "grad_norm": 2.2058560848236084, | |
| "learning_rate": 0.00019500636942675162, | |
| "loss": 1.0849, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.03452631578947368, | |
| "grad_norm": 0.2913868725299835, | |
| "learning_rate": 0.00019483651804670914, | |
| "loss": 0.5526, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.03536842105263158, | |
| "grad_norm": 0.32760244607925415, | |
| "learning_rate": 0.0001946666666666667, | |
| "loss": 0.4839, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.03621052631578947, | |
| "grad_norm": 0.536840558052063, | |
| "learning_rate": 0.0001944968152866242, | |
| "loss": 0.5193, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.03705263157894737, | |
| "grad_norm": 0.6680445075035095, | |
| "learning_rate": 0.00019432696390658176, | |
| "loss": 0.4765, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.037894736842105266, | |
| "grad_norm": 2.1454713344573975, | |
| "learning_rate": 0.0001941571125265393, | |
| "loss": 1.0602, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.03873684210526316, | |
| "grad_norm": 0.35563862323760986, | |
| "learning_rate": 0.00019398726114649683, | |
| "loss": 0.5151, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.039578947368421054, | |
| "grad_norm": 0.35780319571495056, | |
| "learning_rate": 0.00019381740976645437, | |
| "loss": 0.35, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.04042105263157895, | |
| "grad_norm": 0.7377018332481384, | |
| "learning_rate": 0.00019364755838641192, | |
| "loss": 0.4746, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.04126315789473684, | |
| "grad_norm": 0.6868434548377991, | |
| "learning_rate": 0.00019347770700636944, | |
| "loss": 0.4324, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.042105263157894736, | |
| "grad_norm": 1.9966992139816284, | |
| "learning_rate": 0.000193307855626327, | |
| "loss": 0.9766, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.04294736842105263, | |
| "grad_norm": 0.2713783383369446, | |
| "learning_rate": 0.0001931380042462845, | |
| "loss": 0.7473, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.043789473684210524, | |
| "grad_norm": 0.36451488733291626, | |
| "learning_rate": 0.00019296815286624206, | |
| "loss": 0.5569, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.04463157894736842, | |
| "grad_norm": 0.31844836473464966, | |
| "learning_rate": 0.0001927983014861996, | |
| "loss": 0.5004, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.04547368421052632, | |
| "grad_norm": 0.7280228137969971, | |
| "learning_rate": 0.00019262845010615713, | |
| "loss": 0.3666, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.04631578947368421, | |
| "grad_norm": 2.725432872772217, | |
| "learning_rate": 0.00019245859872611467, | |
| "loss": 0.9544, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.04715789473684211, | |
| "grad_norm": 0.2997317612171173, | |
| "learning_rate": 0.0001922887473460722, | |
| "loss": 0.4112, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.048, | |
| "grad_norm": 0.3395216763019562, | |
| "learning_rate": 0.00019211889596602974, | |
| "loss": 0.5209, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.048842105263157895, | |
| "grad_norm": 0.3606681227684021, | |
| "learning_rate": 0.0001919490445859873, | |
| "loss": 0.549, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.04968421052631579, | |
| "grad_norm": 0.6045392155647278, | |
| "learning_rate": 0.0001917791932059448, | |
| "loss": 0.4447, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.05052631578947368, | |
| "grad_norm": 1.960634469985962, | |
| "learning_rate": 0.00019160934182590236, | |
| "loss": 0.87, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.05136842105263158, | |
| "grad_norm": 0.3449041247367859, | |
| "learning_rate": 0.0001914394904458599, | |
| "loss": 0.5336, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.05221052631578947, | |
| "grad_norm": 0.47411444783210754, | |
| "learning_rate": 0.00019126963906581742, | |
| "loss": 0.4872, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.053052631578947365, | |
| "grad_norm": 0.4293436110019684, | |
| "learning_rate": 0.00019109978768577497, | |
| "loss": 0.4825, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.053894736842105266, | |
| "grad_norm": 0.8081805109977722, | |
| "learning_rate": 0.0001909299363057325, | |
| "loss": 0.5144, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.05473684210526316, | |
| "grad_norm": 1.4926713705062866, | |
| "learning_rate": 0.00019076008492569004, | |
| "loss": 0.8288, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.055578947368421054, | |
| "grad_norm": 0.35779792070388794, | |
| "learning_rate": 0.0001905902335456476, | |
| "loss": 0.5208, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.05642105263157895, | |
| "grad_norm": 0.35513201355934143, | |
| "learning_rate": 0.0001904203821656051, | |
| "loss": 0.5325, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.05726315789473684, | |
| "grad_norm": 0.5638071298599243, | |
| "learning_rate": 0.00019025053078556265, | |
| "loss": 0.439, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.058105263157894736, | |
| "grad_norm": 0.5975926518440247, | |
| "learning_rate": 0.0001900806794055202, | |
| "loss": 0.503, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.05894736842105263, | |
| "grad_norm": 1.4200608730316162, | |
| "learning_rate": 0.0001899108280254777, | |
| "loss": 0.8735, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.059789473684210524, | |
| "grad_norm": 0.27127236127853394, | |
| "learning_rate": 0.00018974097664543524, | |
| "loss": 0.548, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.06063157894736842, | |
| "grad_norm": 0.4204287528991699, | |
| "learning_rate": 0.0001895711252653928, | |
| "loss": 0.5147, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.06147368421052631, | |
| "grad_norm": 0.36488077044487, | |
| "learning_rate": 0.0001894012738853503, | |
| "loss": 0.4798, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.06231578947368421, | |
| "grad_norm": 0.5879007577896118, | |
| "learning_rate": 0.00018923142250530786, | |
| "loss": 0.4313, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.06315789473684211, | |
| "grad_norm": 2.1575605869293213, | |
| "learning_rate": 0.00018906157112526538, | |
| "loss": 0.9985, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.064, | |
| "grad_norm": 0.3367585837841034, | |
| "learning_rate": 0.00018889171974522293, | |
| "loss": 0.6418, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.0648421052631579, | |
| "grad_norm": 0.4385579824447632, | |
| "learning_rate": 0.00018872186836518047, | |
| "loss": 0.5294, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.06568421052631579, | |
| "grad_norm": 0.8517505526542664, | |
| "learning_rate": 0.000188552016985138, | |
| "loss": 0.4541, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.06652631578947368, | |
| "grad_norm": 0.6036492586135864, | |
| "learning_rate": 0.00018838216560509554, | |
| "loss": 0.5745, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.06736842105263158, | |
| "grad_norm": 1.9340647459030151, | |
| "learning_rate": 0.0001882123142250531, | |
| "loss": 0.8355, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.06821052631578947, | |
| "grad_norm": 0.36341995000839233, | |
| "learning_rate": 0.0001880424628450106, | |
| "loss": 0.564, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.06905263157894737, | |
| "grad_norm": 0.7440673112869263, | |
| "learning_rate": 0.00018787261146496816, | |
| "loss": 0.5561, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.06989473684210526, | |
| "grad_norm": 0.42742231488227844, | |
| "learning_rate": 0.00018770276008492568, | |
| "loss": 0.4338, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.07073684210526315, | |
| "grad_norm": 0.8215022087097168, | |
| "learning_rate": 0.00018753290870488322, | |
| "loss": 0.5426, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.07157894736842105, | |
| "grad_norm": 1.9778239727020264, | |
| "learning_rate": 0.00018736305732484077, | |
| "loss": 0.9847, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.07242105263157894, | |
| "grad_norm": 0.3316439986228943, | |
| "learning_rate": 0.0001871932059447983, | |
| "loss": 0.5208, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.07326315789473684, | |
| "grad_norm": 0.44610121846199036, | |
| "learning_rate": 0.00018702335456475584, | |
| "loss": 0.5181, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.07410526315789474, | |
| "grad_norm": 0.4572339653968811, | |
| "learning_rate": 0.0001868535031847134, | |
| "loss": 0.472, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.07494736842105264, | |
| "grad_norm": 0.6796393394470215, | |
| "learning_rate": 0.0001866836518046709, | |
| "loss": 0.5231, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.07578947368421053, | |
| "grad_norm": 2.218637704849243, | |
| "learning_rate": 0.00018651380042462845, | |
| "loss": 1.0274, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.07663157894736843, | |
| "grad_norm": 0.33950433135032654, | |
| "learning_rate": 0.00018634394904458598, | |
| "loss": 0.5662, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.07747368421052632, | |
| "grad_norm": 0.3826090693473816, | |
| "learning_rate": 0.00018617409766454352, | |
| "loss": 0.6224, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.07831578947368421, | |
| "grad_norm": 0.6022137403488159, | |
| "learning_rate": 0.00018600424628450107, | |
| "loss": 0.4505, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.07915789473684211, | |
| "grad_norm": 0.7508429884910583, | |
| "learning_rate": 0.0001858343949044586, | |
| "loss": 0.6001, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 2.407134771347046, | |
| "learning_rate": 0.00018566454352441614, | |
| "loss": 1.1017, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.0808421052631579, | |
| "grad_norm": 0.3121108114719391, | |
| "learning_rate": 0.00018549469214437366, | |
| "loss": 0.5718, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.08168421052631579, | |
| "grad_norm": 0.5360256433486938, | |
| "learning_rate": 0.0001853248407643312, | |
| "loss": 0.4086, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.08252631578947368, | |
| "grad_norm": 0.8037394881248474, | |
| "learning_rate": 0.00018515498938428875, | |
| "loss": 0.4914, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.08336842105263158, | |
| "grad_norm": 0.6030890345573425, | |
| "learning_rate": 0.00018498513800424627, | |
| "loss": 0.5412, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.08421052631578947, | |
| "grad_norm": 1.7535202503204346, | |
| "learning_rate": 0.00018481528662420382, | |
| "loss": 1.0836, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.08505263157894737, | |
| "grad_norm": 0.34740158915519714, | |
| "learning_rate": 0.00018464543524416137, | |
| "loss": 0.6538, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.08589473684210526, | |
| "grad_norm": 0.4071979224681854, | |
| "learning_rate": 0.0001844755838641189, | |
| "loss": 0.5483, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.08673684210526315, | |
| "grad_norm": 0.38291046023368835, | |
| "learning_rate": 0.00018430573248407644, | |
| "loss": 0.4409, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.08757894736842105, | |
| "grad_norm": 0.5153874754905701, | |
| "learning_rate": 0.00018413588110403396, | |
| "loss": 0.3837, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.08842105263157894, | |
| "grad_norm": 2.0868875980377197, | |
| "learning_rate": 0.0001839660297239915, | |
| "loss": 0.8349, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.08926315789473684, | |
| "grad_norm": 0.39394018054008484, | |
| "learning_rate": 0.00018379617834394905, | |
| "loss": 0.501, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.09010526315789473, | |
| "grad_norm": 0.44843438267707825, | |
| "learning_rate": 0.00018362632696390657, | |
| "loss": 0.4275, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.09094736842105264, | |
| "grad_norm": 0.40821248292922974, | |
| "learning_rate": 0.00018345647558386412, | |
| "loss": 0.4714, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.09178947368421053, | |
| "grad_norm": 0.8207138776779175, | |
| "learning_rate": 0.00018328662420382167, | |
| "loss": 0.486, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.09263157894736843, | |
| "grad_norm": 2.6762123107910156, | |
| "learning_rate": 0.0001831167728237792, | |
| "loss": 0.8407, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.09347368421052632, | |
| "grad_norm": 0.36313536763191223, | |
| "learning_rate": 0.00018294692144373673, | |
| "loss": 0.4636, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.09431578947368421, | |
| "grad_norm": 0.4025310277938843, | |
| "learning_rate": 0.00018277707006369426, | |
| "loss": 0.4223, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.09515789473684211, | |
| "grad_norm": 0.4154893755912781, | |
| "learning_rate": 0.0001826072186836518, | |
| "loss": 0.4727, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.096, | |
| "grad_norm": 0.7508569359779358, | |
| "learning_rate": 0.00018243736730360935, | |
| "loss": 0.5067, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.0968421052631579, | |
| "grad_norm": 1.8863797187805176, | |
| "learning_rate": 0.00018226751592356687, | |
| "loss": 1.0783, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.09768421052631579, | |
| "grad_norm": 0.3644469678401947, | |
| "learning_rate": 0.00018209766454352442, | |
| "loss": 0.5204, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.09852631578947368, | |
| "grad_norm": 0.3684028089046478, | |
| "learning_rate": 0.00018192781316348197, | |
| "loss": 0.5487, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.09936842105263158, | |
| "grad_norm": 0.5149257779121399, | |
| "learning_rate": 0.00018175796178343949, | |
| "loss": 0.3638, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.10021052631578947, | |
| "grad_norm": 0.5993695855140686, | |
| "learning_rate": 0.00018158811040339703, | |
| "loss": 0.3902, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.10105263157894737, | |
| "grad_norm": 1.52362060546875, | |
| "learning_rate": 0.00018141825902335455, | |
| "loss": 0.8221, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.10189473684210526, | |
| "grad_norm": 0.3562985956668854, | |
| "learning_rate": 0.0001812484076433121, | |
| "loss": 0.6236, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.10273684210526315, | |
| "grad_norm": 0.5047670006752014, | |
| "learning_rate": 0.00018107855626326965, | |
| "loss": 0.4056, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.10357894736842105, | |
| "grad_norm": 0.4353023171424866, | |
| "learning_rate": 0.00018090870488322717, | |
| "loss": 0.4499, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.10442105263157894, | |
| "grad_norm": 0.5599729418754578, | |
| "learning_rate": 0.00018073885350318472, | |
| "loss": 0.5245, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.10526315789473684, | |
| "grad_norm": 0.9040461182594299, | |
| "learning_rate": 0.00018056900212314224, | |
| "loss": 0.8732, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.10610526315789473, | |
| "grad_norm": 0.3409920632839203, | |
| "learning_rate": 0.00018039915074309978, | |
| "loss": 0.7919, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.10694736842105264, | |
| "grad_norm": 0.47438284754753113, | |
| "learning_rate": 0.00018022929936305733, | |
| "loss": 0.4574, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.10778947368421053, | |
| "grad_norm": 0.4298554062843323, | |
| "learning_rate": 0.00018005944798301485, | |
| "loss": 0.4319, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.10863157894736843, | |
| "grad_norm": 0.5575684905052185, | |
| "learning_rate": 0.0001798895966029724, | |
| "loss": 0.375, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.10947368421052632, | |
| "grad_norm": 1.7726786136627197, | |
| "learning_rate": 0.00017971974522292995, | |
| "loss": 0.9513, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.11031578947368421, | |
| "grad_norm": 0.29987606406211853, | |
| "learning_rate": 0.00017954989384288747, | |
| "loss": 0.7624, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.11115789473684211, | |
| "grad_norm": 0.36507460474967957, | |
| "learning_rate": 0.00017938004246284501, | |
| "loss": 0.4088, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.112, | |
| "grad_norm": 0.4720146954059601, | |
| "learning_rate": 0.00017921019108280254, | |
| "loss": 0.4442, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.1128421052631579, | |
| "grad_norm": 0.6479263305664062, | |
| "learning_rate": 0.00017904033970276008, | |
| "loss": 0.4397, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.11368421052631579, | |
| "grad_norm": 1.8554607629776, | |
| "learning_rate": 0.00017887048832271763, | |
| "loss": 0.9708, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.11452631578947368, | |
| "grad_norm": 0.3114994764328003, | |
| "learning_rate": 0.00017870063694267515, | |
| "loss": 0.5778, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.11536842105263158, | |
| "grad_norm": 0.4002878665924072, | |
| "learning_rate": 0.0001785307855626327, | |
| "loss": 0.4949, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.11621052631578947, | |
| "grad_norm": 0.4125431776046753, | |
| "learning_rate": 0.00017836093418259025, | |
| "loss": 0.5122, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.11705263157894737, | |
| "grad_norm": 0.8050837516784668, | |
| "learning_rate": 0.00017819108280254777, | |
| "loss": 0.4228, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.11789473684210526, | |
| "grad_norm": 1.4780641794204712, | |
| "learning_rate": 0.0001780212314225053, | |
| "loss": 0.9094, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.11873684210526315, | |
| "grad_norm": 0.3558010458946228, | |
| "learning_rate": 0.00017785138004246283, | |
| "loss": 0.487, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.11957894736842105, | |
| "grad_norm": 0.3622126579284668, | |
| "learning_rate": 0.00017768152866242038, | |
| "loss": 0.5116, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.12042105263157894, | |
| "grad_norm": 0.4631308913230896, | |
| "learning_rate": 0.00017751167728237793, | |
| "loss": 0.3919, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.12126315789473684, | |
| "grad_norm": 0.8485559225082397, | |
| "learning_rate": 0.00017734182590233545, | |
| "loss": 0.5028, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.12210526315789473, | |
| "grad_norm": 1.857799768447876, | |
| "learning_rate": 0.000177171974522293, | |
| "loss": 0.944, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.12294736842105262, | |
| "grad_norm": 0.29168710112571716, | |
| "learning_rate": 0.00017700212314225054, | |
| "loss": 0.5765, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.12378947368421053, | |
| "grad_norm": 0.36629101634025574, | |
| "learning_rate": 0.00017683227176220806, | |
| "loss": 0.3604, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.12463157894736843, | |
| "grad_norm": 0.4458591341972351, | |
| "learning_rate": 0.0001766624203821656, | |
| "loss": 0.4628, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.12547368421052632, | |
| "grad_norm": 0.9340721964836121, | |
| "learning_rate": 0.00017649256900212313, | |
| "loss": 0.4289, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.12631578947368421, | |
| "grad_norm": 1.931969404220581, | |
| "learning_rate": 0.00017632271762208068, | |
| "loss": 0.8819, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.1271578947368421, | |
| "grad_norm": 0.34700122475624084, | |
| "learning_rate": 0.00017615286624203823, | |
| "loss": 0.481, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.128, | |
| "grad_norm": 0.4540385603904724, | |
| "learning_rate": 0.00017598301486199575, | |
| "loss": 0.4309, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.1288421052631579, | |
| "grad_norm": 0.4399193823337555, | |
| "learning_rate": 0.0001758131634819533, | |
| "loss": 0.4394, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.1296842105263158, | |
| "grad_norm": 0.9772260785102844, | |
| "learning_rate": 0.00017564331210191082, | |
| "loss": 0.5023, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.13052631578947368, | |
| "grad_norm": 1.433576226234436, | |
| "learning_rate": 0.00017547346072186836, | |
| "loss": 1.0256, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.13136842105263158, | |
| "grad_norm": 0.3410467505455017, | |
| "learning_rate": 0.0001753036093418259, | |
| "loss": 0.6604, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.13221052631578947, | |
| "grad_norm": 0.392319917678833, | |
| "learning_rate": 0.00017513375796178343, | |
| "loss": 0.4625, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.13305263157894737, | |
| "grad_norm": 0.5776181221008301, | |
| "learning_rate": 0.00017496390658174098, | |
| "loss": 0.5512, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.13389473684210526, | |
| "grad_norm": 0.5809136629104614, | |
| "learning_rate": 0.00017479405520169853, | |
| "loss": 0.418, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.13473684210526315, | |
| "grad_norm": 2.6065728664398193, | |
| "learning_rate": 0.00017462420382165605, | |
| "loss": 1.0438, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.13557894736842105, | |
| "grad_norm": 0.3214600086212158, | |
| "learning_rate": 0.0001744543524416136, | |
| "loss": 0.5217, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.13642105263157894, | |
| "grad_norm": 0.684394359588623, | |
| "learning_rate": 0.00017428450106157111, | |
| "loss": 0.4193, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.13726315789473684, | |
| "grad_norm": 0.5176171064376831, | |
| "learning_rate": 0.00017411464968152866, | |
| "loss": 0.3715, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.13810526315789473, | |
| "grad_norm": 1.1062986850738525, | |
| "learning_rate": 0.0001739447983014862, | |
| "loss": 0.5975, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.13894736842105262, | |
| "grad_norm": 2.4857382774353027, | |
| "learning_rate": 0.00017377494692144373, | |
| "loss": 0.9618, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.13978947368421052, | |
| "grad_norm": 0.3263668715953827, | |
| "learning_rate": 0.00017360509554140128, | |
| "loss": 0.6863, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.1406315789473684, | |
| "grad_norm": 0.35392507910728455, | |
| "learning_rate": 0.00017343524416135882, | |
| "loss": 0.4826, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.1414736842105263, | |
| "grad_norm": 0.3837219178676605, | |
| "learning_rate": 0.00017326539278131634, | |
| "loss": 0.3812, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.1423157894736842, | |
| "grad_norm": 0.8743443489074707, | |
| "learning_rate": 0.0001730955414012739, | |
| "loss": 0.4712, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.1431578947368421, | |
| "grad_norm": 1.9244558811187744, | |
| "learning_rate": 0.0001729256900212314, | |
| "loss": 0.8909, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.144, | |
| "grad_norm": 0.3320176899433136, | |
| "learning_rate": 0.00017275583864118896, | |
| "loss": 0.6078, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.14484210526315788, | |
| "grad_norm": 0.5013318061828613, | |
| "learning_rate": 0.0001725859872611465, | |
| "loss": 0.4295, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.14568421052631578, | |
| "grad_norm": 0.5023675560951233, | |
| "learning_rate": 0.00017241613588110403, | |
| "loss": 0.4376, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.14652631578947367, | |
| "grad_norm": 0.7797672152519226, | |
| "learning_rate": 0.00017224628450106157, | |
| "loss": 0.4582, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.14736842105263157, | |
| "grad_norm": 1.6873104572296143, | |
| "learning_rate": 0.00017207643312101912, | |
| "loss": 0.7561, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.1482105263157895, | |
| "grad_norm": 0.4051543176174164, | |
| "learning_rate": 0.00017190658174097664, | |
| "loss": 0.608, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.14905263157894738, | |
| "grad_norm": 0.3614313006401062, | |
| "learning_rate": 0.0001717367303609342, | |
| "loss": 0.4534, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.14989473684210528, | |
| "grad_norm": 0.5554386973381042, | |
| "learning_rate": 0.0001715668789808917, | |
| "loss": 0.4134, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.15073684210526317, | |
| "grad_norm": 0.6349577903747559, | |
| "learning_rate": 0.00017139702760084926, | |
| "loss": 0.6117, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.15157894736842106, | |
| "grad_norm": 2.212400197982788, | |
| "learning_rate": 0.0001712271762208068, | |
| "loss": 0.87, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.15242105263157896, | |
| "grad_norm": 0.32632893323898315, | |
| "learning_rate": 0.00017105732484076433, | |
| "loss": 0.6271, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.15326315789473685, | |
| "grad_norm": 0.47393667697906494, | |
| "learning_rate": 0.00017088747346072187, | |
| "loss": 0.4916, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.15410526315789475, | |
| "grad_norm": 0.5286993384361267, | |
| "learning_rate": 0.0001707176220806794, | |
| "loss": 0.4655, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.15494736842105264, | |
| "grad_norm": 0.586979329586029, | |
| "learning_rate": 0.00017054777070063694, | |
| "loss": 0.4179, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.15578947368421053, | |
| "grad_norm": 2.648200273513794, | |
| "learning_rate": 0.0001703779193205945, | |
| "loss": 0.9152, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.15663157894736843, | |
| "grad_norm": 0.34112730622291565, | |
| "learning_rate": 0.000170208067940552, | |
| "loss": 0.5162, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.15747368421052632, | |
| "grad_norm": 0.4711189568042755, | |
| "learning_rate": 0.00017003821656050956, | |
| "loss": 0.4571, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.15831578947368422, | |
| "grad_norm": 0.47188910841941833, | |
| "learning_rate": 0.0001698683651804671, | |
| "loss": 0.3708, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.1591578947368421, | |
| "grad_norm": 0.749568521976471, | |
| "learning_rate": 0.00016969851380042462, | |
| "loss": 0.5347, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 1.7001060247421265, | |
| "learning_rate": 0.00016952866242038217, | |
| "loss": 0.865, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.1608421052631579, | |
| "grad_norm": 0.3287590444087982, | |
| "learning_rate": 0.0001693588110403397, | |
| "loss": 0.6793, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.1616842105263158, | |
| "grad_norm": 0.32221516966819763, | |
| "learning_rate": 0.00016918895966029724, | |
| "loss": 0.5273, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.16252631578947369, | |
| "grad_norm": 0.5661808252334595, | |
| "learning_rate": 0.0001690191082802548, | |
| "loss": 0.5382, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.16336842105263158, | |
| "grad_norm": 0.956669270992279, | |
| "learning_rate": 0.0001688492569002123, | |
| "loss": 0.5235, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.16421052631578947, | |
| "grad_norm": 2.0267767906188965, | |
| "learning_rate": 0.00016867940552016986, | |
| "loss": 0.9644, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.16505263157894737, | |
| "grad_norm": 0.3559626042842865, | |
| "learning_rate": 0.0001685095541401274, | |
| "loss": 0.6881, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.16589473684210526, | |
| "grad_norm": 0.38796085119247437, | |
| "learning_rate": 0.00016833970276008492, | |
| "loss": 0.5117, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.16673684210526316, | |
| "grad_norm": 0.5446242094039917, | |
| "learning_rate": 0.00016816985138004247, | |
| "loss": 0.4413, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.16757894736842105, | |
| "grad_norm": 0.8065064549446106, | |
| "learning_rate": 0.000168, | |
| "loss": 0.4918, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.16842105263157894, | |
| "grad_norm": 2.228316307067871, | |
| "learning_rate": 0.00016783014861995754, | |
| "loss": 0.8926, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.16926315789473684, | |
| "grad_norm": 0.32953980565071106, | |
| "learning_rate": 0.00016766029723991509, | |
| "loss": 0.6337, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.17010526315789473, | |
| "grad_norm": 0.3540143370628357, | |
| "learning_rate": 0.0001674904458598726, | |
| "loss": 0.3952, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.17094736842105263, | |
| "grad_norm": 0.5358151197433472, | |
| "learning_rate": 0.00016732059447983015, | |
| "loss": 0.5481, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.17178947368421052, | |
| "grad_norm": 0.6727182269096375, | |
| "learning_rate": 0.0001671507430997877, | |
| "loss": 0.4033, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.1726315789473684, | |
| "grad_norm": 1.5078625679016113, | |
| "learning_rate": 0.00016698089171974522, | |
| "loss": 0.8821, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.1734736842105263, | |
| "grad_norm": 0.38373953104019165, | |
| "learning_rate": 0.00016681104033970277, | |
| "loss": 0.4362, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.1743157894736842, | |
| "grad_norm": 0.5610182881355286, | |
| "learning_rate": 0.0001666411889596603, | |
| "loss": 0.4401, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.1751578947368421, | |
| "grad_norm": 0.3951750695705414, | |
| "learning_rate": 0.00016647133757961784, | |
| "loss": 0.3958, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.176, | |
| "grad_norm": 0.7693811058998108, | |
| "learning_rate": 0.00016630148619957538, | |
| "loss": 0.5733, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.17684210526315788, | |
| "grad_norm": 1.3583319187164307, | |
| "learning_rate": 0.0001661316348195329, | |
| "loss": 0.8683, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.17768421052631578, | |
| "grad_norm": 0.38344472646713257, | |
| "learning_rate": 0.00016596178343949045, | |
| "loss": 0.5884, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.17852631578947367, | |
| "grad_norm": 0.5510813593864441, | |
| "learning_rate": 0.00016579193205944797, | |
| "loss": 0.4586, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.17936842105263157, | |
| "grad_norm": 0.4634307026863098, | |
| "learning_rate": 0.00016562208067940552, | |
| "loss": 0.4628, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.18021052631578946, | |
| "grad_norm": 0.6211772561073303, | |
| "learning_rate": 0.00016545222929936307, | |
| "loss": 0.3752, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.18105263157894738, | |
| "grad_norm": 1.8192520141601562, | |
| "learning_rate": 0.0001652823779193206, | |
| "loss": 0.7837, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.18189473684210528, | |
| "grad_norm": 0.35456815361976624, | |
| "learning_rate": 0.00016511252653927814, | |
| "loss": 0.4848, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.18273684210526317, | |
| "grad_norm": 0.4259573817253113, | |
| "learning_rate": 0.00016494267515923568, | |
| "loss": 0.4495, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.18357894736842106, | |
| "grad_norm": 0.6338594555854797, | |
| "learning_rate": 0.0001647728237791932, | |
| "loss": 0.4754, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.18442105263157896, | |
| "grad_norm": 0.5970250368118286, | |
| "learning_rate": 0.00016460297239915075, | |
| "loss": 0.4639, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.18526315789473685, | |
| "grad_norm": 1.6016937494277954, | |
| "learning_rate": 0.00016443312101910827, | |
| "loss": 0.7107, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.18610526315789475, | |
| "grad_norm": 0.3862210512161255, | |
| "learning_rate": 0.00016426326963906582, | |
| "loss": 0.5027, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.18694736842105264, | |
| "grad_norm": 0.4793887734413147, | |
| "learning_rate": 0.00016409341825902337, | |
| "loss": 0.4371, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.18778947368421053, | |
| "grad_norm": 0.37670132517814636, | |
| "learning_rate": 0.00016392356687898089, | |
| "loss": 0.5004, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.18863157894736843, | |
| "grad_norm": 0.559830367565155, | |
| "learning_rate": 0.00016375371549893843, | |
| "loss": 0.4086, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.18947368421052632, | |
| "grad_norm": 2.0401771068573, | |
| "learning_rate": 0.00016358386411889598, | |
| "loss": 0.786, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.19031578947368422, | |
| "grad_norm": 0.35014039278030396, | |
| "learning_rate": 0.0001634140127388535, | |
| "loss": 0.4927, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.1911578947368421, | |
| "grad_norm": 0.40696877241134644, | |
| "learning_rate": 0.00016324416135881105, | |
| "loss": 0.4181, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.192, | |
| "grad_norm": 0.46805477142333984, | |
| "learning_rate": 0.00016307430997876857, | |
| "loss": 0.452, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.1928421052631579, | |
| "grad_norm": 0.88698410987854, | |
| "learning_rate": 0.00016290445859872612, | |
| "loss": 0.4575, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.1936842105263158, | |
| "grad_norm": 1.5436071157455444, | |
| "learning_rate": 0.00016273460721868366, | |
| "loss": 0.8377, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.19452631578947369, | |
| "grad_norm": 0.37646251916885376, | |
| "learning_rate": 0.00016256475583864118, | |
| "loss": 0.4991, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.19536842105263158, | |
| "grad_norm": 0.4300127923488617, | |
| "learning_rate": 0.00016239490445859873, | |
| "loss": 0.53, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.19621052631578947, | |
| "grad_norm": 0.45355018973350525, | |
| "learning_rate": 0.00016222505307855628, | |
| "loss": 0.4933, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.19705263157894737, | |
| "grad_norm": 0.8323020935058594, | |
| "learning_rate": 0.0001620552016985138, | |
| "loss": 0.4728, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.19789473684210526, | |
| "grad_norm": 1.9126681089401245, | |
| "learning_rate": 0.00016188535031847135, | |
| "loss": 0.9841, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.19873684210526316, | |
| "grad_norm": 0.332199364900589, | |
| "learning_rate": 0.00016171549893842887, | |
| "loss": 0.5236, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.19957894736842105, | |
| "grad_norm": 0.4285382330417633, | |
| "learning_rate": 0.00016154564755838642, | |
| "loss": 0.4751, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.20042105263157894, | |
| "grad_norm": 0.5518692135810852, | |
| "learning_rate": 0.00016137579617834396, | |
| "loss": 0.4559, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.20126315789473684, | |
| "grad_norm": 0.9700078964233398, | |
| "learning_rate": 0.00016120594479830148, | |
| "loss": 0.4473, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.20210526315789473, | |
| "grad_norm": 1.7239770889282227, | |
| "learning_rate": 0.00016103609341825903, | |
| "loss": 0.989, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.20294736842105263, | |
| "grad_norm": 0.5785214304924011, | |
| "learning_rate": 0.00016086624203821655, | |
| "loss": 0.5337, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.20378947368421052, | |
| "grad_norm": 0.4549299478530884, | |
| "learning_rate": 0.0001606963906581741, | |
| "loss": 0.5333, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.20463157894736841, | |
| "grad_norm": 0.3455871343612671, | |
| "learning_rate": 0.00016052653927813165, | |
| "loss": 0.3773, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.2054736842105263, | |
| "grad_norm": 0.741362988948822, | |
| "learning_rate": 0.00016035668789808917, | |
| "loss": 0.5402, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.2063157894736842, | |
| "grad_norm": 2.1377577781677246, | |
| "learning_rate": 0.00016018683651804671, | |
| "loss": 1.0227, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.2071578947368421, | |
| "grad_norm": 0.31699422001838684, | |
| "learning_rate": 0.00016001698513800426, | |
| "loss": 0.6183, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.208, | |
| "grad_norm": 0.42210838198661804, | |
| "learning_rate": 0.00015984713375796178, | |
| "loss": 0.5432, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.20884210526315788, | |
| "grad_norm": 0.43271490931510925, | |
| "learning_rate": 0.00015967728237791933, | |
| "loss": 0.4723, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.20968421052631578, | |
| "grad_norm": 0.8529659509658813, | |
| "learning_rate": 0.00015950743099787685, | |
| "loss": 0.4378, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.21052631578947367, | |
| "grad_norm": 2.1393823623657227, | |
| "learning_rate": 0.0001593375796178344, | |
| "loss": 0.9569, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.21136842105263157, | |
| "grad_norm": 0.39425280690193176, | |
| "learning_rate": 0.00015916772823779194, | |
| "loss": 0.42, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.21221052631578946, | |
| "grad_norm": 0.4368259608745575, | |
| "learning_rate": 0.00015899787685774946, | |
| "loss": 0.4317, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.21305263157894735, | |
| "grad_norm": 0.5311638712882996, | |
| "learning_rate": 0.000158828025477707, | |
| "loss": 0.3639, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.21389473684210528, | |
| "grad_norm": 0.6656532883644104, | |
| "learning_rate": 0.00015865817409766456, | |
| "loss": 0.458, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.21473684210526317, | |
| "grad_norm": 1.7003415822982788, | |
| "learning_rate": 0.00015848832271762208, | |
| "loss": 0.8852, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.21557894736842106, | |
| "grad_norm": 0.39136645197868347, | |
| "learning_rate": 0.00015831847133757963, | |
| "loss": 0.4906, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.21642105263157896, | |
| "grad_norm": 0.553306519985199, | |
| "learning_rate": 0.00015814861995753715, | |
| "loss": 0.4181, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.21726315789473685, | |
| "grad_norm": 0.6687208414077759, | |
| "learning_rate": 0.0001579787685774947, | |
| "loss": 0.4068, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.21810526315789475, | |
| "grad_norm": 0.7988360524177551, | |
| "learning_rate": 0.00015780891719745224, | |
| "loss": 0.5836, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.21894736842105264, | |
| "grad_norm": 1.8867573738098145, | |
| "learning_rate": 0.00015763906581740976, | |
| "loss": 1.0491, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.21978947368421053, | |
| "grad_norm": 0.3274540603160858, | |
| "learning_rate": 0.0001574692144373673, | |
| "loss": 0.4785, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.22063157894736843, | |
| "grad_norm": 0.49191635847091675, | |
| "learning_rate": 0.00015729936305732486, | |
| "loss": 0.4976, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.22147368421052632, | |
| "grad_norm": 0.5721222162246704, | |
| "learning_rate": 0.00015712951167728238, | |
| "loss": 0.4384, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.22231578947368422, | |
| "grad_norm": 0.9080024361610413, | |
| "learning_rate": 0.00015695966029723993, | |
| "loss": 0.4528, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.2231578947368421, | |
| "grad_norm": 2.2378227710723877, | |
| "learning_rate": 0.00015678980891719745, | |
| "loss": 0.9349, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.224, | |
| "grad_norm": 0.45769771933555603, | |
| "learning_rate": 0.000156619957537155, | |
| "loss": 0.6637, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.2248421052631579, | |
| "grad_norm": 0.4500807821750641, | |
| "learning_rate": 0.00015645010615711254, | |
| "loss": 0.5129, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.2256842105263158, | |
| "grad_norm": 0.49212267994880676, | |
| "learning_rate": 0.00015628025477707006, | |
| "loss": 0.5673, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.2265263157894737, | |
| "grad_norm": 0.6729869842529297, | |
| "learning_rate": 0.0001561104033970276, | |
| "loss": 0.4295, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.22736842105263158, | |
| "grad_norm": 2.135563373565674, | |
| "learning_rate": 0.00015594055201698513, | |
| "loss": 0.9253, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.22821052631578947, | |
| "grad_norm": 0.3811759650707245, | |
| "learning_rate": 0.00015577070063694268, | |
| "loss": 0.6266, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.22905263157894737, | |
| "grad_norm": 0.3998406231403351, | |
| "learning_rate": 0.00015560084925690022, | |
| "loss": 0.5072, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.22989473684210526, | |
| "grad_norm": 0.5444859862327576, | |
| "learning_rate": 0.00015543099787685774, | |
| "loss": 0.4199, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.23073684210526316, | |
| "grad_norm": 0.8080804347991943, | |
| "learning_rate": 0.0001552611464968153, | |
| "loss": 0.4498, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.23157894736842105, | |
| "grad_norm": 1.729440450668335, | |
| "learning_rate": 0.00015509129511677284, | |
| "loss": 0.9028, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.23242105263157894, | |
| "grad_norm": 0.3848320245742798, | |
| "learning_rate": 0.00015492144373673036, | |
| "loss": 0.6824, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.23326315789473684, | |
| "grad_norm": 0.3806059658527374, | |
| "learning_rate": 0.0001547515923566879, | |
| "loss": 0.4026, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.23410526315789473, | |
| "grad_norm": 0.6117827296257019, | |
| "learning_rate": 0.00015458174097664543, | |
| "loss": 0.4476, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.23494736842105263, | |
| "grad_norm": 0.617476224899292, | |
| "learning_rate": 0.00015441188959660298, | |
| "loss": 0.4291, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.23578947368421052, | |
| "grad_norm": 3.325373888015747, | |
| "learning_rate": 0.00015424203821656052, | |
| "loss": 0.9073, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.23663157894736842, | |
| "grad_norm": 0.3204039931297302, | |
| "learning_rate": 0.00015407218683651804, | |
| "loss": 0.4429, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.2374736842105263, | |
| "grad_norm": 0.5016764402389526, | |
| "learning_rate": 0.0001539023354564756, | |
| "loss": 0.4591, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.2383157894736842, | |
| "grad_norm": 0.339837908744812, | |
| "learning_rate": 0.00015373248407643314, | |
| "loss": 0.3768, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.2391578947368421, | |
| "grad_norm": 0.8145161867141724, | |
| "learning_rate": 0.00015356263269639066, | |
| "loss": 0.4685, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 2.590245246887207, | |
| "learning_rate": 0.0001533927813163482, | |
| "loss": 0.8875, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.24084210526315789, | |
| "grad_norm": 0.27945977449417114, | |
| "learning_rate": 0.00015322292993630573, | |
| "loss": 0.4617, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.24168421052631578, | |
| "grad_norm": 0.44162896275520325, | |
| "learning_rate": 0.00015305307855626327, | |
| "loss": 0.5119, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.24252631578947367, | |
| "grad_norm": 0.48978477716445923, | |
| "learning_rate": 0.00015288322717622082, | |
| "loss": 0.4441, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.24336842105263157, | |
| "grad_norm": 0.8361369371414185, | |
| "learning_rate": 0.00015271337579617834, | |
| "loss": 0.4917, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.24421052631578946, | |
| "grad_norm": 1.856650710105896, | |
| "learning_rate": 0.0001525435244161359, | |
| "loss": 1.0373, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.24505263157894736, | |
| "grad_norm": 0.33647775650024414, | |
| "learning_rate": 0.0001523736730360934, | |
| "loss": 0.57, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.24589473684210525, | |
| "grad_norm": 0.39696812629699707, | |
| "learning_rate": 0.00015220382165605096, | |
| "loss": 0.5177, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.24673684210526317, | |
| "grad_norm": 0.680009663105011, | |
| "learning_rate": 0.0001520339702760085, | |
| "loss": 0.3436, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.24757894736842107, | |
| "grad_norm": 1.0433998107910156, | |
| "learning_rate": 0.00015186411889596602, | |
| "loss": 0.5135, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.24842105263157896, | |
| "grad_norm": 2.094115734100342, | |
| "learning_rate": 0.00015169426751592357, | |
| "loss": 0.8141, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.24926315789473685, | |
| "grad_norm": 0.32484060525894165, | |
| "learning_rate": 0.00015152441613588112, | |
| "loss": 0.5105, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.2501052631578947, | |
| "grad_norm": 0.4649001359939575, | |
| "learning_rate": 0.00015135456475583864, | |
| "loss": 0.4184, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.25094736842105264, | |
| "grad_norm": 0.4989701807498932, | |
| "learning_rate": 0.0001511847133757962, | |
| "loss": 0.4171, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.2517894736842105, | |
| "grad_norm": 0.8283904194831848, | |
| "learning_rate": 0.0001510148619957537, | |
| "loss": 0.5362, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.25263157894736843, | |
| "grad_norm": 2.41343355178833, | |
| "learning_rate": 0.00015084501061571126, | |
| "loss": 0.8531, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.2534736842105263, | |
| "grad_norm": 0.3413928747177124, | |
| "learning_rate": 0.0001506751592356688, | |
| "loss": 0.6351, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 0.2543157894736842, | |
| "grad_norm": 0.3596470355987549, | |
| "learning_rate": 0.00015050530785562632, | |
| "loss": 0.4201, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.2551578947368421, | |
| "grad_norm": 0.4724276065826416, | |
| "learning_rate": 0.00015033545647558387, | |
| "loss": 0.3763, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.256, | |
| "grad_norm": 0.7316722273826599, | |
| "learning_rate": 0.00015016560509554142, | |
| "loss": 0.3916, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.25684210526315787, | |
| "grad_norm": 2.757746934890747, | |
| "learning_rate": 0.00014999575371549894, | |
| "loss": 0.843, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.2576842105263158, | |
| "grad_norm": 0.3595764935016632, | |
| "learning_rate": 0.00014982590233545649, | |
| "loss": 0.5024, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.25852631578947366, | |
| "grad_norm": 0.46667343378067017, | |
| "learning_rate": 0.000149656050955414, | |
| "loss": 0.5805, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 0.2593684210526316, | |
| "grad_norm": 0.35031619668006897, | |
| "learning_rate": 0.00014948619957537155, | |
| "loss": 0.4775, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.26021052631578945, | |
| "grad_norm": 0.4115280210971832, | |
| "learning_rate": 0.0001493163481953291, | |
| "loss": 0.4006, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.26105263157894737, | |
| "grad_norm": 1.8953053951263428, | |
| "learning_rate": 0.00014914649681528662, | |
| "loss": 0.767, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.26189473684210524, | |
| "grad_norm": 0.30454185605049133, | |
| "learning_rate": 0.00014897664543524417, | |
| "loss": 0.5656, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 0.26273684210526316, | |
| "grad_norm": 0.6822018027305603, | |
| "learning_rate": 0.00014880679405520172, | |
| "loss": 0.5186, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.2635789473684211, | |
| "grad_norm": 0.5316245555877686, | |
| "learning_rate": 0.00014863694267515924, | |
| "loss": 0.4083, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 0.26442105263157895, | |
| "grad_norm": 0.593500018119812, | |
| "learning_rate": 0.00014846709129511678, | |
| "loss": 0.3675, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 0.26526315789473687, | |
| "grad_norm": 1.867174506187439, | |
| "learning_rate": 0.0001482972399150743, | |
| "loss": 0.9189, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.26610526315789473, | |
| "grad_norm": 0.324643611907959, | |
| "learning_rate": 0.00014812738853503185, | |
| "loss": 0.4793, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 0.26694736842105266, | |
| "grad_norm": 0.4558936655521393, | |
| "learning_rate": 0.0001479575371549894, | |
| "loss": 0.4737, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 0.2677894736842105, | |
| "grad_norm": 0.46329912543296814, | |
| "learning_rate": 0.00014778768577494692, | |
| "loss": 0.5038, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.26863157894736844, | |
| "grad_norm": 0.7512781023979187, | |
| "learning_rate": 0.00014761783439490447, | |
| "loss": 0.4943, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 0.2694736842105263, | |
| "grad_norm": 1.2475156784057617, | |
| "learning_rate": 0.000147447983014862, | |
| "loss": 0.846, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.27031578947368423, | |
| "grad_norm": 0.3240302503108978, | |
| "learning_rate": 0.00014727813163481954, | |
| "loss": 0.5503, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 0.2711578947368421, | |
| "grad_norm": 0.49550485610961914, | |
| "learning_rate": 0.00014710828025477708, | |
| "loss": 0.456, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 0.272, | |
| "grad_norm": 0.5459126234054565, | |
| "learning_rate": 0.0001469384288747346, | |
| "loss": 0.4185, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 0.2728421052631579, | |
| "grad_norm": 0.7852250337600708, | |
| "learning_rate": 0.00014676857749469215, | |
| "loss": 0.5148, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.2736842105263158, | |
| "grad_norm": 2.70231556892395, | |
| "learning_rate": 0.0001465987261146497, | |
| "loss": 1.0239, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.2745263157894737, | |
| "grad_norm": 0.4467475712299347, | |
| "learning_rate": 0.00014642887473460722, | |
| "loss": 0.5637, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 0.2753684210526316, | |
| "grad_norm": 0.3841107189655304, | |
| "learning_rate": 0.00014625902335456477, | |
| "loss": 0.4139, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 0.27621052631578946, | |
| "grad_norm": 0.72939532995224, | |
| "learning_rate": 0.00014608917197452229, | |
| "loss": 0.4735, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 0.2770526315789474, | |
| "grad_norm": 0.7061725854873657, | |
| "learning_rate": 0.00014591932059447983, | |
| "loss": 0.4229, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 0.27789473684210525, | |
| "grad_norm": 1.8529305458068848, | |
| "learning_rate": 0.00014574946921443738, | |
| "loss": 0.8639, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.27873684210526317, | |
| "grad_norm": 0.3370007276535034, | |
| "learning_rate": 0.0001455796178343949, | |
| "loss": 0.6602, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 0.27957894736842104, | |
| "grad_norm": 0.4234888255596161, | |
| "learning_rate": 0.00014540976645435245, | |
| "loss": 0.4598, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 0.28042105263157896, | |
| "grad_norm": 0.4830009937286377, | |
| "learning_rate": 0.00014523991507431, | |
| "loss": 0.3922, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 0.2812631578947368, | |
| "grad_norm": 0.6933443546295166, | |
| "learning_rate": 0.00014507006369426752, | |
| "loss": 0.4811, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 0.28210526315789475, | |
| "grad_norm": 1.7801717519760132, | |
| "learning_rate": 0.00014490021231422506, | |
| "loss": 0.9388, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.2829473684210526, | |
| "grad_norm": 0.3998657166957855, | |
| "learning_rate": 0.00014473036093418258, | |
| "loss": 0.5352, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 0.28378947368421054, | |
| "grad_norm": 0.426088809967041, | |
| "learning_rate": 0.00014456050955414013, | |
| "loss": 0.4185, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 0.2846315789473684, | |
| "grad_norm": 0.6409427523612976, | |
| "learning_rate": 0.00014439065817409768, | |
| "loss": 0.4261, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 0.2854736842105263, | |
| "grad_norm": 0.5442774891853333, | |
| "learning_rate": 0.0001442208067940552, | |
| "loss": 0.3617, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.2863157894736842, | |
| "grad_norm": 1.6628094911575317, | |
| "learning_rate": 0.00014405095541401275, | |
| "loss": 0.9024, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.2871578947368421, | |
| "grad_norm": 0.41220685839653015, | |
| "learning_rate": 0.0001438811040339703, | |
| "loss": 0.6064, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 0.288, | |
| "grad_norm": 0.45231524109840393, | |
| "learning_rate": 0.00014371125265392782, | |
| "loss": 0.5803, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.2888421052631579, | |
| "grad_norm": 0.43630462884902954, | |
| "learning_rate": 0.00014354140127388536, | |
| "loss": 0.4852, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 0.28968421052631577, | |
| "grad_norm": 0.6493312120437622, | |
| "learning_rate": 0.00014337154989384288, | |
| "loss": 0.4444, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.2905263157894737, | |
| "grad_norm": 1.7730942964553833, | |
| "learning_rate": 0.00014320169851380043, | |
| "loss": 0.7441, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.29136842105263155, | |
| "grad_norm": 0.37726616859436035, | |
| "learning_rate": 0.00014303184713375798, | |
| "loss": 0.6209, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.2922105263157895, | |
| "grad_norm": 0.45927125215530396, | |
| "learning_rate": 0.0001428619957537155, | |
| "loss": 0.4692, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 0.29305263157894734, | |
| "grad_norm": 0.4703160524368286, | |
| "learning_rate": 0.00014269214437367305, | |
| "loss": 0.3952, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.29389473684210526, | |
| "grad_norm": 0.8873239755630493, | |
| "learning_rate": 0.00014252229299363057, | |
| "loss": 0.5184, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 0.29473684210526313, | |
| "grad_norm": 1.6383920907974243, | |
| "learning_rate": 0.00014235244161358811, | |
| "loss": 0.94, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.29557894736842105, | |
| "grad_norm": 0.371126264333725, | |
| "learning_rate": 0.00014218259023354566, | |
| "loss": 0.6306, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.296421052631579, | |
| "grad_norm": 0.42718377709388733, | |
| "learning_rate": 0.00014201273885350318, | |
| "loss": 0.5226, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 0.29726315789473684, | |
| "grad_norm": 0.4946935176849365, | |
| "learning_rate": 0.00014184288747346073, | |
| "loss": 0.4776, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 0.29810526315789476, | |
| "grad_norm": 0.6871415972709656, | |
| "learning_rate": 0.00014167303609341828, | |
| "loss": 0.3815, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.29894736842105263, | |
| "grad_norm": 1.9425073862075806, | |
| "learning_rate": 0.0001415031847133758, | |
| "loss": 0.8956, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.29978947368421055, | |
| "grad_norm": 0.44313549995422363, | |
| "learning_rate": 0.00014133333333333334, | |
| "loss": 0.626, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 0.3006315789473684, | |
| "grad_norm": 0.46598005294799805, | |
| "learning_rate": 0.00014116348195329086, | |
| "loss": 0.4069, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 0.30147368421052634, | |
| "grad_norm": 0.6723268628120422, | |
| "learning_rate": 0.0001409936305732484, | |
| "loss": 0.4478, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 0.3023157894736842, | |
| "grad_norm": 0.5502001643180847, | |
| "learning_rate": 0.00014082377919320596, | |
| "loss": 0.5162, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 0.3031578947368421, | |
| "grad_norm": 2.246311664581299, | |
| "learning_rate": 0.00014065392781316348, | |
| "loss": 1.0122, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.304, | |
| "grad_norm": 0.4047815203666687, | |
| "learning_rate": 0.00014048407643312103, | |
| "loss": 0.5781, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 0.3048421052631579, | |
| "grad_norm": 0.43664127588272095, | |
| "learning_rate": 0.00014031422505307858, | |
| "loss": 0.4955, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 0.3056842105263158, | |
| "grad_norm": 0.6888949275016785, | |
| "learning_rate": 0.0001401443736730361, | |
| "loss": 0.4727, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 0.3065263157894737, | |
| "grad_norm": 0.7450914978981018, | |
| "learning_rate": 0.00013997452229299364, | |
| "loss": 0.348, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 0.30736842105263157, | |
| "grad_norm": 2.1871423721313477, | |
| "learning_rate": 0.00013980467091295116, | |
| "loss": 0.8319, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.3082105263157895, | |
| "grad_norm": 0.5232999324798584, | |
| "learning_rate": 0.0001396348195329087, | |
| "loss": 0.6121, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 0.30905263157894736, | |
| "grad_norm": 0.45325085520744324, | |
| "learning_rate": 0.00013946496815286626, | |
| "loss": 0.4894, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 0.3098947368421053, | |
| "grad_norm": 0.5262650847434998, | |
| "learning_rate": 0.00013929511677282378, | |
| "loss": 0.3711, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 0.31073684210526314, | |
| "grad_norm": 0.7367873787879944, | |
| "learning_rate": 0.00013912526539278133, | |
| "loss": 0.4665, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 0.31157894736842107, | |
| "grad_norm": 1.9024940729141235, | |
| "learning_rate": 0.00013895541401273887, | |
| "loss": 0.9023, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.31242105263157893, | |
| "grad_norm": 0.376897931098938, | |
| "learning_rate": 0.0001387855626326964, | |
| "loss": 0.645, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 0.31326315789473685, | |
| "grad_norm": 0.45147833228111267, | |
| "learning_rate": 0.00013861571125265394, | |
| "loss": 0.4427, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 0.3141052631578947, | |
| "grad_norm": 0.5443453788757324, | |
| "learning_rate": 0.00013844585987261146, | |
| "loss": 0.5412, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 0.31494736842105264, | |
| "grad_norm": 1.0339975357055664, | |
| "learning_rate": 0.000138276008492569, | |
| "loss": 0.516, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 0.3157894736842105, | |
| "grad_norm": 2.173063278198242, | |
| "learning_rate": 0.00013810615711252656, | |
| "loss": 0.9036, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.31663157894736843, | |
| "grad_norm": 0.5104514360427856, | |
| "learning_rate": 0.00013793630573248408, | |
| "loss": 0.7482, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 0.3174736842105263, | |
| "grad_norm": 0.41140398383140564, | |
| "learning_rate": 0.00013776645435244162, | |
| "loss": 0.4176, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 0.3183157894736842, | |
| "grad_norm": 0.6841542720794678, | |
| "learning_rate": 0.00013759660297239914, | |
| "loss": 0.4552, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 0.3191578947368421, | |
| "grad_norm": 0.8318420052528381, | |
| "learning_rate": 0.0001374267515923567, | |
| "loss": 0.5629, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 2.329176902770996, | |
| "learning_rate": 0.00013725690021231424, | |
| "loss": 0.8823, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.3208421052631579, | |
| "grad_norm": 0.3277033269405365, | |
| "learning_rate": 0.00013708704883227176, | |
| "loss": 0.4825, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 0.3216842105263158, | |
| "grad_norm": 0.4870198965072632, | |
| "learning_rate": 0.0001369171974522293, | |
| "loss": 0.4404, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 0.32252631578947366, | |
| "grad_norm": 0.5535099506378174, | |
| "learning_rate": 0.00013674734607218686, | |
| "loss": 0.5578, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 0.3233684210526316, | |
| "grad_norm": 0.848175048828125, | |
| "learning_rate": 0.00013657749469214438, | |
| "loss": 0.425, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 0.32421052631578945, | |
| "grad_norm": 1.8445836305618286, | |
| "learning_rate": 0.00013640764331210192, | |
| "loss": 0.8534, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.32505263157894737, | |
| "grad_norm": 0.3430965542793274, | |
| "learning_rate": 0.00013623779193205944, | |
| "loss": 0.5229, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 0.32589473684210524, | |
| "grad_norm": 0.3818278908729553, | |
| "learning_rate": 0.000136067940552017, | |
| "loss": 0.4904, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 0.32673684210526316, | |
| "grad_norm": 0.3931770622730255, | |
| "learning_rate": 0.00013589808917197454, | |
| "loss": 0.3472, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 0.327578947368421, | |
| "grad_norm": 0.6242585182189941, | |
| "learning_rate": 0.00013572823779193206, | |
| "loss": 0.4391, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 0.32842105263157895, | |
| "grad_norm": 2.3917336463928223, | |
| "learning_rate": 0.0001355583864118896, | |
| "loss": 0.8238, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.32926315789473687, | |
| "grad_norm": 0.34531742334365845, | |
| "learning_rate": 0.00013538853503184715, | |
| "loss": 0.5469, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 0.33010526315789473, | |
| "grad_norm": 0.44152313470840454, | |
| "learning_rate": 0.00013521868365180467, | |
| "loss": 0.418, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 0.33094736842105266, | |
| "grad_norm": 0.530602216720581, | |
| "learning_rate": 0.00013504883227176222, | |
| "loss": 0.5126, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 0.3317894736842105, | |
| "grad_norm": 0.5722650289535522, | |
| "learning_rate": 0.00013487898089171974, | |
| "loss": 0.3929, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 0.33263157894736844, | |
| "grad_norm": 1.1613789796829224, | |
| "learning_rate": 0.0001347091295116773, | |
| "loss": 0.7835, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.3334736842105263, | |
| "grad_norm": 0.378336638212204, | |
| "learning_rate": 0.00013453927813163484, | |
| "loss": 0.5817, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 0.33431578947368423, | |
| "grad_norm": 0.409066766500473, | |
| "learning_rate": 0.00013436942675159236, | |
| "loss": 0.4298, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 0.3351578947368421, | |
| "grad_norm": 0.5206225514411926, | |
| "learning_rate": 0.0001341995753715499, | |
| "loss": 0.4844, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 0.336, | |
| "grad_norm": 0.9876739978790283, | |
| "learning_rate": 0.00013402972399150745, | |
| "loss": 0.5237, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 0.3368421052631579, | |
| "grad_norm": 1.9923458099365234, | |
| "learning_rate": 0.00013385987261146497, | |
| "loss": 0.9039, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.3376842105263158, | |
| "grad_norm": 0.5249975323677063, | |
| "learning_rate": 0.00013369002123142252, | |
| "loss": 0.417, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 0.3385263157894737, | |
| "grad_norm": 0.3336249887943268, | |
| "learning_rate": 0.00013352016985138004, | |
| "loss": 0.3544, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 0.3393684210526316, | |
| "grad_norm": 0.9306570291519165, | |
| "learning_rate": 0.0001333503184713376, | |
| "loss": 0.5496, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 0.34021052631578946, | |
| "grad_norm": 0.9381512999534607, | |
| "learning_rate": 0.00013318046709129514, | |
| "loss": 0.5631, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 0.3410526315789474, | |
| "grad_norm": 1.838768482208252, | |
| "learning_rate": 0.00013301061571125266, | |
| "loss": 0.7881, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.34189473684210525, | |
| "grad_norm": 0.37969905138015747, | |
| "learning_rate": 0.0001328407643312102, | |
| "loss": 0.4469, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 0.3427368421052632, | |
| "grad_norm": 0.4671299457550049, | |
| "learning_rate": 0.00013267091295116772, | |
| "loss": 0.4265, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 0.34357894736842104, | |
| "grad_norm": 0.5337377786636353, | |
| "learning_rate": 0.00013250106157112527, | |
| "loss": 0.4472, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 0.34442105263157896, | |
| "grad_norm": 0.8540793657302856, | |
| "learning_rate": 0.00013233121019108282, | |
| "loss": 0.4881, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 0.3452631578947368, | |
| "grad_norm": 1.903519630432129, | |
| "learning_rate": 0.00013216135881104034, | |
| "loss": 0.9538, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.34610526315789475, | |
| "grad_norm": 0.4626956880092621, | |
| "learning_rate": 0.00013199150743099789, | |
| "loss": 0.4086, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 0.3469473684210526, | |
| "grad_norm": 0.4555705785751343, | |
| "learning_rate": 0.00013182165605095543, | |
| "loss": 0.4369, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 0.34778947368421054, | |
| "grad_norm": 0.44597890973091125, | |
| "learning_rate": 0.00013165180467091295, | |
| "loss": 0.4244, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 0.3486315789473684, | |
| "grad_norm": 0.5580148696899414, | |
| "learning_rate": 0.0001314819532908705, | |
| "loss": 0.3834, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 0.3494736842105263, | |
| "grad_norm": 2.1332900524139404, | |
| "learning_rate": 0.00013131210191082802, | |
| "loss": 0.8044, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.3503157894736842, | |
| "grad_norm": 0.3396005630493164, | |
| "learning_rate": 0.00013114225053078557, | |
| "loss": 0.4968, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 0.3511578947368421, | |
| "grad_norm": 0.38661715388298035, | |
| "learning_rate": 0.00013097239915074312, | |
| "loss": 0.4131, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 0.352, | |
| "grad_norm": 0.43859660625457764, | |
| "learning_rate": 0.00013080254777070064, | |
| "loss": 0.4612, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 0.3528421052631579, | |
| "grad_norm": 0.9659103155136108, | |
| "learning_rate": 0.00013063269639065818, | |
| "loss": 0.4966, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 0.35368421052631577, | |
| "grad_norm": 2.0894362926483154, | |
| "learning_rate": 0.00013046284501061573, | |
| "loss": 0.8287, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.3545263157894737, | |
| "grad_norm": 0.364837110042572, | |
| "learning_rate": 0.00013029299363057325, | |
| "loss": 0.5177, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 0.35536842105263156, | |
| "grad_norm": 0.5444786548614502, | |
| "learning_rate": 0.0001301231422505308, | |
| "loss": 0.5576, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 0.3562105263157895, | |
| "grad_norm": 0.48805421590805054, | |
| "learning_rate": 0.00012995329087048832, | |
| "loss": 0.4294, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 0.35705263157894734, | |
| "grad_norm": 0.7706589102745056, | |
| "learning_rate": 0.00012978343949044587, | |
| "loss": 0.487, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 0.35789473684210527, | |
| "grad_norm": 1.5804134607315063, | |
| "learning_rate": 0.00012961358811040342, | |
| "loss": 0.9245, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.35873684210526313, | |
| "grad_norm": 0.2868419885635376, | |
| "learning_rate": 0.00012944373673036094, | |
| "loss": 0.5638, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 0.35957894736842105, | |
| "grad_norm": 0.41276785731315613, | |
| "learning_rate": 0.00012927388535031848, | |
| "loss": 0.4995, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 0.3604210526315789, | |
| "grad_norm": 0.458173006772995, | |
| "learning_rate": 0.00012910403397027603, | |
| "loss": 0.3755, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 0.36126315789473684, | |
| "grad_norm": 0.7480744123458862, | |
| "learning_rate": 0.00012893418259023355, | |
| "loss": 0.4606, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 0.36210526315789476, | |
| "grad_norm": 1.673670768737793, | |
| "learning_rate": 0.0001287643312101911, | |
| "loss": 0.9763, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.36294736842105263, | |
| "grad_norm": 0.41645050048828125, | |
| "learning_rate": 0.00012859447983014862, | |
| "loss": 0.4802, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 0.36378947368421055, | |
| "grad_norm": 0.47213512659072876, | |
| "learning_rate": 0.00012842462845010617, | |
| "loss": 0.4031, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 0.3646315789473684, | |
| "grad_norm": 0.45644351840019226, | |
| "learning_rate": 0.00012825477707006371, | |
| "loss": 0.359, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 0.36547368421052634, | |
| "grad_norm": 0.6647821068763733, | |
| "learning_rate": 0.00012808492569002123, | |
| "loss": 0.3796, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 0.3663157894736842, | |
| "grad_norm": 1.8943500518798828, | |
| "learning_rate": 0.00012791507430997878, | |
| "loss": 0.8407, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.3671578947368421, | |
| "grad_norm": 0.33899691700935364, | |
| "learning_rate": 0.0001277452229299363, | |
| "loss": 0.5056, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 0.368, | |
| "grad_norm": 0.338820219039917, | |
| "learning_rate": 0.00012757537154989385, | |
| "loss": 0.419, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 0.3688421052631579, | |
| "grad_norm": 0.5587894916534424, | |
| "learning_rate": 0.0001274055201698514, | |
| "loss": 0.4888, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 0.3696842105263158, | |
| "grad_norm": 0.7106043696403503, | |
| "learning_rate": 0.00012723566878980892, | |
| "loss": 0.3529, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 0.3705263157894737, | |
| "grad_norm": 1.7128459215164185, | |
| "learning_rate": 0.00012706581740976646, | |
| "loss": 0.9441, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.37136842105263157, | |
| "grad_norm": 0.5022220015525818, | |
| "learning_rate": 0.000126895966029724, | |
| "loss": 0.5007, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 0.3722105263157895, | |
| "grad_norm": 0.4136195778846741, | |
| "learning_rate": 0.00012672611464968153, | |
| "loss": 0.474, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 0.37305263157894736, | |
| "grad_norm": 0.44717109203338623, | |
| "learning_rate": 0.00012655626326963908, | |
| "loss": 0.4713, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 0.3738947368421053, | |
| "grad_norm": 0.9358104467391968, | |
| "learning_rate": 0.0001263864118895966, | |
| "loss": 0.4607, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 0.37473684210526315, | |
| "grad_norm": 1.8086439371109009, | |
| "learning_rate": 0.00012621656050955415, | |
| "loss": 0.8773, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.37557894736842107, | |
| "grad_norm": 0.4302763342857361, | |
| "learning_rate": 0.0001260467091295117, | |
| "loss": 0.6941, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 0.37642105263157893, | |
| "grad_norm": 0.5124600529670715, | |
| "learning_rate": 0.00012587685774946922, | |
| "loss": 0.481, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 0.37726315789473686, | |
| "grad_norm": 0.5362034440040588, | |
| "learning_rate": 0.00012570700636942676, | |
| "loss": 0.4283, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 0.3781052631578947, | |
| "grad_norm": 0.6653463244438171, | |
| "learning_rate": 0.0001255371549893843, | |
| "loss": 0.3755, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 0.37894736842105264, | |
| "grad_norm": 1.978276252746582, | |
| "learning_rate": 0.00012536730360934183, | |
| "loss": 0.8844, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.3797894736842105, | |
| "grad_norm": 0.31543081998825073, | |
| "learning_rate": 0.00012519745222929938, | |
| "loss": 0.5637, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 0.38063157894736843, | |
| "grad_norm": 0.3930933475494385, | |
| "learning_rate": 0.0001250276008492569, | |
| "loss": 0.4646, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 0.3814736842105263, | |
| "grad_norm": 0.7759447693824768, | |
| "learning_rate": 0.00012485774946921445, | |
| "loss": 0.4531, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 0.3823157894736842, | |
| "grad_norm": 0.8808984160423279, | |
| "learning_rate": 0.000124687898089172, | |
| "loss": 0.4445, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 0.3831578947368421, | |
| "grad_norm": 2.26517915725708, | |
| "learning_rate": 0.00012451804670912951, | |
| "loss": 0.7758, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.384, | |
| "grad_norm": 0.3629767596721649, | |
| "learning_rate": 0.00012434819532908706, | |
| "loss": 0.6019, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 0.3848421052631579, | |
| "grad_norm": 0.4092148542404175, | |
| "learning_rate": 0.00012417834394904458, | |
| "loss": 0.4494, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 0.3856842105263158, | |
| "grad_norm": 0.503391444683075, | |
| "learning_rate": 0.00012400849256900213, | |
| "loss": 0.3964, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 0.38652631578947366, | |
| "grad_norm": 0.6524780988693237, | |
| "learning_rate": 0.00012383864118895968, | |
| "loss": 0.4745, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 0.3873684210526316, | |
| "grad_norm": 1.764403223991394, | |
| "learning_rate": 0.0001236687898089172, | |
| "loss": 0.8143, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.38821052631578945, | |
| "grad_norm": 0.3626035749912262, | |
| "learning_rate": 0.00012349893842887474, | |
| "loss": 0.6169, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 0.38905263157894737, | |
| "grad_norm": 0.5219221115112305, | |
| "learning_rate": 0.0001233290870488323, | |
| "loss": 0.4546, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 0.38989473684210524, | |
| "grad_norm": 0.5172279477119446, | |
| "learning_rate": 0.0001231592356687898, | |
| "loss": 0.4021, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 0.39073684210526316, | |
| "grad_norm": 0.7367202043533325, | |
| "learning_rate": 0.00012298938428874736, | |
| "loss": 0.4233, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 0.391578947368421, | |
| "grad_norm": 1.9178476333618164, | |
| "learning_rate": 0.00012281953290870488, | |
| "loss": 0.8616, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.39242105263157895, | |
| "grad_norm": 0.37362372875213623, | |
| "learning_rate": 0.00012264968152866243, | |
| "loss": 0.5105, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 0.3932631578947368, | |
| "grad_norm": 0.42427071928977966, | |
| "learning_rate": 0.00012247983014861998, | |
| "loss": 0.4966, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 0.39410526315789474, | |
| "grad_norm": 0.42204511165618896, | |
| "learning_rate": 0.0001223099787685775, | |
| "loss": 0.4458, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 0.39494736842105266, | |
| "grad_norm": 0.6317414045333862, | |
| "learning_rate": 0.00012214012738853504, | |
| "loss": 0.4233, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 0.3957894736842105, | |
| "grad_norm": 1.9896742105484009, | |
| "learning_rate": 0.00012197027600849258, | |
| "loss": 0.9184, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.39663157894736845, | |
| "grad_norm": 0.370765745639801, | |
| "learning_rate": 0.00012180042462845011, | |
| "loss": 0.7689, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 0.3974736842105263, | |
| "grad_norm": 0.462027907371521, | |
| "learning_rate": 0.00012163057324840766, | |
| "loss": 0.4029, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 0.39831578947368423, | |
| "grad_norm": 0.46577131748199463, | |
| "learning_rate": 0.00012146072186836519, | |
| "loss": 0.391, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 0.3991578947368421, | |
| "grad_norm": 1.0755490064620972, | |
| "learning_rate": 0.00012129087048832273, | |
| "loss": 0.4561, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 1.7896242141723633, | |
| "learning_rate": 0.00012112101910828026, | |
| "loss": 0.8992, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.4008421052631579, | |
| "grad_norm": 0.3657568097114563, | |
| "learning_rate": 0.00012095116772823781, | |
| "loss": 0.5263, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 0.4016842105263158, | |
| "grad_norm": 0.4388144314289093, | |
| "learning_rate": 0.00012078131634819534, | |
| "loss": 0.5054, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 0.4025263157894737, | |
| "grad_norm": 0.5261571407318115, | |
| "learning_rate": 0.00012061146496815288, | |
| "loss": 0.4926, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 0.4033684210526316, | |
| "grad_norm": 0.8278854489326477, | |
| "learning_rate": 0.00012044161358811041, | |
| "loss": 0.406, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 0.40421052631578946, | |
| "grad_norm": 2.475177764892578, | |
| "learning_rate": 0.00012027176220806796, | |
| "loss": 0.8657, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.4050526315789474, | |
| "grad_norm": 0.341237336397171, | |
| "learning_rate": 0.00012010191082802549, | |
| "loss": 0.6936, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 0.40589473684210525, | |
| "grad_norm": 0.49271059036254883, | |
| "learning_rate": 0.00011993205944798302, | |
| "loss": 0.4932, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 0.4067368421052632, | |
| "grad_norm": 0.3779512047767639, | |
| "learning_rate": 0.00011976220806794056, | |
| "loss": 0.5052, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 0.40757894736842104, | |
| "grad_norm": 0.60589998960495, | |
| "learning_rate": 0.0001195923566878981, | |
| "loss": 0.3625, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 0.40842105263157896, | |
| "grad_norm": 2.016735553741455, | |
| "learning_rate": 0.00011942250530785564, | |
| "loss": 0.7341, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 0.40926315789473683, | |
| "grad_norm": 0.387846440076828, | |
| "learning_rate": 0.00011925265392781317, | |
| "loss": 0.6, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 0.41010526315789475, | |
| "grad_norm": 0.37108609080314636, | |
| "learning_rate": 0.00011908280254777071, | |
| "loss": 0.4301, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 0.4109473684210526, | |
| "grad_norm": 0.3581722378730774, | |
| "learning_rate": 0.00011891295116772824, | |
| "loss": 0.4823, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 0.41178947368421054, | |
| "grad_norm": 0.6709173321723938, | |
| "learning_rate": 0.00011874309978768579, | |
| "loss": 0.3915, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 0.4126315789473684, | |
| "grad_norm": 1.945478081703186, | |
| "learning_rate": 0.00011857324840764332, | |
| "loss": 0.8356, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.4134736842105263, | |
| "grad_norm": 0.3648563623428345, | |
| "learning_rate": 0.00011840339702760086, | |
| "loss": 0.8605, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 0.4143157894736842, | |
| "grad_norm": 0.39815518260002136, | |
| "learning_rate": 0.00011823354564755839, | |
| "loss": 0.357, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 0.4151578947368421, | |
| "grad_norm": 0.40915629267692566, | |
| "learning_rate": 0.00011806369426751594, | |
| "loss": 0.3756, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 0.416, | |
| "grad_norm": 0.8093079328536987, | |
| "learning_rate": 0.00011789384288747347, | |
| "loss": 0.4563, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 0.4168421052631579, | |
| "grad_norm": 2.1880149841308594, | |
| "learning_rate": 0.000117723991507431, | |
| "loss": 0.8151, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.41768421052631577, | |
| "grad_norm": 0.3419650197029114, | |
| "learning_rate": 0.00011755414012738854, | |
| "loss": 0.6649, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 0.4185263157894737, | |
| "grad_norm": 0.4165056645870209, | |
| "learning_rate": 0.00011738428874734609, | |
| "loss": 0.4374, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 0.41936842105263156, | |
| "grad_norm": 0.5629061460494995, | |
| "learning_rate": 0.00011721443736730362, | |
| "loss": 0.4862, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 0.4202105263157895, | |
| "grad_norm": 0.8363445997238159, | |
| "learning_rate": 0.00011704458598726116, | |
| "loss": 0.5843, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 0.42105263157894735, | |
| "grad_norm": 1.9334666728973389, | |
| "learning_rate": 0.00011687473460721869, | |
| "loss": 0.9163, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.42189473684210527, | |
| "grad_norm": 0.3296836316585541, | |
| "learning_rate": 0.00011670488322717624, | |
| "loss": 0.6008, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 0.42273684210526313, | |
| "grad_norm": 0.5271950364112854, | |
| "learning_rate": 0.00011653503184713377, | |
| "loss": 0.452, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 0.42357894736842105, | |
| "grad_norm": 0.44200482964515686, | |
| "learning_rate": 0.0001163651804670913, | |
| "loss": 0.3819, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 0.4244210526315789, | |
| "grad_norm": 0.650926947593689, | |
| "learning_rate": 0.00011619532908704884, | |
| "loss": 0.4078, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 0.42526315789473684, | |
| "grad_norm": 1.7106821537017822, | |
| "learning_rate": 0.00011602547770700639, | |
| "loss": 0.9444, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 0.4261052631578947, | |
| "grad_norm": 0.5171535015106201, | |
| "learning_rate": 0.00011585562632696392, | |
| "loss": 0.4195, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 0.42694736842105263, | |
| "grad_norm": 0.41028520464897156, | |
| "learning_rate": 0.00011568577494692145, | |
| "loss": 0.4426, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 0.42778947368421055, | |
| "grad_norm": 0.5907677412033081, | |
| "learning_rate": 0.00011551592356687899, | |
| "loss": 0.4805, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 0.4286315789473684, | |
| "grad_norm": 0.7928293943405151, | |
| "learning_rate": 0.00011534607218683654, | |
| "loss": 0.4096, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 0.42947368421052634, | |
| "grad_norm": 2.1657371520996094, | |
| "learning_rate": 0.00011517622080679407, | |
| "loss": 0.7263, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.4303157894736842, | |
| "grad_norm": 0.532307505607605, | |
| "learning_rate": 0.0001150063694267516, | |
| "loss": 0.5954, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 0.43115789473684213, | |
| "grad_norm": 0.35027825832366943, | |
| "learning_rate": 0.00011483651804670914, | |
| "loss": 0.3788, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 0.432, | |
| "grad_norm": 0.4618242383003235, | |
| "learning_rate": 0.00011466666666666667, | |
| "loss": 0.3941, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 0.4328421052631579, | |
| "grad_norm": 0.8208893537521362, | |
| "learning_rate": 0.00011449681528662422, | |
| "loss": 0.4338, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 0.4336842105263158, | |
| "grad_norm": 2.1040568351745605, | |
| "learning_rate": 0.00011432696390658175, | |
| "loss": 0.9682, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 0.4345263157894737, | |
| "grad_norm": 0.43169477581977844, | |
| "learning_rate": 0.00011415711252653929, | |
| "loss": 0.6634, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 0.43536842105263157, | |
| "grad_norm": 0.39884811639785767, | |
| "learning_rate": 0.00011398726114649682, | |
| "loss": 0.4574, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 0.4362105263157895, | |
| "grad_norm": 0.507757306098938, | |
| "learning_rate": 0.00011381740976645437, | |
| "loss": 0.4617, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 0.43705263157894736, | |
| "grad_norm": 0.6555622220039368, | |
| "learning_rate": 0.0001136475583864119, | |
| "loss": 0.4563, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 0.4378947368421053, | |
| "grad_norm": 1.2537477016448975, | |
| "learning_rate": 0.00011347770700636944, | |
| "loss": 0.6793, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.43873684210526315, | |
| "grad_norm": 0.4446013867855072, | |
| "learning_rate": 0.00011330785562632697, | |
| "loss": 0.5328, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 0.43957894736842107, | |
| "grad_norm": 0.5096587538719177, | |
| "learning_rate": 0.00011313800424628452, | |
| "loss": 0.518, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 0.44042105263157894, | |
| "grad_norm": 0.4603320062160492, | |
| "learning_rate": 0.00011296815286624205, | |
| "loss": 0.3654, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 0.44126315789473686, | |
| "grad_norm": 0.7096450924873352, | |
| "learning_rate": 0.00011279830148619958, | |
| "loss": 0.4051, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 0.4421052631578947, | |
| "grad_norm": 1.851871371269226, | |
| "learning_rate": 0.00011262845010615712, | |
| "loss": 0.8106, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 0.44294736842105265, | |
| "grad_norm": 0.4419096112251282, | |
| "learning_rate": 0.00011245859872611467, | |
| "loss": 0.6919, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 0.4437894736842105, | |
| "grad_norm": 0.5313680768013, | |
| "learning_rate": 0.0001122887473460722, | |
| "loss": 0.448, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 0.44463157894736843, | |
| "grad_norm": 0.4806055426597595, | |
| "learning_rate": 0.00011211889596602973, | |
| "loss": 0.3896, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 0.4454736842105263, | |
| "grad_norm": 0.7410107254981995, | |
| "learning_rate": 0.00011194904458598727, | |
| "loss": 0.4677, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 0.4463157894736842, | |
| "grad_norm": 2.6054928302764893, | |
| "learning_rate": 0.00011177919320594482, | |
| "loss": 0.9431, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.4471578947368421, | |
| "grad_norm": 0.4333815276622772, | |
| "learning_rate": 0.00011160934182590235, | |
| "loss": 0.5473, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 0.448, | |
| "grad_norm": 0.5291936993598938, | |
| "learning_rate": 0.00011143949044585988, | |
| "loss": 0.537, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 0.4488421052631579, | |
| "grad_norm": 0.5883787274360657, | |
| "learning_rate": 0.00011126963906581742, | |
| "loss": 0.4294, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 0.4496842105263158, | |
| "grad_norm": 0.7452921867370605, | |
| "learning_rate": 0.00011109978768577496, | |
| "loss": 0.4222, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 0.45052631578947366, | |
| "grad_norm": 1.6746816635131836, | |
| "learning_rate": 0.0001109299363057325, | |
| "loss": 0.9157, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 0.4513684210526316, | |
| "grad_norm": 0.4518404006958008, | |
| "learning_rate": 0.00011076008492569003, | |
| "loss": 0.5146, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 0.45221052631578945, | |
| "grad_norm": 0.6486232280731201, | |
| "learning_rate": 0.00011059023354564757, | |
| "loss": 0.4886, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 0.4530526315789474, | |
| "grad_norm": 0.5233629941940308, | |
| "learning_rate": 0.00011042038216560511, | |
| "loss": 0.4217, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 0.45389473684210524, | |
| "grad_norm": 1.0410133600234985, | |
| "learning_rate": 0.00011025053078556265, | |
| "loss": 0.5072, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 0.45473684210526316, | |
| "grad_norm": 1.8575575351715088, | |
| "learning_rate": 0.00011008067940552018, | |
| "loss": 0.9385, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.45557894736842103, | |
| "grad_norm": 0.366054505109787, | |
| "learning_rate": 0.00010991082802547772, | |
| "loss": 0.5769, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 0.45642105263157895, | |
| "grad_norm": 0.42400917410850525, | |
| "learning_rate": 0.00010974097664543525, | |
| "loss": 0.4607, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 0.4572631578947368, | |
| "grad_norm": 0.6598902344703674, | |
| "learning_rate": 0.0001095711252653928, | |
| "loss": 0.5085, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 0.45810526315789474, | |
| "grad_norm": 0.9073595404624939, | |
| "learning_rate": 0.00010940127388535033, | |
| "loss": 0.4785, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 0.4589473684210526, | |
| "grad_norm": 1.6613352298736572, | |
| "learning_rate": 0.00010923142250530786, | |
| "loss": 0.8917, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 0.4597894736842105, | |
| "grad_norm": 0.36178308725357056, | |
| "learning_rate": 0.0001090615711252654, | |
| "loss": 0.5751, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 0.46063157894736845, | |
| "grad_norm": 0.4736432433128357, | |
| "learning_rate": 0.00010889171974522295, | |
| "loss": 0.4292, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 0.4614736842105263, | |
| "grad_norm": 0.41263148188591003, | |
| "learning_rate": 0.00010872186836518048, | |
| "loss": 0.3977, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 0.46231578947368424, | |
| "grad_norm": 0.9939879775047302, | |
| "learning_rate": 0.00010855201698513801, | |
| "loss": 0.4354, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 0.4631578947368421, | |
| "grad_norm": 2.0625500679016113, | |
| "learning_rate": 0.00010838216560509555, | |
| "loss": 0.9635, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.464, | |
| "grad_norm": 0.3898473381996155, | |
| "learning_rate": 0.0001082123142250531, | |
| "loss": 0.6018, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 0.4648421052631579, | |
| "grad_norm": 0.6290395855903625, | |
| "learning_rate": 0.00010804246284501063, | |
| "loss": 0.499, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 0.4656842105263158, | |
| "grad_norm": 0.46902212500572205, | |
| "learning_rate": 0.00010787261146496816, | |
| "loss": 0.3578, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 0.4665263157894737, | |
| "grad_norm": 1.018322467803955, | |
| "learning_rate": 0.0001077027600849257, | |
| "loss": 0.569, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 0.4673684210526316, | |
| "grad_norm": 2.5967328548431396, | |
| "learning_rate": 0.00010753290870488324, | |
| "loss": 0.9481, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 0.46821052631578947, | |
| "grad_norm": 0.328856885433197, | |
| "learning_rate": 0.00010736305732484078, | |
| "loss": 0.4369, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 0.4690526315789474, | |
| "grad_norm": 0.4609110653400421, | |
| "learning_rate": 0.00010719320594479831, | |
| "loss": 0.462, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 0.46989473684210525, | |
| "grad_norm": 0.782852828502655, | |
| "learning_rate": 0.00010702335456475585, | |
| "loss": 0.4226, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 0.4707368421052632, | |
| "grad_norm": 0.760334312915802, | |
| "learning_rate": 0.0001068535031847134, | |
| "loss": 0.4347, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 0.47157894736842104, | |
| "grad_norm": 1.9141038656234741, | |
| "learning_rate": 0.00010668365180467093, | |
| "loss": 0.9091, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.47242105263157896, | |
| "grad_norm": 0.37223225831985474, | |
| "learning_rate": 0.00010651380042462846, | |
| "loss": 0.3715, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 0.47326315789473683, | |
| "grad_norm": 0.42550092935562134, | |
| "learning_rate": 0.000106343949044586, | |
| "loss": 0.3685, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 0.47410526315789475, | |
| "grad_norm": 0.6292097568511963, | |
| "learning_rate": 0.00010617409766454354, | |
| "loss": 0.4427, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 0.4749473684210526, | |
| "grad_norm": 0.9241964817047119, | |
| "learning_rate": 0.00010600424628450108, | |
| "loss": 0.4764, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 0.47578947368421054, | |
| "grad_norm": 2.170848846435547, | |
| "learning_rate": 0.00010583439490445861, | |
| "loss": 0.9196, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 0.4766315789473684, | |
| "grad_norm": 0.35500389337539673, | |
| "learning_rate": 0.00010566454352441614, | |
| "loss": 0.4752, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 0.47747368421052633, | |
| "grad_norm": 0.33285343647003174, | |
| "learning_rate": 0.00010549469214437369, | |
| "loss": 0.5268, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 0.4783157894736842, | |
| "grad_norm": 0.313558965921402, | |
| "learning_rate": 0.00010532484076433123, | |
| "loss": 0.4336, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 0.4791578947368421, | |
| "grad_norm": 0.6317498087882996, | |
| "learning_rate": 0.00010515498938428876, | |
| "loss": 0.371, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 1.7509946823120117, | |
| "learning_rate": 0.0001049851380042463, | |
| "loss": 0.689, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.4808421052631579, | |
| "grad_norm": 0.44371727108955383, | |
| "learning_rate": 0.00010481528662420383, | |
| "loss": 0.6226, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 0.48168421052631577, | |
| "grad_norm": 0.4474042057991028, | |
| "learning_rate": 0.00010464543524416138, | |
| "loss": 0.4004, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 0.4825263157894737, | |
| "grad_norm": 0.4645920693874359, | |
| "learning_rate": 0.00010447558386411891, | |
| "loss": 0.4242, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 0.48336842105263156, | |
| "grad_norm": 0.6978147029876709, | |
| "learning_rate": 0.00010430573248407644, | |
| "loss": 0.3233, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 0.4842105263157895, | |
| "grad_norm": 2.3527235984802246, | |
| "learning_rate": 0.00010413588110403398, | |
| "loss": 0.9131, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 0.48505263157894735, | |
| "grad_norm": 0.36891087889671326, | |
| "learning_rate": 0.00010396602972399152, | |
| "loss": 0.6604, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 0.48589473684210527, | |
| "grad_norm": 0.5065646171569824, | |
| "learning_rate": 0.00010379617834394906, | |
| "loss": 0.4632, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 0.48673684210526313, | |
| "grad_norm": 0.6291417479515076, | |
| "learning_rate": 0.00010362632696390659, | |
| "loss": 0.4137, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 0.48757894736842106, | |
| "grad_norm": 0.6354262828826904, | |
| "learning_rate": 0.00010345647558386413, | |
| "loss": 0.3884, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 0.4884210526315789, | |
| "grad_norm": 1.3266727924346924, | |
| "learning_rate": 0.00010328662420382167, | |
| "loss": 0.8399, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.48926315789473684, | |
| "grad_norm": 0.6220839023590088, | |
| "learning_rate": 0.00010311677282377921, | |
| "loss": 0.3599, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 0.4901052631578947, | |
| "grad_norm": 0.4402288794517517, | |
| "learning_rate": 0.00010294692144373674, | |
| "loss": 0.463, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 0.49094736842105263, | |
| "grad_norm": 0.6351674795150757, | |
| "learning_rate": 0.00010277707006369428, | |
| "loss": 0.47, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 0.4917894736842105, | |
| "grad_norm": 0.7723363041877747, | |
| "learning_rate": 0.00010260721868365182, | |
| "loss": 0.41, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 0.4926315789473684, | |
| "grad_norm": 1.651047706604004, | |
| "learning_rate": 0.00010243736730360936, | |
| "loss": 0.8441, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 0.49347368421052634, | |
| "grad_norm": 0.36726486682891846, | |
| "learning_rate": 0.00010226751592356689, | |
| "loss": 0.5353, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 0.4943157894736842, | |
| "grad_norm": 0.45022356510162354, | |
| "learning_rate": 0.00010209766454352442, | |
| "loss": 0.5229, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 0.49515789473684213, | |
| "grad_norm": 0.5299806594848633, | |
| "learning_rate": 0.00010192781316348197, | |
| "loss": 0.4623, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 0.496, | |
| "grad_norm": 0.620650053024292, | |
| "learning_rate": 0.0001017579617834395, | |
| "loss": 0.4374, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 0.4968421052631579, | |
| "grad_norm": 1.5846679210662842, | |
| "learning_rate": 0.00010158811040339704, | |
| "loss": 0.864, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.4976842105263158, | |
| "grad_norm": 0.3415738642215729, | |
| "learning_rate": 0.00010141825902335457, | |
| "loss": 0.6045, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 0.4985263157894737, | |
| "grad_norm": 0.4266776740550995, | |
| "learning_rate": 0.00010124840764331212, | |
| "loss": 0.442, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 0.4993684210526316, | |
| "grad_norm": 0.5931389927864075, | |
| "learning_rate": 0.00010107855626326966, | |
| "loss": 0.4766, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 0.5002105263157894, | |
| "grad_norm": 0.8721299767494202, | |
| "learning_rate": 0.00010090870488322719, | |
| "loss": 0.4228, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 0.5010526315789474, | |
| "grad_norm": 1.9695945978164673, | |
| "learning_rate": 0.00010073885350318472, | |
| "loss": 0.8368, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 0.5018947368421053, | |
| "grad_norm": 0.32946762442588806, | |
| "learning_rate": 0.00010056900212314226, | |
| "loss": 0.594, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 0.5027368421052631, | |
| "grad_norm": 0.40950503945350647, | |
| "learning_rate": 0.0001003991507430998, | |
| "loss": 0.45, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 0.503578947368421, | |
| "grad_norm": 0.5714607238769531, | |
| "learning_rate": 0.00010022929936305734, | |
| "loss": 0.4881, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 0.504421052631579, | |
| "grad_norm": 0.7367619276046753, | |
| "learning_rate": 0.00010005944798301487, | |
| "loss": 0.4109, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 0.5052631578947369, | |
| "grad_norm": 2.145780324935913, | |
| "learning_rate": 9.98895966029724e-05, | |
| "loss": 0.8434, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.5061052631578947, | |
| "grad_norm": 0.36616432666778564, | |
| "learning_rate": 9.971974522292994e-05, | |
| "loss": 0.4736, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 0.5069473684210526, | |
| "grad_norm": 0.46724286675453186, | |
| "learning_rate": 9.954989384288747e-05, | |
| "loss": 0.4062, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 0.5077894736842106, | |
| "grad_norm": 0.5795565843582153, | |
| "learning_rate": 9.938004246284501e-05, | |
| "loss": 0.3891, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 0.5086315789473684, | |
| "grad_norm": 0.6596546173095703, | |
| "learning_rate": 9.921019108280256e-05, | |
| "loss": 0.396, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 0.5094736842105263, | |
| "grad_norm": 1.6219359636306763, | |
| "learning_rate": 9.904033970276009e-05, | |
| "loss": 0.8966, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 0.5103157894736842, | |
| "grad_norm": 0.40633150935173035, | |
| "learning_rate": 9.887048832271762e-05, | |
| "loss": 0.4844, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 0.5111578947368421, | |
| "grad_norm": 0.42162320017814636, | |
| "learning_rate": 9.870063694267516e-05, | |
| "loss": 0.5059, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 0.512, | |
| "grad_norm": 0.45179325342178345, | |
| "learning_rate": 9.85307855626327e-05, | |
| "loss": 0.3242, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 0.5128421052631579, | |
| "grad_norm": 0.521105170249939, | |
| "learning_rate": 9.836093418259024e-05, | |
| "loss": 0.3204, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 0.5136842105263157, | |
| "grad_norm": 1.7168673276901245, | |
| "learning_rate": 9.819108280254777e-05, | |
| "loss": 0.8103, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.5145263157894737, | |
| "grad_norm": 0.44698312878608704, | |
| "learning_rate": 9.802123142250531e-05, | |
| "loss": 0.5399, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 0.5153684210526316, | |
| "grad_norm": 0.47150564193725586, | |
| "learning_rate": 9.785138004246285e-05, | |
| "loss": 0.4054, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 0.5162105263157895, | |
| "grad_norm": 0.5537845492362976, | |
| "learning_rate": 9.768152866242039e-05, | |
| "loss": 0.4963, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 0.5170526315789473, | |
| "grad_norm": 1.0748082399368286, | |
| "learning_rate": 9.751167728237792e-05, | |
| "loss": 0.5656, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 0.5178947368421053, | |
| "grad_norm": 1.717982530593872, | |
| "learning_rate": 9.734182590233546e-05, | |
| "loss": 0.9553, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 0.5187368421052632, | |
| "grad_norm": 0.6548579931259155, | |
| "learning_rate": 9.717197452229299e-05, | |
| "loss": 0.606, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 0.519578947368421, | |
| "grad_norm": 0.48931124806404114, | |
| "learning_rate": 9.700212314225054e-05, | |
| "loss": 0.4891, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 0.5204210526315789, | |
| "grad_norm": 0.5882300734519958, | |
| "learning_rate": 9.683227176220807e-05, | |
| "loss": 0.5242, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 0.5212631578947369, | |
| "grad_norm": 0.8208670616149902, | |
| "learning_rate": 9.66624203821656e-05, | |
| "loss": 0.4845, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 0.5221052631578947, | |
| "grad_norm": 1.7853714227676392, | |
| "learning_rate": 9.649256900212314e-05, | |
| "loss": 0.9512, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.5229473684210526, | |
| "grad_norm": 0.353689044713974, | |
| "learning_rate": 9.632271762208069e-05, | |
| "loss": 0.4359, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 0.5237894736842105, | |
| "grad_norm": 0.4187658727169037, | |
| "learning_rate": 9.615286624203822e-05, | |
| "loss": 0.4497, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 0.5246315789473684, | |
| "grad_norm": 0.45738714933395386, | |
| "learning_rate": 9.598301486199575e-05, | |
| "loss": 0.4777, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 0.5254736842105263, | |
| "grad_norm": 0.8259281516075134, | |
| "learning_rate": 9.581316348195329e-05, | |
| "loss": 0.6149, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 0.5263157894736842, | |
| "grad_norm": 1.838474154472351, | |
| "learning_rate": 9.564331210191084e-05, | |
| "loss": 0.8437, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 0.5271578947368422, | |
| "grad_norm": 0.3545338809490204, | |
| "learning_rate": 9.547346072186837e-05, | |
| "loss": 0.3997, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 0.528, | |
| "grad_norm": 0.36879757046699524, | |
| "learning_rate": 9.53036093418259e-05, | |
| "loss": 0.5417, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 0.5288421052631579, | |
| "grad_norm": 0.6034509539604187, | |
| "learning_rate": 9.513375796178344e-05, | |
| "loss": 0.4581, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 0.5296842105263158, | |
| "grad_norm": 0.8801015615463257, | |
| "learning_rate": 9.496390658174099e-05, | |
| "loss": 0.4301, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 0.5305263157894737, | |
| "grad_norm": 2.0344324111938477, | |
| "learning_rate": 9.479405520169852e-05, | |
| "loss": 0.8094, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.5313684210526316, | |
| "grad_norm": 0.3488701581954956, | |
| "learning_rate": 9.462420382165605e-05, | |
| "loss": 0.6011, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 0.5322105263157895, | |
| "grad_norm": 0.44746795296669006, | |
| "learning_rate": 9.445435244161359e-05, | |
| "loss": 0.3912, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 0.5330526315789473, | |
| "grad_norm": 0.31405824422836304, | |
| "learning_rate": 9.428450106157113e-05, | |
| "loss": 0.3609, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 0.5338947368421053, | |
| "grad_norm": 0.9379681348800659, | |
| "learning_rate": 9.411464968152867e-05, | |
| "loss": 0.3793, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 0.5347368421052632, | |
| "grad_norm": 2.2353193759918213, | |
| "learning_rate": 9.39447983014862e-05, | |
| "loss": 1.0219, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 0.535578947368421, | |
| "grad_norm": 0.42344018816947937, | |
| "learning_rate": 9.377494692144374e-05, | |
| "loss": 0.5933, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 0.5364210526315789, | |
| "grad_norm": 0.4148212671279907, | |
| "learning_rate": 9.360509554140128e-05, | |
| "loss": 0.4368, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 0.5372631578947369, | |
| "grad_norm": 0.4840141832828522, | |
| "learning_rate": 9.343524416135882e-05, | |
| "loss": 0.3895, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 0.5381052631578948, | |
| "grad_norm": 0.6662552356719971, | |
| "learning_rate": 9.326539278131635e-05, | |
| "loss": 0.4226, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 0.5389473684210526, | |
| "grad_norm": 1.8149895668029785, | |
| "learning_rate": 9.309554140127389e-05, | |
| "loss": 0.7928, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.5397894736842105, | |
| "grad_norm": 0.34705260396003723, | |
| "learning_rate": 9.292569002123143e-05, | |
| "loss": 0.4934, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 0.5406315789473685, | |
| "grad_norm": 0.4355999529361725, | |
| "learning_rate": 9.275583864118897e-05, | |
| "loss": 0.4827, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 0.5414736842105263, | |
| "grad_norm": 0.5139076113700867, | |
| "learning_rate": 9.25859872611465e-05, | |
| "loss": 0.4206, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 0.5423157894736842, | |
| "grad_norm": 0.641939103603363, | |
| "learning_rate": 9.241613588110403e-05, | |
| "loss": 0.3451, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 0.5431578947368421, | |
| "grad_norm": 1.6043181419372559, | |
| "learning_rate": 9.224628450106157e-05, | |
| "loss": 0.6773, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 0.544, | |
| "grad_norm": 0.41280820965766907, | |
| "learning_rate": 9.207643312101912e-05, | |
| "loss": 0.7343, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 0.5448421052631579, | |
| "grad_norm": 0.49688437581062317, | |
| "learning_rate": 9.190658174097665e-05, | |
| "loss": 0.4707, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 0.5456842105263158, | |
| "grad_norm": 0.437663733959198, | |
| "learning_rate": 9.173673036093418e-05, | |
| "loss": 0.361, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 0.5465263157894736, | |
| "grad_norm": 0.5839958786964417, | |
| "learning_rate": 9.156687898089172e-05, | |
| "loss": 0.3983, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 0.5473684210526316, | |
| "grad_norm": 1.998749017715454, | |
| "learning_rate": 9.139702760084927e-05, | |
| "loss": 0.8788, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.5482105263157895, | |
| "grad_norm": 0.3655886650085449, | |
| "learning_rate": 9.12271762208068e-05, | |
| "loss": 0.5732, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 0.5490526315789473, | |
| "grad_norm": 0.3797145187854767, | |
| "learning_rate": 9.105732484076433e-05, | |
| "loss": 0.3746, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 0.5498947368421052, | |
| "grad_norm": 0.4555297791957855, | |
| "learning_rate": 9.088747346072187e-05, | |
| "loss": 0.4607, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 0.5507368421052632, | |
| "grad_norm": 0.5361640453338623, | |
| "learning_rate": 9.071762208067941e-05, | |
| "loss": 0.4279, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 0.5515789473684211, | |
| "grad_norm": 1.3506211042404175, | |
| "learning_rate": 9.054777070063695e-05, | |
| "loss": 0.7126, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 0.5524210526315789, | |
| "grad_norm": 0.3545656204223633, | |
| "learning_rate": 9.037791932059448e-05, | |
| "loss": 0.5059, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 0.5532631578947368, | |
| "grad_norm": 0.5703416466712952, | |
| "learning_rate": 9.020806794055202e-05, | |
| "loss": 0.4461, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 0.5541052631578948, | |
| "grad_norm": 0.50933837890625, | |
| "learning_rate": 9.003821656050956e-05, | |
| "loss": 0.4362, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 0.5549473684210526, | |
| "grad_norm": 0.8454868793487549, | |
| "learning_rate": 8.98683651804671e-05, | |
| "loss": 0.5222, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 0.5557894736842105, | |
| "grad_norm": 1.7412545680999756, | |
| "learning_rate": 8.969851380042463e-05, | |
| "loss": 0.8838, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.5566315789473684, | |
| "grad_norm": 0.3488069176673889, | |
| "learning_rate": 8.952866242038217e-05, | |
| "loss": 0.5161, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 0.5574736842105263, | |
| "grad_norm": 0.5215222239494324, | |
| "learning_rate": 8.935881104033971e-05, | |
| "loss": 0.5151, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 0.5583157894736842, | |
| "grad_norm": 0.4489799439907074, | |
| "learning_rate": 8.918895966029725e-05, | |
| "loss": 0.4329, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 0.5591578947368421, | |
| "grad_norm": 0.7236605882644653, | |
| "learning_rate": 8.901910828025478e-05, | |
| "loss": 0.4311, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 1.7031099796295166, | |
| "learning_rate": 8.884925690021231e-05, | |
| "loss": 0.8816, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 0.5608421052631579, | |
| "grad_norm": 0.37654176354408264, | |
| "learning_rate": 8.867940552016986e-05, | |
| "loss": 0.5018, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 0.5616842105263158, | |
| "grad_norm": 0.4404195249080658, | |
| "learning_rate": 8.85095541401274e-05, | |
| "loss": 0.4173, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 0.5625263157894737, | |
| "grad_norm": 0.4034554660320282, | |
| "learning_rate": 8.833970276008493e-05, | |
| "loss": 0.5456, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 0.5633684210526316, | |
| "grad_norm": 0.8815232515335083, | |
| "learning_rate": 8.816985138004246e-05, | |
| "loss": 0.5054, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 0.5642105263157895, | |
| "grad_norm": 1.9308191537857056, | |
| "learning_rate": 8.800000000000001e-05, | |
| "loss": 0.8496, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.5650526315789474, | |
| "grad_norm": 0.45235711336135864, | |
| "learning_rate": 8.783014861995755e-05, | |
| "loss": 0.6675, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 0.5658947368421052, | |
| "grad_norm": 0.48554515838623047, | |
| "learning_rate": 8.766029723991508e-05, | |
| "loss": 0.551, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 0.5667368421052632, | |
| "grad_norm": 0.5193004608154297, | |
| "learning_rate": 8.749044585987261e-05, | |
| "loss": 0.3911, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 0.5675789473684211, | |
| "grad_norm": 0.6188972592353821, | |
| "learning_rate": 8.732059447983015e-05, | |
| "loss": 0.3075, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 0.5684210526315789, | |
| "grad_norm": 2.9124598503112793, | |
| "learning_rate": 8.71507430997877e-05, | |
| "loss": 0.7141, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 0.5692631578947368, | |
| "grad_norm": 0.3527674674987793, | |
| "learning_rate": 8.698089171974523e-05, | |
| "loss": 0.4783, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 0.5701052631578948, | |
| "grad_norm": 0.33679574728012085, | |
| "learning_rate": 8.681104033970276e-05, | |
| "loss": 0.4256, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 0.5709473684210526, | |
| "grad_norm": 0.4084140360355377, | |
| "learning_rate": 8.66411889596603e-05, | |
| "loss": 0.4866, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 0.5717894736842105, | |
| "grad_norm": 0.7694320678710938, | |
| "learning_rate": 8.647133757961784e-05, | |
| "loss": 0.412, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 0.5726315789473684, | |
| "grad_norm": 1.674834966659546, | |
| "learning_rate": 8.630148619957538e-05, | |
| "loss": 0.7666, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.5734736842105264, | |
| "grad_norm": 0.33270978927612305, | |
| "learning_rate": 8.613163481953291e-05, | |
| "loss": 0.4935, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 0.5743157894736842, | |
| "grad_norm": 0.3835461139678955, | |
| "learning_rate": 8.596178343949045e-05, | |
| "loss": 0.4028, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 0.5751578947368421, | |
| "grad_norm": 0.44173529744148254, | |
| "learning_rate": 8.579193205944799e-05, | |
| "loss": 0.3889, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 0.576, | |
| "grad_norm": 0.622157871723175, | |
| "learning_rate": 8.562208067940553e-05, | |
| "loss": 0.5212, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 0.5768421052631579, | |
| "grad_norm": 2.094125747680664, | |
| "learning_rate": 8.545222929936306e-05, | |
| "loss": 0.923, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 0.5776842105263158, | |
| "grad_norm": 0.4058837890625, | |
| "learning_rate": 8.52823779193206e-05, | |
| "loss": 0.4192, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 0.5785263157894737, | |
| "grad_norm": 0.4020976722240448, | |
| "learning_rate": 8.511252653927814e-05, | |
| "loss": 0.4444, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 0.5793684210526315, | |
| "grad_norm": 0.41881394386291504, | |
| "learning_rate": 8.494267515923568e-05, | |
| "loss": 0.3557, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 0.5802105263157895, | |
| "grad_norm": 0.7409086227416992, | |
| "learning_rate": 8.477282377919321e-05, | |
| "loss": 0.4267, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 0.5810526315789474, | |
| "grad_norm": 1.5581746101379395, | |
| "learning_rate": 8.460297239915074e-05, | |
| "loss": 0.7418, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.5818947368421052, | |
| "grad_norm": 0.34820643067359924, | |
| "learning_rate": 8.443312101910829e-05, | |
| "loss": 0.5094, | |
| "step": 6910 | |
| }, | |
| { | |
| "epoch": 0.5827368421052631, | |
| "grad_norm": 0.36036792397499084, | |
| "learning_rate": 8.426326963906583e-05, | |
| "loss": 0.441, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 0.5835789473684211, | |
| "grad_norm": 0.5172143578529358, | |
| "learning_rate": 8.409341825902336e-05, | |
| "loss": 0.4403, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 0.584421052631579, | |
| "grad_norm": 0.646388828754425, | |
| "learning_rate": 8.392356687898089e-05, | |
| "loss": 0.4083, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 0.5852631578947368, | |
| "grad_norm": 2.0343103408813477, | |
| "learning_rate": 8.375371549893844e-05, | |
| "loss": 0.9618, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 0.5861052631578947, | |
| "grad_norm": 0.31648343801498413, | |
| "learning_rate": 8.358386411889597e-05, | |
| "loss": 0.672, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 0.5869473684210527, | |
| "grad_norm": 0.4494827389717102, | |
| "learning_rate": 8.341401273885351e-05, | |
| "loss": 0.4666, | |
| "step": 6970 | |
| }, | |
| { | |
| "epoch": 0.5877894736842105, | |
| "grad_norm": 0.49414846301078796, | |
| "learning_rate": 8.324416135881104e-05, | |
| "loss": 0.4915, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 0.5886315789473684, | |
| "grad_norm": 0.771273136138916, | |
| "learning_rate": 8.307430997876859e-05, | |
| "loss": 0.5075, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 0.5894736842105263, | |
| "grad_norm": 3.525618076324463, | |
| "learning_rate": 8.290445859872612e-05, | |
| "loss": 0.814, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.5903157894736842, | |
| "grad_norm": 0.32761165499687195, | |
| "learning_rate": 8.273460721868366e-05, | |
| "loss": 0.5086, | |
| "step": 7010 | |
| }, | |
| { | |
| "epoch": 0.5911578947368421, | |
| "grad_norm": 0.360644668340683, | |
| "learning_rate": 8.256475583864119e-05, | |
| "loss": 0.5022, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 0.592, | |
| "grad_norm": 0.4749659299850464, | |
| "learning_rate": 8.239490445859873e-05, | |
| "loss": 0.3701, | |
| "step": 7030 | |
| }, | |
| { | |
| "epoch": 0.592842105263158, | |
| "grad_norm": 0.7997972965240479, | |
| "learning_rate": 8.222505307855627e-05, | |
| "loss": 0.4521, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 0.5936842105263158, | |
| "grad_norm": 2.1665775775909424, | |
| "learning_rate": 8.205520169851381e-05, | |
| "loss": 0.9126, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 0.5945263157894737, | |
| "grad_norm": 0.3611660599708557, | |
| "learning_rate": 8.188535031847134e-05, | |
| "loss": 0.5051, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 0.5953684210526315, | |
| "grad_norm": 0.45509031414985657, | |
| "learning_rate": 8.171549893842887e-05, | |
| "loss": 0.4151, | |
| "step": 7070 | |
| }, | |
| { | |
| "epoch": 0.5962105263157895, | |
| "grad_norm": 0.39179739356040955, | |
| "learning_rate": 8.154564755838642e-05, | |
| "loss": 0.3851, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 0.5970526315789474, | |
| "grad_norm": 1.0507985353469849, | |
| "learning_rate": 8.137579617834396e-05, | |
| "loss": 0.6043, | |
| "step": 7090 | |
| }, | |
| { | |
| "epoch": 0.5978947368421053, | |
| "grad_norm": 1.78620183467865, | |
| "learning_rate": 8.120594479830149e-05, | |
| "loss": 1.0623, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.5987368421052631, | |
| "grad_norm": 0.4446164071559906, | |
| "learning_rate": 8.103609341825902e-05, | |
| "loss": 0.5451, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 0.5995789473684211, | |
| "grad_norm": 0.44868966937065125, | |
| "learning_rate": 8.086624203821657e-05, | |
| "loss": 0.5643, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 0.600421052631579, | |
| "grad_norm": 0.4379372000694275, | |
| "learning_rate": 8.06963906581741e-05, | |
| "loss": 0.382, | |
| "step": 7130 | |
| }, | |
| { | |
| "epoch": 0.6012631578947368, | |
| "grad_norm": 0.8946030735969543, | |
| "learning_rate": 8.052653927813164e-05, | |
| "loss": 0.482, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 0.6021052631578947, | |
| "grad_norm": 1.4377981424331665, | |
| "learning_rate": 8.035668789808917e-05, | |
| "loss": 0.9919, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 0.6029473684210527, | |
| "grad_norm": 0.3892590403556824, | |
| "learning_rate": 8.018683651804672e-05, | |
| "loss": 0.5398, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 0.6037894736842105, | |
| "grad_norm": 0.4308941960334778, | |
| "learning_rate": 8.001698513800425e-05, | |
| "loss": 0.4227, | |
| "step": 7170 | |
| }, | |
| { | |
| "epoch": 0.6046315789473684, | |
| "grad_norm": 0.5427702069282532, | |
| "learning_rate": 7.984713375796179e-05, | |
| "loss": 0.4529, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 0.6054736842105263, | |
| "grad_norm": 0.7260848879814148, | |
| "learning_rate": 7.967728237791932e-05, | |
| "loss": 0.3762, | |
| "step": 7190 | |
| }, | |
| { | |
| "epoch": 0.6063157894736843, | |
| "grad_norm": 2.3360161781311035, | |
| "learning_rate": 7.950743099787687e-05, | |
| "loss": 0.8866, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.6071578947368421, | |
| "grad_norm": 0.40340855717658997, | |
| "learning_rate": 7.93375796178344e-05, | |
| "loss": 0.5545, | |
| "step": 7210 | |
| }, | |
| { | |
| "epoch": 0.608, | |
| "grad_norm": 0.3597467541694641, | |
| "learning_rate": 7.916772823779194e-05, | |
| "loss": 0.4485, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 0.6088421052631579, | |
| "grad_norm": 0.39148929715156555, | |
| "learning_rate": 7.899787685774947e-05, | |
| "loss": 0.4329, | |
| "step": 7230 | |
| }, | |
| { | |
| "epoch": 0.6096842105263158, | |
| "grad_norm": 0.7953996658325195, | |
| "learning_rate": 7.882802547770702e-05, | |
| "loss": 0.3859, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 0.6105263157894737, | |
| "grad_norm": 1.3665465116500854, | |
| "learning_rate": 7.865817409766455e-05, | |
| "loss": 0.6957, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 0.6113684210526316, | |
| "grad_norm": 0.3502986431121826, | |
| "learning_rate": 7.848832271762209e-05, | |
| "loss": 0.5458, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 0.6122105263157894, | |
| "grad_norm": 0.5014148354530334, | |
| "learning_rate": 7.831847133757962e-05, | |
| "loss": 0.5342, | |
| "step": 7270 | |
| }, | |
| { | |
| "epoch": 0.6130526315789474, | |
| "grad_norm": 0.4739173650741577, | |
| "learning_rate": 7.814861995753715e-05, | |
| "loss": 0.4255, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 0.6138947368421053, | |
| "grad_norm": 0.8123893737792969, | |
| "learning_rate": 7.79787685774947e-05, | |
| "loss": 0.3516, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 0.6147368421052631, | |
| "grad_norm": 1.4595434665679932, | |
| "learning_rate": 7.780891719745224e-05, | |
| "loss": 0.7016, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.615578947368421, | |
| "grad_norm": 0.5389490723609924, | |
| "learning_rate": 7.763906581740977e-05, | |
| "loss": 0.5036, | |
| "step": 7310 | |
| }, | |
| { | |
| "epoch": 0.616421052631579, | |
| "grad_norm": 0.5022503733634949, | |
| "learning_rate": 7.74692144373673e-05, | |
| "loss": 0.4802, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 0.6172631578947368, | |
| "grad_norm": 0.5243063569068909, | |
| "learning_rate": 7.729936305732485e-05, | |
| "loss": 0.4034, | |
| "step": 7330 | |
| }, | |
| { | |
| "epoch": 0.6181052631578947, | |
| "grad_norm": 0.79988694190979, | |
| "learning_rate": 7.712951167728239e-05, | |
| "loss": 0.3253, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 0.6189473684210526, | |
| "grad_norm": 2.4979023933410645, | |
| "learning_rate": 7.695966029723992e-05, | |
| "loss": 0.8422, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 0.6197894736842106, | |
| "grad_norm": 0.48066073656082153, | |
| "learning_rate": 7.678980891719745e-05, | |
| "loss": 0.5312, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 0.6206315789473684, | |
| "grad_norm": 0.41666412353515625, | |
| "learning_rate": 7.6619957537155e-05, | |
| "loss": 0.4475, | |
| "step": 7370 | |
| }, | |
| { | |
| "epoch": 0.6214736842105263, | |
| "grad_norm": 0.7244799137115479, | |
| "learning_rate": 7.645010615711253e-05, | |
| "loss": 0.3882, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 0.6223157894736842, | |
| "grad_norm": 0.8004568219184875, | |
| "learning_rate": 7.628025477707007e-05, | |
| "loss": 0.5399, | |
| "step": 7390 | |
| }, | |
| { | |
| "epoch": 0.6231578947368421, | |
| "grad_norm": 2.1711270809173584, | |
| "learning_rate": 7.61104033970276e-05, | |
| "loss": 1.0735, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.624, | |
| "grad_norm": 0.3782844543457031, | |
| "learning_rate": 7.594055201698515e-05, | |
| "loss": 0.5268, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 0.6248421052631579, | |
| "grad_norm": 0.4545004963874817, | |
| "learning_rate": 7.577070063694268e-05, | |
| "loss": 0.4519, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 0.6256842105263158, | |
| "grad_norm": 0.5942566394805908, | |
| "learning_rate": 7.560084925690022e-05, | |
| "loss": 0.4009, | |
| "step": 7430 | |
| }, | |
| { | |
| "epoch": 0.6265263157894737, | |
| "grad_norm": 0.867929995059967, | |
| "learning_rate": 7.543099787685775e-05, | |
| "loss": 0.4948, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 0.6273684210526316, | |
| "grad_norm": 2.595675230026245, | |
| "learning_rate": 7.52611464968153e-05, | |
| "loss": 0.8613, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 0.6282105263157894, | |
| "grad_norm": 0.4917793571949005, | |
| "learning_rate": 7.509129511677283e-05, | |
| "loss": 0.5887, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 0.6290526315789474, | |
| "grad_norm": 0.328522264957428, | |
| "learning_rate": 7.492144373673037e-05, | |
| "loss": 0.4422, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 0.6298947368421053, | |
| "grad_norm": 0.45172616839408875, | |
| "learning_rate": 7.47515923566879e-05, | |
| "loss": 0.355, | |
| "step": 7480 | |
| }, | |
| { | |
| "epoch": 0.6307368421052632, | |
| "grad_norm": 0.8733186721801758, | |
| "learning_rate": 7.458174097664545e-05, | |
| "loss": 0.416, | |
| "step": 7490 | |
| }, | |
| { | |
| "epoch": 0.631578947368421, | |
| "grad_norm": 1.5660936832427979, | |
| "learning_rate": 7.441188959660298e-05, | |
| "loss": 0.9255, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.632421052631579, | |
| "grad_norm": 0.339676171541214, | |
| "learning_rate": 7.424203821656052e-05, | |
| "loss": 0.5741, | |
| "step": 7510 | |
| }, | |
| { | |
| "epoch": 0.6332631578947369, | |
| "grad_norm": 0.5170826315879822, | |
| "learning_rate": 7.407218683651805e-05, | |
| "loss": 0.4798, | |
| "step": 7520 | |
| }, | |
| { | |
| "epoch": 0.6341052631578947, | |
| "grad_norm": 0.518412172794342, | |
| "learning_rate": 7.39023354564756e-05, | |
| "loss": 0.3851, | |
| "step": 7530 | |
| }, | |
| { | |
| "epoch": 0.6349473684210526, | |
| "grad_norm": 0.8092654943466187, | |
| "learning_rate": 7.373248407643313e-05, | |
| "loss": 0.3828, | |
| "step": 7540 | |
| }, | |
| { | |
| "epoch": 0.6357894736842106, | |
| "grad_norm": 1.6960948705673218, | |
| "learning_rate": 7.356263269639067e-05, | |
| "loss": 0.8314, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 0.6366315789473684, | |
| "grad_norm": 0.46050435304641724, | |
| "learning_rate": 7.33927813163482e-05, | |
| "loss": 0.5946, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 0.6374736842105263, | |
| "grad_norm": 0.4946458637714386, | |
| "learning_rate": 7.322292993630573e-05, | |
| "loss": 0.4398, | |
| "step": 7570 | |
| }, | |
| { | |
| "epoch": 0.6383157894736842, | |
| "grad_norm": 0.4214226007461548, | |
| "learning_rate": 7.305307855626328e-05, | |
| "loss": 0.3334, | |
| "step": 7580 | |
| }, | |
| { | |
| "epoch": 0.6391578947368421, | |
| "grad_norm": 1.2122212648391724, | |
| "learning_rate": 7.288322717622081e-05, | |
| "loss": 0.3931, | |
| "step": 7590 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 1.7408720254898071, | |
| "learning_rate": 7.271337579617835e-05, | |
| "loss": 0.6906, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.6408421052631579, | |
| "grad_norm": 0.4397237300872803, | |
| "learning_rate": 7.254352441613588e-05, | |
| "loss": 0.5082, | |
| "step": 7610 | |
| }, | |
| { | |
| "epoch": 0.6416842105263157, | |
| "grad_norm": 0.39046570658683777, | |
| "learning_rate": 7.237367303609343e-05, | |
| "loss": 0.4112, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 0.6425263157894737, | |
| "grad_norm": 0.4175955355167389, | |
| "learning_rate": 7.220382165605096e-05, | |
| "loss": 0.4524, | |
| "step": 7630 | |
| }, | |
| { | |
| "epoch": 0.6433684210526316, | |
| "grad_norm": 0.8807598948478699, | |
| "learning_rate": 7.20339702760085e-05, | |
| "loss": 0.4278, | |
| "step": 7640 | |
| }, | |
| { | |
| "epoch": 0.6442105263157895, | |
| "grad_norm": 1.9032187461853027, | |
| "learning_rate": 7.186411889596603e-05, | |
| "loss": 0.7373, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 0.6450526315789473, | |
| "grad_norm": 0.4502487778663635, | |
| "learning_rate": 7.169426751592358e-05, | |
| "loss": 0.3928, | |
| "step": 7660 | |
| }, | |
| { | |
| "epoch": 0.6458947368421053, | |
| "grad_norm": 0.6761430501937866, | |
| "learning_rate": 7.152441613588111e-05, | |
| "loss": 0.3859, | |
| "step": 7670 | |
| }, | |
| { | |
| "epoch": 0.6467368421052632, | |
| "grad_norm": 0.4130532145500183, | |
| "learning_rate": 7.135456475583865e-05, | |
| "loss": 0.3485, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 0.647578947368421, | |
| "grad_norm": 1.2999097108840942, | |
| "learning_rate": 7.118471337579618e-05, | |
| "loss": 0.4113, | |
| "step": 7690 | |
| }, | |
| { | |
| "epoch": 0.6484210526315789, | |
| "grad_norm": 1.205367088317871, | |
| "learning_rate": 7.101486199575373e-05, | |
| "loss": 0.627, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.6492631578947369, | |
| "grad_norm": 0.43658527731895447, | |
| "learning_rate": 7.084501061571126e-05, | |
| "loss": 0.5305, | |
| "step": 7710 | |
| }, | |
| { | |
| "epoch": 0.6501052631578947, | |
| "grad_norm": 0.627872884273529, | |
| "learning_rate": 7.06751592356688e-05, | |
| "loss": 0.563, | |
| "step": 7720 | |
| }, | |
| { | |
| "epoch": 0.6509473684210526, | |
| "grad_norm": 0.5612541437149048, | |
| "learning_rate": 7.050530785562633e-05, | |
| "loss": 0.4298, | |
| "step": 7730 | |
| }, | |
| { | |
| "epoch": 0.6517894736842105, | |
| "grad_norm": 0.8334627747535706, | |
| "learning_rate": 7.033545647558388e-05, | |
| "loss": 0.3597, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 0.6526315789473685, | |
| "grad_norm": 2.3180391788482666, | |
| "learning_rate": 7.016560509554141e-05, | |
| "loss": 0.8694, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 0.6534736842105263, | |
| "grad_norm": 0.35839709639549255, | |
| "learning_rate": 6.999575371549895e-05, | |
| "loss": 0.3914, | |
| "step": 7760 | |
| }, | |
| { | |
| "epoch": 0.6543157894736842, | |
| "grad_norm": 0.5006768107414246, | |
| "learning_rate": 6.982590233545648e-05, | |
| "loss": 0.4471, | |
| "step": 7770 | |
| }, | |
| { | |
| "epoch": 0.655157894736842, | |
| "grad_norm": 0.40057373046875, | |
| "learning_rate": 6.965605095541403e-05, | |
| "loss": 0.3642, | |
| "step": 7780 | |
| }, | |
| { | |
| "epoch": 0.656, | |
| "grad_norm": 0.718809187412262, | |
| "learning_rate": 6.948619957537156e-05, | |
| "loss": 0.4577, | |
| "step": 7790 | |
| }, | |
| { | |
| "epoch": 0.6568421052631579, | |
| "grad_norm": 1.9087162017822266, | |
| "learning_rate": 6.93163481953291e-05, | |
| "loss": 0.9204, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.6576842105263158, | |
| "grad_norm": 0.3778667151927948, | |
| "learning_rate": 6.914649681528663e-05, | |
| "loss": 0.5122, | |
| "step": 7810 | |
| }, | |
| { | |
| "epoch": 0.6585263157894737, | |
| "grad_norm": 0.4479425549507141, | |
| "learning_rate": 6.897664543524418e-05, | |
| "loss": 0.4437, | |
| "step": 7820 | |
| }, | |
| { | |
| "epoch": 0.6593684210526316, | |
| "grad_norm": 0.6616013646125793, | |
| "learning_rate": 6.880679405520171e-05, | |
| "loss": 0.4667, | |
| "step": 7830 | |
| }, | |
| { | |
| "epoch": 0.6602105263157895, | |
| "grad_norm": 0.6953230500221252, | |
| "learning_rate": 6.863694267515924e-05, | |
| "loss": 0.3329, | |
| "step": 7840 | |
| }, | |
| { | |
| "epoch": 0.6610526315789473, | |
| "grad_norm": 1.7211588621139526, | |
| "learning_rate": 6.846709129511678e-05, | |
| "loss": 0.7693, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 0.6618947368421053, | |
| "grad_norm": 0.3227670192718506, | |
| "learning_rate": 6.829723991507431e-05, | |
| "loss": 0.5612, | |
| "step": 7860 | |
| }, | |
| { | |
| "epoch": 0.6627368421052632, | |
| "grad_norm": 0.35804659128189087, | |
| "learning_rate": 6.812738853503186e-05, | |
| "loss": 0.4718, | |
| "step": 7870 | |
| }, | |
| { | |
| "epoch": 0.663578947368421, | |
| "grad_norm": 0.5097082853317261, | |
| "learning_rate": 6.795753715498939e-05, | |
| "loss": 0.3597, | |
| "step": 7880 | |
| }, | |
| { | |
| "epoch": 0.6644210526315789, | |
| "grad_norm": 0.8905921578407288, | |
| "learning_rate": 6.778768577494693e-05, | |
| "loss": 0.4448, | |
| "step": 7890 | |
| }, | |
| { | |
| "epoch": 0.6652631578947369, | |
| "grad_norm": 1.93904447555542, | |
| "learning_rate": 6.761783439490446e-05, | |
| "loss": 0.9481, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.6661052631578948, | |
| "grad_norm": 0.4530518054962158, | |
| "learning_rate": 6.744798301486201e-05, | |
| "loss": 0.5991, | |
| "step": 7910 | |
| }, | |
| { | |
| "epoch": 0.6669473684210526, | |
| "grad_norm": 0.4234374165534973, | |
| "learning_rate": 6.727813163481954e-05, | |
| "loss": 0.3928, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 0.6677894736842105, | |
| "grad_norm": 0.4781579375267029, | |
| "learning_rate": 6.710828025477708e-05, | |
| "loss": 0.4273, | |
| "step": 7930 | |
| }, | |
| { | |
| "epoch": 0.6686315789473685, | |
| "grad_norm": 1.1969267129898071, | |
| "learning_rate": 6.693842887473461e-05, | |
| "loss": 0.4818, | |
| "step": 7940 | |
| }, | |
| { | |
| "epoch": 0.6694736842105263, | |
| "grad_norm": 2.3066813945770264, | |
| "learning_rate": 6.676857749469216e-05, | |
| "loss": 0.8872, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 0.6703157894736842, | |
| "grad_norm": 0.4617319107055664, | |
| "learning_rate": 6.659872611464969e-05, | |
| "loss": 0.5357, | |
| "step": 7960 | |
| }, | |
| { | |
| "epoch": 0.6711578947368421, | |
| "grad_norm": 0.5731051564216614, | |
| "learning_rate": 6.642887473460723e-05, | |
| "loss": 0.4261, | |
| "step": 7970 | |
| }, | |
| { | |
| "epoch": 0.672, | |
| "grad_norm": 0.5270722508430481, | |
| "learning_rate": 6.625902335456476e-05, | |
| "loss": 0.4013, | |
| "step": 7980 | |
| }, | |
| { | |
| "epoch": 0.6728421052631579, | |
| "grad_norm": 0.7825943827629089, | |
| "learning_rate": 6.608917197452231e-05, | |
| "loss": 0.382, | |
| "step": 7990 | |
| }, | |
| { | |
| "epoch": 0.6736842105263158, | |
| "grad_norm": 1.549627423286438, | |
| "learning_rate": 6.591932059447984e-05, | |
| "loss": 0.7573, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.6745263157894736, | |
| "grad_norm": 0.37630459666252136, | |
| "learning_rate": 6.574946921443737e-05, | |
| "loss": 0.4964, | |
| "step": 8010 | |
| }, | |
| { | |
| "epoch": 0.6753684210526316, | |
| "grad_norm": 0.49831104278564453, | |
| "learning_rate": 6.557961783439491e-05, | |
| "loss": 0.5215, | |
| "step": 8020 | |
| }, | |
| { | |
| "epoch": 0.6762105263157895, | |
| "grad_norm": 0.3724237382411957, | |
| "learning_rate": 6.540976645435246e-05, | |
| "loss": 0.4817, | |
| "step": 8030 | |
| }, | |
| { | |
| "epoch": 0.6770526315789474, | |
| "grad_norm": 0.7709687352180481, | |
| "learning_rate": 6.523991507430999e-05, | |
| "loss": 0.4132, | |
| "step": 8040 | |
| }, | |
| { | |
| "epoch": 0.6778947368421052, | |
| "grad_norm": 1.9156075716018677, | |
| "learning_rate": 6.507006369426752e-05, | |
| "loss": 0.9424, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 0.6787368421052632, | |
| "grad_norm": 0.4514032304286957, | |
| "learning_rate": 6.490021231422504e-05, | |
| "loss": 0.5559, | |
| "step": 8060 | |
| }, | |
| { | |
| "epoch": 0.6795789473684211, | |
| "grad_norm": 0.44393032789230347, | |
| "learning_rate": 6.473036093418259e-05, | |
| "loss": 0.4563, | |
| "step": 8070 | |
| }, | |
| { | |
| "epoch": 0.6804210526315789, | |
| "grad_norm": 0.5609434843063354, | |
| "learning_rate": 6.456050955414013e-05, | |
| "loss": 0.413, | |
| "step": 8080 | |
| }, | |
| { | |
| "epoch": 0.6812631578947368, | |
| "grad_norm": 0.7650034427642822, | |
| "learning_rate": 6.439065817409766e-05, | |
| "loss": 0.5315, | |
| "step": 8090 | |
| }, | |
| { | |
| "epoch": 0.6821052631578948, | |
| "grad_norm": 1.606882929801941, | |
| "learning_rate": 6.42208067940552e-05, | |
| "loss": 0.7553, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.6829473684210526, | |
| "grad_norm": 0.3158307373523712, | |
| "learning_rate": 6.405095541401274e-05, | |
| "loss": 0.735, | |
| "step": 8110 | |
| }, | |
| { | |
| "epoch": 0.6837894736842105, | |
| "grad_norm": 0.45169734954833984, | |
| "learning_rate": 6.388110403397027e-05, | |
| "loss": 0.4593, | |
| "step": 8120 | |
| }, | |
| { | |
| "epoch": 0.6846315789473684, | |
| "grad_norm": 0.46328073740005493, | |
| "learning_rate": 6.371125265392781e-05, | |
| "loss": 0.4411, | |
| "step": 8130 | |
| }, | |
| { | |
| "epoch": 0.6854736842105263, | |
| "grad_norm": 1.4630694389343262, | |
| "learning_rate": 6.354140127388534e-05, | |
| "loss": 0.3707, | |
| "step": 8140 | |
| }, | |
| { | |
| "epoch": 0.6863157894736842, | |
| "grad_norm": 1.8492755889892578, | |
| "learning_rate": 6.337154989384289e-05, | |
| "loss": 0.9106, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 0.6871578947368421, | |
| "grad_norm": 0.36578068137168884, | |
| "learning_rate": 6.320169851380042e-05, | |
| "loss": 0.4788, | |
| "step": 8160 | |
| }, | |
| { | |
| "epoch": 0.688, | |
| "grad_norm": 0.5161772966384888, | |
| "learning_rate": 6.303184713375796e-05, | |
| "loss": 0.5123, | |
| "step": 8170 | |
| }, | |
| { | |
| "epoch": 0.6888421052631579, | |
| "grad_norm": 0.9331473708152771, | |
| "learning_rate": 6.286199575371549e-05, | |
| "loss": 0.4949, | |
| "step": 8180 | |
| }, | |
| { | |
| "epoch": 0.6896842105263158, | |
| "grad_norm": 0.7946401834487915, | |
| "learning_rate": 6.269214437367304e-05, | |
| "loss": 0.4313, | |
| "step": 8190 | |
| }, | |
| { | |
| "epoch": 0.6905263157894737, | |
| "grad_norm": 1.8209006786346436, | |
| "learning_rate": 6.252229299363057e-05, | |
| "loss": 0.947, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.6913684210526316, | |
| "grad_norm": 0.34166574478149414, | |
| "learning_rate": 6.235244161358811e-05, | |
| "loss": 0.63, | |
| "step": 8210 | |
| }, | |
| { | |
| "epoch": 0.6922105263157895, | |
| "grad_norm": 0.3546527326107025, | |
| "learning_rate": 6.218259023354564e-05, | |
| "loss": 0.4265, | |
| "step": 8220 | |
| }, | |
| { | |
| "epoch": 0.6930526315789474, | |
| "grad_norm": 0.4769902527332306, | |
| "learning_rate": 6.201273885350319e-05, | |
| "loss": 0.4186, | |
| "step": 8230 | |
| }, | |
| { | |
| "epoch": 0.6938947368421052, | |
| "grad_norm": 0.6809453964233398, | |
| "learning_rate": 6.184288747346072e-05, | |
| "loss": 0.379, | |
| "step": 8240 | |
| }, | |
| { | |
| "epoch": 0.6947368421052632, | |
| "grad_norm": 2.208510637283325, | |
| "learning_rate": 6.167303609341826e-05, | |
| "loss": 0.7516, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 0.6955789473684211, | |
| "grad_norm": 0.35100066661834717, | |
| "learning_rate": 6.150318471337579e-05, | |
| "loss": 0.6133, | |
| "step": 8260 | |
| }, | |
| { | |
| "epoch": 0.6964210526315789, | |
| "grad_norm": 0.6335263252258301, | |
| "learning_rate": 6.133333333333334e-05, | |
| "loss": 0.4057, | |
| "step": 8270 | |
| }, | |
| { | |
| "epoch": 0.6972631578947368, | |
| "grad_norm": 0.44616439938545227, | |
| "learning_rate": 6.116348195329087e-05, | |
| "loss": 0.4113, | |
| "step": 8280 | |
| }, | |
| { | |
| "epoch": 0.6981052631578948, | |
| "grad_norm": 0.861053466796875, | |
| "learning_rate": 6.0993630573248406e-05, | |
| "loss": 0.5107, | |
| "step": 8290 | |
| }, | |
| { | |
| "epoch": 0.6989473684210527, | |
| "grad_norm": 1.3923041820526123, | |
| "learning_rate": 6.0823779193205946e-05, | |
| "loss": 0.9659, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 0.6997894736842105, | |
| "grad_norm": 0.35979071259498596, | |
| "learning_rate": 6.065392781316348e-05, | |
| "loss": 0.416, | |
| "step": 8310 | |
| }, | |
| { | |
| "epoch": 0.7006315789473684, | |
| "grad_norm": 0.4446561932563782, | |
| "learning_rate": 6.0484076433121014e-05, | |
| "loss": 0.4416, | |
| "step": 8320 | |
| }, | |
| { | |
| "epoch": 0.7014736842105264, | |
| "grad_norm": 0.5659903287887573, | |
| "learning_rate": 6.0314225053078555e-05, | |
| "loss": 0.4381, | |
| "step": 8330 | |
| }, | |
| { | |
| "epoch": 0.7023157894736842, | |
| "grad_norm": 0.6388218998908997, | |
| "learning_rate": 6.014437367303609e-05, | |
| "loss": 0.4136, | |
| "step": 8340 | |
| }, | |
| { | |
| "epoch": 0.7031578947368421, | |
| "grad_norm": 1.5065667629241943, | |
| "learning_rate": 5.997452229299363e-05, | |
| "loss": 0.9649, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 0.704, | |
| "grad_norm": 0.41632726788520813, | |
| "learning_rate": 5.9804670912951163e-05, | |
| "loss": 0.5943, | |
| "step": 8360 | |
| }, | |
| { | |
| "epoch": 0.7048421052631579, | |
| "grad_norm": 0.3307408094406128, | |
| "learning_rate": 5.9634819532908704e-05, | |
| "loss": 0.375, | |
| "step": 8370 | |
| }, | |
| { | |
| "epoch": 0.7056842105263158, | |
| "grad_norm": 0.5643453001976013, | |
| "learning_rate": 5.946496815286624e-05, | |
| "loss": 0.4256, | |
| "step": 8380 | |
| }, | |
| { | |
| "epoch": 0.7065263157894737, | |
| "grad_norm": 1.0194138288497925, | |
| "learning_rate": 5.929511677282378e-05, | |
| "loss": 0.3947, | |
| "step": 8390 | |
| }, | |
| { | |
| "epoch": 0.7073684210526315, | |
| "grad_norm": 1.5986536741256714, | |
| "learning_rate": 5.912526539278131e-05, | |
| "loss": 0.8802, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.7082105263157895, | |
| "grad_norm": 0.3610769808292389, | |
| "learning_rate": 5.8955414012738853e-05, | |
| "loss": 0.527, | |
| "step": 8410 | |
| }, | |
| { | |
| "epoch": 0.7090526315789474, | |
| "grad_norm": 0.4082561135292053, | |
| "learning_rate": 5.878556263269639e-05, | |
| "loss": 0.473, | |
| "step": 8420 | |
| }, | |
| { | |
| "epoch": 0.7098947368421052, | |
| "grad_norm": 0.5180047750473022, | |
| "learning_rate": 5.861571125265393e-05, | |
| "loss": 0.4129, | |
| "step": 8430 | |
| }, | |
| { | |
| "epoch": 0.7107368421052631, | |
| "grad_norm": 0.6872524619102478, | |
| "learning_rate": 5.844585987261146e-05, | |
| "loss": 0.5247, | |
| "step": 8440 | |
| }, | |
| { | |
| "epoch": 0.7115789473684211, | |
| "grad_norm": 3.061389923095703, | |
| "learning_rate": 5.8276008492569e-05, | |
| "loss": 1.0157, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 0.712421052631579, | |
| "grad_norm": 0.3320797383785248, | |
| "learning_rate": 5.8106157112526537e-05, | |
| "loss": 0.6245, | |
| "step": 8460 | |
| }, | |
| { | |
| "epoch": 0.7132631578947368, | |
| "grad_norm": 0.43711647391319275, | |
| "learning_rate": 5.793630573248408e-05, | |
| "loss": 0.5184, | |
| "step": 8470 | |
| }, | |
| { | |
| "epoch": 0.7141052631578947, | |
| "grad_norm": 0.5436128973960876, | |
| "learning_rate": 5.776645435244161e-05, | |
| "loss": 0.4446, | |
| "step": 8480 | |
| }, | |
| { | |
| "epoch": 0.7149473684210527, | |
| "grad_norm": 0.4923207759857178, | |
| "learning_rate": 5.759660297239915e-05, | |
| "loss": 0.415, | |
| "step": 8490 | |
| }, | |
| { | |
| "epoch": 0.7157894736842105, | |
| "grad_norm": 1.497881293296814, | |
| "learning_rate": 5.7426751592356686e-05, | |
| "loss": 0.8214, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.7166315789473684, | |
| "grad_norm": 0.399183064699173, | |
| "learning_rate": 5.7256900212314226e-05, | |
| "loss": 0.6317, | |
| "step": 8510 | |
| }, | |
| { | |
| "epoch": 0.7174736842105263, | |
| "grad_norm": 0.3991619646549225, | |
| "learning_rate": 5.708704883227176e-05, | |
| "loss": 0.4428, | |
| "step": 8520 | |
| }, | |
| { | |
| "epoch": 0.7183157894736842, | |
| "grad_norm": 0.4187392294406891, | |
| "learning_rate": 5.69171974522293e-05, | |
| "loss": 0.4256, | |
| "step": 8530 | |
| }, | |
| { | |
| "epoch": 0.7191578947368421, | |
| "grad_norm": 0.6216957569122314, | |
| "learning_rate": 5.6747346072186835e-05, | |
| "loss": 0.3957, | |
| "step": 8540 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 1.7188841104507446, | |
| "learning_rate": 5.6577494692144376e-05, | |
| "loss": 0.7779, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 0.7208421052631578, | |
| "grad_norm": 0.4558383524417877, | |
| "learning_rate": 5.640764331210191e-05, | |
| "loss": 0.4152, | |
| "step": 8560 | |
| }, | |
| { | |
| "epoch": 0.7216842105263158, | |
| "grad_norm": 0.3992440700531006, | |
| "learning_rate": 5.623779193205945e-05, | |
| "loss": 0.5559, | |
| "step": 8570 | |
| }, | |
| { | |
| "epoch": 0.7225263157894737, | |
| "grad_norm": 0.6103470921516418, | |
| "learning_rate": 5.6067940552016984e-05, | |
| "loss": 0.4331, | |
| "step": 8580 | |
| }, | |
| { | |
| "epoch": 0.7233684210526315, | |
| "grad_norm": 0.6912561655044556, | |
| "learning_rate": 5.589808917197452e-05, | |
| "loss": 0.4378, | |
| "step": 8590 | |
| }, | |
| { | |
| "epoch": 0.7242105263157895, | |
| "grad_norm": 1.8499550819396973, | |
| "learning_rate": 5.572823779193206e-05, | |
| "loss": 0.8668, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.7250526315789474, | |
| "grad_norm": 0.3514401614665985, | |
| "learning_rate": 5.555838641188959e-05, | |
| "loss": 0.5245, | |
| "step": 8610 | |
| }, | |
| { | |
| "epoch": 0.7258947368421053, | |
| "grad_norm": 0.3800720274448395, | |
| "learning_rate": 5.5388535031847133e-05, | |
| "loss": 0.4505, | |
| "step": 8620 | |
| }, | |
| { | |
| "epoch": 0.7267368421052631, | |
| "grad_norm": 0.4218486249446869, | |
| "learning_rate": 5.521868365180467e-05, | |
| "loss": 0.3987, | |
| "step": 8630 | |
| }, | |
| { | |
| "epoch": 0.7275789473684211, | |
| "grad_norm": 0.628242015838623, | |
| "learning_rate": 5.504883227176221e-05, | |
| "loss": 0.4487, | |
| "step": 8640 | |
| }, | |
| { | |
| "epoch": 0.728421052631579, | |
| "grad_norm": 1.90850031375885, | |
| "learning_rate": 5.487898089171974e-05, | |
| "loss": 0.8822, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 0.7292631578947368, | |
| "grad_norm": 0.4279062747955322, | |
| "learning_rate": 5.470912951167728e-05, | |
| "loss": 0.5737, | |
| "step": 8660 | |
| }, | |
| { | |
| "epoch": 0.7301052631578947, | |
| "grad_norm": 0.43441444635391235, | |
| "learning_rate": 5.4539278131634817e-05, | |
| "loss": 0.4914, | |
| "step": 8670 | |
| }, | |
| { | |
| "epoch": 0.7309473684210527, | |
| "grad_norm": 0.5786249041557312, | |
| "learning_rate": 5.436942675159236e-05, | |
| "loss": 0.438, | |
| "step": 8680 | |
| }, | |
| { | |
| "epoch": 0.7317894736842105, | |
| "grad_norm": 1.024880051612854, | |
| "learning_rate": 5.419957537154989e-05, | |
| "loss": 0.4167, | |
| "step": 8690 | |
| }, | |
| { | |
| "epoch": 0.7326315789473684, | |
| "grad_norm": 1.7963647842407227, | |
| "learning_rate": 5.402972399150743e-05, | |
| "loss": 0.9042, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.7334736842105263, | |
| "grad_norm": 0.3581956624984741, | |
| "learning_rate": 5.3859872611464966e-05, | |
| "loss": 0.6061, | |
| "step": 8710 | |
| }, | |
| { | |
| "epoch": 0.7343157894736843, | |
| "grad_norm": 0.4279233515262604, | |
| "learning_rate": 5.3690021231422507e-05, | |
| "loss": 0.3736, | |
| "step": 8720 | |
| }, | |
| { | |
| "epoch": 0.7351578947368421, | |
| "grad_norm": 0.7165946364402771, | |
| "learning_rate": 5.352016985138004e-05, | |
| "loss": 0.4305, | |
| "step": 8730 | |
| }, | |
| { | |
| "epoch": 0.736, | |
| "grad_norm": 0.8655468821525574, | |
| "learning_rate": 5.335031847133758e-05, | |
| "loss": 0.4339, | |
| "step": 8740 | |
| }, | |
| { | |
| "epoch": 0.7368421052631579, | |
| "grad_norm": 2.037754535675049, | |
| "learning_rate": 5.3180467091295115e-05, | |
| "loss": 0.907, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 0.7376842105263158, | |
| "grad_norm": 0.39947813749313354, | |
| "learning_rate": 5.3010615711252656e-05, | |
| "loss": 0.6141, | |
| "step": 8760 | |
| }, | |
| { | |
| "epoch": 0.7385263157894737, | |
| "grad_norm": 0.4504844844341278, | |
| "learning_rate": 5.284076433121019e-05, | |
| "loss": 0.4128, | |
| "step": 8770 | |
| }, | |
| { | |
| "epoch": 0.7393684210526316, | |
| "grad_norm": 0.5372864603996277, | |
| "learning_rate": 5.267091295116773e-05, | |
| "loss": 0.425, | |
| "step": 8780 | |
| }, | |
| { | |
| "epoch": 0.7402105263157894, | |
| "grad_norm": 0.7260842323303223, | |
| "learning_rate": 5.2501061571125264e-05, | |
| "loss": 0.3603, | |
| "step": 8790 | |
| }, | |
| { | |
| "epoch": 0.7410526315789474, | |
| "grad_norm": 1.2780221700668335, | |
| "learning_rate": 5.2331210191082805e-05, | |
| "loss": 0.8323, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.7418947368421053, | |
| "grad_norm": 0.3879433572292328, | |
| "learning_rate": 5.216135881104034e-05, | |
| "loss": 0.6051, | |
| "step": 8810 | |
| }, | |
| { | |
| "epoch": 0.7427368421052631, | |
| "grad_norm": 0.3971099555492401, | |
| "learning_rate": 5.199150743099788e-05, | |
| "loss": 0.3754, | |
| "step": 8820 | |
| }, | |
| { | |
| "epoch": 0.743578947368421, | |
| "grad_norm": 0.3676632046699524, | |
| "learning_rate": 5.1821656050955414e-05, | |
| "loss": 0.4155, | |
| "step": 8830 | |
| }, | |
| { | |
| "epoch": 0.744421052631579, | |
| "grad_norm": 1.5542161464691162, | |
| "learning_rate": 5.1651804670912954e-05, | |
| "loss": 0.4394, | |
| "step": 8840 | |
| }, | |
| { | |
| "epoch": 0.7452631578947368, | |
| "grad_norm": 1.4534896612167358, | |
| "learning_rate": 5.148195329087049e-05, | |
| "loss": 0.829, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 0.7461052631578947, | |
| "grad_norm": 0.33657190203666687, | |
| "learning_rate": 5.131210191082802e-05, | |
| "loss": 0.6299, | |
| "step": 8860 | |
| }, | |
| { | |
| "epoch": 0.7469473684210526, | |
| "grad_norm": 0.46987345814704895, | |
| "learning_rate": 5.114225053078556e-05, | |
| "loss": 0.4006, | |
| "step": 8870 | |
| }, | |
| { | |
| "epoch": 0.7477894736842106, | |
| "grad_norm": 0.4385448098182678, | |
| "learning_rate": 5.09723991507431e-05, | |
| "loss": 0.4004, | |
| "step": 8880 | |
| }, | |
| { | |
| "epoch": 0.7486315789473684, | |
| "grad_norm": 0.5760071277618408, | |
| "learning_rate": 5.080254777070064e-05, | |
| "loss": 0.4355, | |
| "step": 8890 | |
| }, | |
| { | |
| "epoch": 0.7494736842105263, | |
| "grad_norm": 1.576697826385498, | |
| "learning_rate": 5.063269639065817e-05, | |
| "loss": 0.8731, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 0.7503157894736842, | |
| "grad_norm": 0.32101970911026, | |
| "learning_rate": 5.046284501061571e-05, | |
| "loss": 0.6691, | |
| "step": 8910 | |
| }, | |
| { | |
| "epoch": 0.7511578947368421, | |
| "grad_norm": 0.5039673447608948, | |
| "learning_rate": 5.0292993630573246e-05, | |
| "loss": 0.4811, | |
| "step": 8920 | |
| }, | |
| { | |
| "epoch": 0.752, | |
| "grad_norm": 0.7147976160049438, | |
| "learning_rate": 5.0123142250530787e-05, | |
| "loss": 0.429, | |
| "step": 8930 | |
| }, | |
| { | |
| "epoch": 0.7528421052631579, | |
| "grad_norm": 0.5322943329811096, | |
| "learning_rate": 4.995329087048832e-05, | |
| "loss": 0.3924, | |
| "step": 8940 | |
| }, | |
| { | |
| "epoch": 0.7536842105263157, | |
| "grad_norm": 2.1477651596069336, | |
| "learning_rate": 4.978343949044586e-05, | |
| "loss": 0.9269, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 0.7545263157894737, | |
| "grad_norm": 0.35740819573402405, | |
| "learning_rate": 4.9613588110403395e-05, | |
| "loss": 0.6156, | |
| "step": 8960 | |
| }, | |
| { | |
| "epoch": 0.7553684210526316, | |
| "grad_norm": 0.35125550627708435, | |
| "learning_rate": 4.9443736730360936e-05, | |
| "loss": 0.5494, | |
| "step": 8970 | |
| }, | |
| { | |
| "epoch": 0.7562105263157894, | |
| "grad_norm": 0.49241068959236145, | |
| "learning_rate": 4.927388535031847e-05, | |
| "loss": 0.3939, | |
| "step": 8980 | |
| }, | |
| { | |
| "epoch": 0.7570526315789474, | |
| "grad_norm": 1.1347302198410034, | |
| "learning_rate": 4.910403397027601e-05, | |
| "loss": 0.435, | |
| "step": 8990 | |
| }, | |
| { | |
| "epoch": 0.7578947368421053, | |
| "grad_norm": 1.793202519416809, | |
| "learning_rate": 4.8934182590233544e-05, | |
| "loss": 0.8874, | |
| "step": 9000 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 11875, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.788050680719032e+17, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
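For reference, below is a minimal sketch of how one might load and summarize a trainer state dump like the one above. It assumes the content has been saved locally as "trainer_state.json" (the filename is an assumption, as is the 1.5 gradient-norm threshold used to count spikes). Python's built-in json module is used because, unlike strict JSON parsers, it accepts NaN literals by default, so any grad_norm entries logged as NaN load without error.

```python
import json
import math

# Minimal sketch: load a Trainer state file and summarize the logged loss.
# "trainer_state.json" is an assumed local path, not a path from this document.
with open("trainer_state.json") as f:
    # Python's json module parses NaN/Infinity literals by default,
    # so non-strict values in grad_norm do not raise an error here.
    state = json.load(f)

history = state["log_history"]
steps = [e["step"] for e in history if "loss" in e]
losses = [e["loss"] for e in history if "loss" in e]

print(f"logged points: {len(steps)} (every {state['logging_steps']} steps, "
      f"up to step {state['global_step']} of {state['max_steps']})")
print(f"final loss: {losses[-1]:.4f}, min loss: {min(losses):.4f}")

# Count gradient-norm spikes above an arbitrary threshold (1.5 is an assumption).
spikes = [e["step"] for e in history
          if "grad_norm" in e
          and not math.isnan(e["grad_norm"])
          and e["grad_norm"] > 1.5]
print(f"steps with grad_norm > 1.5: {len(spikes)}")
```

Nothing in this sketch depends on the transformers library itself; apart from the possible NaN values, the state file is plain JSON, so the standard library is enough to inspect it.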