| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9984532159196, | |
| "eval_steps": 500, | |
| "global_step": 122000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "grad_norm": 1.3454886674880981, | |
| "learning_rate": 9.991815957246561e-06, | |
| "loss": 5.7653, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "grad_norm": 0.9953828454017639, | |
| "learning_rate": 9.983631914493123e-06, | |
| "loss": 5.6192, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "grad_norm": 0.7844876646995544, | |
| "learning_rate": 9.975447871739683e-06, | |
| "loss": 5.4452, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "grad_norm": 0.567190945148468, | |
| "learning_rate": 9.967263828986244e-06, | |
| "loss": 5.3183, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "grad_norm": 0.5304737687110901, | |
| "learning_rate": 9.959079786232804e-06, | |
| "loss": 5.2156, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "grad_norm": 0.3840586841106415, | |
| "learning_rate": 9.950895743479366e-06, | |
| "loss": 5.1203, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 0.34571415185928345, | |
| "learning_rate": 9.942711700725926e-06, | |
| "loss": 5.0732, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 0.32429179549217224, | |
| "learning_rate": 9.934527657972486e-06, | |
| "loss": 5.0458, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 0.4024583101272583, | |
| "learning_rate": 9.926343615219047e-06, | |
| "loss": 5.0157, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 0.3507966101169586, | |
| "learning_rate": 9.918159572465607e-06, | |
| "loss": 4.9921, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 0.37788429856300354, | |
| "learning_rate": 9.909975529712169e-06, | |
| "loss": 4.9803, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 0.3670382797718048, | |
| "learning_rate": 9.901791486958729e-06, | |
| "loss": 4.9651, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 0.2876070737838745, | |
| "learning_rate": 9.89360744420529e-06, | |
| "loss": 4.9585, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 0.3649226725101471, | |
| "learning_rate": 9.88542340145185e-06, | |
| "loss": 4.9491, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 0.39525964856147766, | |
| "learning_rate": 9.87723935869841e-06, | |
| "loss": 4.9416, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 0.30799126625061035, | |
| "learning_rate": 9.86905531594497e-06, | |
| "loss": 4.9318, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 0.2949569523334503, | |
| "learning_rate": 9.860871273191532e-06, | |
| "loss": 4.9201, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 0.3036958575248718, | |
| "learning_rate": 9.852687230438092e-06, | |
| "loss": 4.9212, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 0.7450295686721802, | |
| "learning_rate": 9.844503187684653e-06, | |
| "loss": 4.9166, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 0.439261794090271, | |
| "learning_rate": 9.836319144931213e-06, | |
| "loss": 4.9028, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 0.28415539860725403, | |
| "learning_rate": 9.828135102177775e-06, | |
| "loss": 4.8989, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 0.3490685522556305, | |
| "learning_rate": 9.819951059424335e-06, | |
| "loss": 4.9008, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 0.4549264907836914, | |
| "learning_rate": 9.811767016670895e-06, | |
| "loss": 4.8968, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 0.3350650668144226, | |
| "learning_rate": 9.803582973917457e-06, | |
| "loss": 4.8886, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 0.41439351439476013, | |
| "learning_rate": 9.795398931164018e-06, | |
| "loss": 4.8869, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 0.47745582461357117, | |
| "learning_rate": 9.787214888410578e-06, | |
| "loss": 4.8864, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 0.4192076027393341, | |
| "learning_rate": 9.779030845657138e-06, | |
| "loss": 4.886, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 0.38134631514549255, | |
| "learning_rate": 9.7708468029037e-06, | |
| "loss": 4.8803, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 1.0664669275283813, | |
| "learning_rate": 9.76266276015026e-06, | |
| "loss": 4.8764, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 0.6459155678749084, | |
| "learning_rate": 9.75447871739682e-06, | |
| "loss": 4.8721, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 0.4217374622821808, | |
| "learning_rate": 9.746294674643381e-06, | |
| "loss": 4.8758, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 1.4077221155166626, | |
| "learning_rate": 9.738110631889943e-06, | |
| "loss": 4.8694, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 0.793872058391571, | |
| "learning_rate": 9.729926589136503e-06, | |
| "loss": 4.872, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 0.6410694718360901, | |
| "learning_rate": 9.721742546383063e-06, | |
| "loss": 4.8697, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 0.9800311923027039, | |
| "learning_rate": 9.713558503629624e-06, | |
| "loss": 4.86, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 0.999591052532196, | |
| "learning_rate": 9.705374460876184e-06, | |
| "loss": 4.8601, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 1.2334632873535156, | |
| "learning_rate": 9.697190418122744e-06, | |
| "loss": 4.8603, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 0.6848945617675781, | |
| "learning_rate": 9.689006375369305e-06, | |
| "loss": 4.8537, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 0.4569337069988251, | |
| "learning_rate": 9.680822332615866e-06, | |
| "loss": 4.859, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 1.2655045986175537, | |
| "learning_rate": 9.672638289862427e-06, | |
| "loss": 4.8556, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 1.2269976139068604, | |
| "learning_rate": 9.664454247108987e-06, | |
| "loss": 4.8569, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 0.5749172568321228, | |
| "learning_rate": 9.656270204355547e-06, | |
| "loss": 4.8549, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 0.5450171232223511, | |
| "learning_rate": 9.64808616160211e-06, | |
| "loss": 4.853, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 0.6002165675163269, | |
| "learning_rate": 9.63990211884867e-06, | |
| "loss": 4.8495, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 0.4352850914001465, | |
| "learning_rate": 9.63171807609523e-06, | |
| "loss": 4.8471, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 0.8037896752357483, | |
| "learning_rate": 9.623534033341792e-06, | |
| "loss": 4.841, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 0.7141408324241638, | |
| "learning_rate": 9.615349990588352e-06, | |
| "loss": 4.8433, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 1.3830125331878662, | |
| "learning_rate": 9.607165947834912e-06, | |
| "loss": 4.84, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 1.2149487733840942, | |
| "learning_rate": 9.598981905081473e-06, | |
| "loss": 4.8414, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 1.5489853620529175, | |
| "learning_rate": 9.590797862328034e-06, | |
| "loss": 4.8425, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 1.0431714057922363, | |
| "learning_rate": 9.582613819574595e-06, | |
| "loss": 4.8369, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 1.2853955030441284, | |
| "learning_rate": 9.574429776821155e-06, | |
| "loss": 4.8411, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 0.6190789937973022, | |
| "learning_rate": 9.566245734067715e-06, | |
| "loss": 4.8381, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 0.9078882336616516, | |
| "learning_rate": 9.558061691314277e-06, | |
| "loss": 4.8355, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 0.6225169897079468, | |
| "learning_rate": 9.549877648560838e-06, | |
| "loss": 4.8382, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 0.5269715189933777, | |
| "learning_rate": 9.541693605807398e-06, | |
| "loss": 4.8368, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 0.5707417130470276, | |
| "learning_rate": 9.533509563053958e-06, | |
| "loss": 4.8331, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 1.0135433673858643, | |
| "learning_rate": 9.525325520300518e-06, | |
| "loss": 4.8372, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 0.9796397686004639, | |
| "learning_rate": 9.517141477547079e-06, | |
| "loss": 4.8301, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 0.6885799765586853, | |
| "learning_rate": 9.50895743479364e-06, | |
| "loss": 4.8309, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 0.8593583703041077, | |
| "learning_rate": 9.5007733920402e-06, | |
| "loss": 4.829, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 1.0259130001068115, | |
| "learning_rate": 9.492589349286761e-06, | |
| "loss": 4.8298, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 0.5682145357131958, | |
| "learning_rate": 9.484405306533321e-06, | |
| "loss": 4.8283, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 1.9293478727340698, | |
| "learning_rate": 9.476221263779882e-06, | |
| "loss": 4.8321, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 1.0222758054733276, | |
| "learning_rate": 9.468037221026444e-06, | |
| "loss": 4.8286, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 0.748030960559845, | |
| "learning_rate": 9.459853178273004e-06, | |
| "loss": 4.8277, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 1.7026931047439575, | |
| "learning_rate": 9.451669135519564e-06, | |
| "loss": 4.8227, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 0.8596793413162231, | |
| "learning_rate": 9.443485092766126e-06, | |
| "loss": 4.8296, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 1.9839978218078613, | |
| "learning_rate": 9.435301050012686e-06, | |
| "loss": 4.8298, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 2.7572035789489746, | |
| "learning_rate": 9.427117007259247e-06, | |
| "loss": 4.8285, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 1.3615273237228394, | |
| "learning_rate": 9.418932964505807e-06, | |
| "loss": 4.8263, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 0.8525121808052063, | |
| "learning_rate": 9.410748921752369e-06, | |
| "loss": 4.8233, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 0.6726747751235962, | |
| "learning_rate": 9.402564878998929e-06, | |
| "loss": 4.8232, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 2.404517412185669, | |
| "learning_rate": 9.39438083624549e-06, | |
| "loss": 4.8222, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 2.3438527584075928, | |
| "learning_rate": 9.38619679349205e-06, | |
| "loss": 4.8275, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 0.9558647871017456, | |
| "learning_rate": 9.378012750738612e-06, | |
| "loss": 4.8188, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 1.5687648057937622, | |
| "learning_rate": 9.369828707985172e-06, | |
| "loss": 4.8203, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 4.528586387634277, | |
| "learning_rate": 9.361644665231732e-06, | |
| "loss": 4.8237, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 1.5821335315704346, | |
| "learning_rate": 9.353460622478292e-06, | |
| "loss": 4.8213, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 1.3139538764953613, | |
| "learning_rate": 9.345276579724853e-06, | |
| "loss": 4.8241, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 0.6411435008049011, | |
| "learning_rate": 9.337092536971415e-06, | |
| "loss": 4.8176, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 2.011660575866699, | |
| "learning_rate": 9.328908494217975e-06, | |
| "loss": 4.8142, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 0.9855501055717468, | |
| "learning_rate": 9.320724451464535e-06, | |
| "loss": 4.816, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 0.8996455073356628, | |
| "learning_rate": 9.312540408711095e-06, | |
| "loss": 4.8162, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 1.75440514087677, | |
| "learning_rate": 9.304356365957656e-06, | |
| "loss": 4.8174, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 0.5616024136543274, | |
| "learning_rate": 9.296172323204216e-06, | |
| "loss": 4.8137, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 1.5236667394638062, | |
| "learning_rate": 9.287988280450778e-06, | |
| "loss": 4.8156, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 0.869793176651001, | |
| "learning_rate": 9.279804237697338e-06, | |
| "loss": 4.8201, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 2.2847354412078857, | |
| "learning_rate": 9.271620194943898e-06, | |
| "loss": 4.816, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 1.1544415950775146, | |
| "learning_rate": 9.26343615219046e-06, | |
| "loss": 4.8174, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 0.6653382182121277, | |
| "learning_rate": 9.25525210943702e-06, | |
| "loss": 4.8136, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 1.0501242876052856, | |
| "learning_rate": 9.247068066683581e-06, | |
| "loss": 4.8104, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 0.6880828142166138, | |
| "learning_rate": 9.238884023930141e-06, | |
| "loss": 4.8093, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 0.9488196969032288, | |
| "learning_rate": 9.230699981176703e-06, | |
| "loss": 4.8121, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 4.403717041015625, | |
| "learning_rate": 9.222515938423263e-06, | |
| "loss": 4.8113, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 2.2665512561798096, | |
| "learning_rate": 9.214331895669824e-06, | |
| "loss": 4.8092, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 0.7057957053184509, | |
| "learning_rate": 9.206147852916384e-06, | |
| "loss": 4.8108, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 3.9034390449523926, | |
| "learning_rate": 9.197963810162946e-06, | |
| "loss": 4.808, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 0.9307584762573242, | |
| "learning_rate": 9.189779767409506e-06, | |
| "loss": 4.809, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 3.995647668838501, | |
| "learning_rate": 9.181595724656066e-06, | |
| "loss": 4.8079, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 2.2378621101379395, | |
| "learning_rate": 9.173411681902627e-06, | |
| "loss": 4.81, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 1.424851417541504, | |
| "learning_rate": 9.165227639149189e-06, | |
| "loss": 4.8045, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 1.8927842378616333, | |
| "learning_rate": 9.157043596395749e-06, | |
| "loss": 4.8085, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "grad_norm": 3.9594876766204834, | |
| "learning_rate": 9.14885955364231e-06, | |
| "loss": 4.808, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "grad_norm": 2.6992530822753906, | |
| "learning_rate": 9.14067551088887e-06, | |
| "loss": 4.8099, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "grad_norm": 1.1311434507369995, | |
| "learning_rate": 9.13249146813543e-06, | |
| "loss": 4.8071, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "grad_norm": 0.8243028521537781, | |
| "learning_rate": 9.12430742538199e-06, | |
| "loss": 4.8056, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "grad_norm": 1.7261260747909546, | |
| "learning_rate": 9.11612338262855e-06, | |
| "loss": 4.8076, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "grad_norm": 4.485065460205078, | |
| "learning_rate": 9.107939339875112e-06, | |
| "loss": 4.8072, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "grad_norm": 1.044236660003662, | |
| "learning_rate": 9.099755297121673e-06, | |
| "loss": 4.8076, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "grad_norm": 2.155515670776367, | |
| "learning_rate": 9.091571254368233e-06, | |
| "loss": 4.8061, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "grad_norm": 1.4560600519180298, | |
| "learning_rate": 9.083387211614795e-06, | |
| "loss": 4.8079, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "grad_norm": 2.577777624130249, | |
| "learning_rate": 9.075203168861355e-06, | |
| "loss": 4.8058, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "grad_norm": 2.1256022453308105, | |
| "learning_rate": 9.067019126107915e-06, | |
| "loss": 4.8047, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "grad_norm": 0.9217951893806458, | |
| "learning_rate": 9.058835083354476e-06, | |
| "loss": 4.8047, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "grad_norm": 1.4003558158874512, | |
| "learning_rate": 9.050651040601038e-06, | |
| "loss": 4.8006, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 1.1220279932022095, | |
| "learning_rate": 9.042466997847598e-06, | |
| "loss": 4.8061, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 3.371317148208618, | |
| "learning_rate": 9.034282955094158e-06, | |
| "loss": 4.8034, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 1.5556405782699585, | |
| "learning_rate": 9.026098912340718e-06, | |
| "loss": 4.8022, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 0.9803473949432373, | |
| "learning_rate": 9.01791486958728e-06, | |
| "loss": 4.8029, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 0.7116793990135193, | |
| "learning_rate": 9.00973082683384e-06, | |
| "loss": 4.8047, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 0.8312737941741943, | |
| "learning_rate": 9.0015467840804e-06, | |
| "loss": 4.8004, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 2.492680072784424, | |
| "learning_rate": 8.993362741326963e-06, | |
| "loss": 4.8031, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 1.0807607173919678, | |
| "learning_rate": 8.985178698573523e-06, | |
| "loss": 4.7997, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 4.5318403244018555, | |
| "learning_rate": 8.976994655820083e-06, | |
| "loss": 4.8052, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 6.294131278991699, | |
| "learning_rate": 8.968810613066644e-06, | |
| "loss": 4.8025, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 1.2838228940963745, | |
| "learning_rate": 8.960626570313204e-06, | |
| "loss": 4.8014, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 3.4684486389160156, | |
| "learning_rate": 8.952442527559764e-06, | |
| "loss": 4.7991, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "grad_norm": 0.9761227965354919, | |
| "learning_rate": 8.944258484806324e-06, | |
| "loss": 4.8013, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "grad_norm": 5.355940341949463, | |
| "learning_rate": 8.936074442052886e-06, | |
| "loss": 4.8014, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "grad_norm": 3.334829568862915, | |
| "learning_rate": 8.927890399299447e-06, | |
| "loss": 4.7994, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "grad_norm": 4.577433109283447, | |
| "learning_rate": 8.919706356546007e-06, | |
| "loss": 4.7931, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "grad_norm": 4.9149322509765625, | |
| "learning_rate": 8.911522313792567e-06, | |
| "loss": 4.8025, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "grad_norm": 1.5174356698989868, | |
| "learning_rate": 8.903338271039129e-06, | |
| "loss": 4.7982, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "grad_norm": 4.420125961303711, | |
| "learning_rate": 8.89515422828569e-06, | |
| "loss": 4.7976, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "grad_norm": 4.246557712554932, | |
| "learning_rate": 8.88697018553225e-06, | |
| "loss": 4.7954, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "grad_norm": 1.887624979019165, | |
| "learning_rate": 8.87878614277881e-06, | |
| "loss": 4.7956, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "grad_norm": 1.2699388265609741, | |
| "learning_rate": 8.870602100025372e-06, | |
| "loss": 4.7993, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "grad_norm": 2.5432891845703125, | |
| "learning_rate": 8.862418057271932e-06, | |
| "loss": 4.8017, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "grad_norm": 3.0773308277130127, | |
| "learning_rate": 8.854234014518492e-06, | |
| "loss": 4.8002, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 2.0699973106384277, | |
| "learning_rate": 8.846049971765053e-06, | |
| "loss": 4.7958, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 0.9238589406013489, | |
| "learning_rate": 8.837865929011615e-06, | |
| "loss": 4.7907, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 1.0784467458724976, | |
| "learning_rate": 8.829681886258175e-06, | |
| "loss": 4.797, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 1.1584768295288086, | |
| "learning_rate": 8.821497843504735e-06, | |
| "loss": 4.7947, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 1.0812736749649048, | |
| "learning_rate": 8.813313800751297e-06, | |
| "loss": 4.7894, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 1.7061692476272583, | |
| "learning_rate": 8.805129757997857e-06, | |
| "loss": 4.7946, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 2.771735429763794, | |
| "learning_rate": 8.796945715244418e-06, | |
| "loss": 4.7973, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 2.1708245277404785, | |
| "learning_rate": 8.788761672490978e-06, | |
| "loss": 4.7953, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 2.4777073860168457, | |
| "learning_rate": 8.780577629737538e-06, | |
| "loss": 4.7917, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 1.5953549146652222, | |
| "learning_rate": 8.772393586984098e-06, | |
| "loss": 4.7917, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 0.7896368503570557, | |
| "learning_rate": 8.76420954423066e-06, | |
| "loss": 4.7923, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 0.7050272822380066, | |
| "learning_rate": 8.75602550147722e-06, | |
| "loss": 4.7977, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "grad_norm": 4.6306023597717285, | |
| "learning_rate": 8.747841458723781e-06, | |
| "loss": 4.7911, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "grad_norm": 2.3503997325897217, | |
| "learning_rate": 8.739657415970341e-06, | |
| "loss": 4.7895, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "grad_norm": 4.242427825927734, | |
| "learning_rate": 8.731473373216901e-06, | |
| "loss": 4.7902, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "grad_norm": 0.8283806443214417, | |
| "learning_rate": 8.723289330463463e-06, | |
| "loss": 4.7938, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "grad_norm": 3.901630401611328, | |
| "learning_rate": 8.715105287710024e-06, | |
| "loss": 4.7936, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "grad_norm": 6.563976764678955, | |
| "learning_rate": 8.706921244956584e-06, | |
| "loss": 4.7907, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "grad_norm": 2.2243263721466064, | |
| "learning_rate": 8.698737202203144e-06, | |
| "loss": 4.7852, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "grad_norm": 2.0529608726501465, | |
| "learning_rate": 8.690553159449706e-06, | |
| "loss": 4.7916, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "grad_norm": 3.0728704929351807, | |
| "learning_rate": 8.682369116696266e-06, | |
| "loss": 4.7947, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "grad_norm": 0.8949060440063477, | |
| "learning_rate": 8.674185073942827e-06, | |
| "loss": 4.7935, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "grad_norm": 0.9687060713768005, | |
| "learning_rate": 8.666001031189387e-06, | |
| "loss": 4.7939, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "grad_norm": 1.091792345046997, | |
| "learning_rate": 8.657816988435949e-06, | |
| "loss": 4.7941, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 0.7492835521697998, | |
| "learning_rate": 8.64963294568251e-06, | |
| "loss": 4.7897, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 1.856666088104248, | |
| "learning_rate": 8.64144890292907e-06, | |
| "loss": 4.7917, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 2.1373181343078613, | |
| "learning_rate": 8.633264860175631e-06, | |
| "loss": 4.7875, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 3.3695366382598877, | |
| "learning_rate": 8.625080817422192e-06, | |
| "loss": 4.7935, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 3.0197083950042725, | |
| "learning_rate": 8.616896774668752e-06, | |
| "loss": 4.7896, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 2.717109441757202, | |
| "learning_rate": 8.608712731915312e-06, | |
| "loss": 4.7898, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 2.4040961265563965, | |
| "learning_rate": 8.600528689161873e-06, | |
| "loss": 4.7919, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 7.917181968688965, | |
| "learning_rate": 8.592344646408434e-06, | |
| "loss": 4.7904, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 1.2932788133621216, | |
| "learning_rate": 8.584160603654995e-06, | |
| "loss": 4.7856, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 4.480923175811768, | |
| "learning_rate": 8.575976560901555e-06, | |
| "loss": 4.7952, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 1.211406946182251, | |
| "learning_rate": 8.567792518148115e-06, | |
| "loss": 4.7903, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 1.0489588975906372, | |
| "learning_rate": 8.559608475394676e-06, | |
| "loss": 4.7941, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 3.5961625576019287, | |
| "learning_rate": 8.551424432641236e-06, | |
| "loss": 4.7845, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "grad_norm": 1.3053170442581177, | |
| "learning_rate": 8.543240389887798e-06, | |
| "loss": 4.7919, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "grad_norm": 0.7930333614349365, | |
| "learning_rate": 8.535056347134358e-06, | |
| "loss": 4.7893, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "grad_norm": 0.9989149570465088, | |
| "learning_rate": 8.526872304380918e-06, | |
| "loss": 4.7879, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "grad_norm": 7.412938594818115, | |
| "learning_rate": 8.518688261627479e-06, | |
| "loss": 4.7876, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "grad_norm": 0.9795100688934326, | |
| "learning_rate": 8.51050421887404e-06, | |
| "loss": 4.7895, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "grad_norm": 1.8868602514266968, | |
| "learning_rate": 8.5023201761206e-06, | |
| "loss": 4.7895, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "grad_norm": 0.9831721186637878, | |
| "learning_rate": 8.494136133367161e-06, | |
| "loss": 4.7874, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "grad_norm": 3.5411503314971924, | |
| "learning_rate": 8.485952090613721e-06, | |
| "loss": 4.786, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "grad_norm": 2.617466926574707, | |
| "learning_rate": 8.477768047860283e-06, | |
| "loss": 4.7832, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "grad_norm": 2.1326940059661865, | |
| "learning_rate": 8.469584005106844e-06, | |
| "loss": 4.789, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "grad_norm": 2.020888328552246, | |
| "learning_rate": 8.461399962353404e-06, | |
| "loss": 4.7867, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "grad_norm": 2.366783618927002, | |
| "learning_rate": 8.453215919599966e-06, | |
| "loss": 4.7871, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 2.739832878112793, | |
| "learning_rate": 8.445031876846526e-06, | |
| "loss": 4.7829, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 1.9564040899276733, | |
| "learning_rate": 8.436847834093086e-06, | |
| "loss": 4.7901, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 4.063528537750244, | |
| "learning_rate": 8.428663791339647e-06, | |
| "loss": 4.7839, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 1.9199309349060059, | |
| "learning_rate": 8.420479748586209e-06, | |
| "loss": 4.7919, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 0.8330548405647278, | |
| "learning_rate": 8.412295705832769e-06, | |
| "loss": 4.7877, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 2.459280490875244, | |
| "learning_rate": 8.404111663079329e-06, | |
| "loss": 4.7854, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 5.8329572677612305, | |
| "learning_rate": 8.39592762032589e-06, | |
| "loss": 4.7864, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 1.6338814496994019, | |
| "learning_rate": 8.38774357757245e-06, | |
| "loss": 4.7855, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 0.8343069553375244, | |
| "learning_rate": 8.37955953481901e-06, | |
| "loss": 4.786, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 1.5812463760375977, | |
| "learning_rate": 8.37137549206557e-06, | |
| "loss": 4.784, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 1.3143149614334106, | |
| "learning_rate": 8.363191449312132e-06, | |
| "loss": 4.7761, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 2.27919340133667, | |
| "learning_rate": 8.355007406558692e-06, | |
| "loss": 4.7841, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "grad_norm": 2.111786365509033, | |
| "learning_rate": 8.346823363805253e-06, | |
| "loss": 4.7902, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "grad_norm": 1.4459553956985474, | |
| "learning_rate": 8.338639321051813e-06, | |
| "loss": 4.7827, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "grad_norm": 5.489434242248535, | |
| "learning_rate": 8.330455278298375e-06, | |
| "loss": 4.7804, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "grad_norm": 0.8438489437103271, | |
| "learning_rate": 8.322271235544935e-06, | |
| "loss": 4.7826, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "grad_norm": 0.8289409875869751, | |
| "learning_rate": 8.314087192791495e-06, | |
| "loss": 4.788, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "grad_norm": 2.2698094844818115, | |
| "learning_rate": 8.305903150038056e-06, | |
| "loss": 4.785, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "grad_norm": 6.737819194793701, | |
| "learning_rate": 8.297719107284618e-06, | |
| "loss": 4.7879, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "grad_norm": 1.1006556749343872, | |
| "learning_rate": 8.289535064531178e-06, | |
| "loss": 4.7861, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "grad_norm": 0.757918119430542, | |
| "learning_rate": 8.281351021777738e-06, | |
| "loss": 4.7838, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "grad_norm": 1.443129539489746, | |
| "learning_rate": 8.273166979024298e-06, | |
| "loss": 4.7815, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "grad_norm": 1.2434520721435547, | |
| "learning_rate": 8.26498293627086e-06, | |
| "loss": 4.7837, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "grad_norm": 0.9488204717636108, | |
| "learning_rate": 8.25679889351742e-06, | |
| "loss": 4.7826, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 1.4644496440887451, | |
| "learning_rate": 8.248614850763981e-06, | |
| "loss": 4.7805, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 1.1131641864776611, | |
| "learning_rate": 8.240430808010543e-06, | |
| "loss": 4.7848, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 0.9792927503585815, | |
| "learning_rate": 8.232246765257103e-06, | |
| "loss": 4.7844, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 6.909112453460693, | |
| "learning_rate": 8.224062722503663e-06, | |
| "loss": 4.7791, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 1.1648467779159546, | |
| "learning_rate": 8.215878679750224e-06, | |
| "loss": 4.7834, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 1.4722325801849365, | |
| "learning_rate": 8.207694636996784e-06, | |
| "loss": 4.7775, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 0.8322229385375977, | |
| "learning_rate": 8.199510594243344e-06, | |
| "loss": 4.7848, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 1.4244656562805176, | |
| "learning_rate": 8.191326551489906e-06, | |
| "loss": 4.7784, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 3.1128671169281006, | |
| "learning_rate": 8.183142508736466e-06, | |
| "loss": 4.7819, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 2.6106808185577393, | |
| "learning_rate": 8.174958465983027e-06, | |
| "loss": 4.7791, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 5.204963207244873, | |
| "learning_rate": 8.166774423229587e-06, | |
| "loss": 4.7841, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 0.9447894096374512, | |
| "learning_rate": 8.158590380476147e-06, | |
| "loss": 4.7776, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 1.2476049661636353, | |
| "learning_rate": 8.15040633772271e-06, | |
| "loss": 4.7759, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "grad_norm": 0.7892690896987915, | |
| "learning_rate": 8.14222229496927e-06, | |
| "loss": 4.7797, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "grad_norm": 2.874403476715088, | |
| "learning_rate": 8.13403825221583e-06, | |
| "loss": 4.7807, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "grad_norm": 2.8661632537841797, | |
| "learning_rate": 8.12585420946239e-06, | |
| "loss": 4.7772, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "grad_norm": 0.8099445104598999, | |
| "learning_rate": 8.117670166708952e-06, | |
| "loss": 4.7814, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "grad_norm": 1.0212429761886597, | |
| "learning_rate": 8.109486123955512e-06, | |
| "loss": 4.7802, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "grad_norm": 1.0928311347961426, | |
| "learning_rate": 8.101302081202072e-06, | |
| "loss": 4.777, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "grad_norm": 1.8933788537979126, | |
| "learning_rate": 8.093118038448633e-06, | |
| "loss": 4.7832, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "grad_norm": 0.9154900908470154, | |
| "learning_rate": 8.084933995695195e-06, | |
| "loss": 4.7854, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "grad_norm": 4.510463714599609, | |
| "learning_rate": 8.076749952941755e-06, | |
| "loss": 4.7869, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "grad_norm": 2.9989612102508545, | |
| "learning_rate": 8.068565910188315e-06, | |
| "loss": 4.785, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "grad_norm": 1.1799613237380981, | |
| "learning_rate": 8.060381867434877e-06, | |
| "loss": 4.7845, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "grad_norm": 0.8453476428985596, | |
| "learning_rate": 8.052197824681437e-06, | |
| "loss": 4.7849, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 1.3734474182128906, | |
| "learning_rate": 8.044013781927998e-06, | |
| "loss": 4.7799, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 7.692543983459473, | |
| "learning_rate": 8.035829739174558e-06, | |
| "loss": 4.7787, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 4.369629859924316, | |
| "learning_rate": 8.027645696421118e-06, | |
| "loss": 4.779, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 1.2958143949508667, | |
| "learning_rate": 8.01946165366768e-06, | |
| "loss": 4.7801, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 0.838066041469574, | |
| "learning_rate": 8.01127761091424e-06, | |
| "loss": 4.78, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 2.3854634761810303, | |
| "learning_rate": 8.0030935681608e-06, | |
| "loss": 4.7797, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 1.1053544282913208, | |
| "learning_rate": 7.994909525407361e-06, | |
| "loss": 4.7816, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 1.4308538436889648, | |
| "learning_rate": 7.986725482653921e-06, | |
| "loss": 4.7819, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 3.392876148223877, | |
| "learning_rate": 7.978541439900482e-06, | |
| "loss": 4.7782, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 1.5023071765899658, | |
| "learning_rate": 7.970357397147044e-06, | |
| "loss": 4.7828, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 0.7376088500022888, | |
| "learning_rate": 7.962173354393604e-06, | |
| "loss": 4.7816, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 3.311122417449951, | |
| "learning_rate": 7.953989311640164e-06, | |
| "loss": 4.7819, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "grad_norm": 1.5772916078567505, | |
| "learning_rate": 7.945805268886724e-06, | |
| "loss": 4.7779, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "grad_norm": 0.9813963174819946, | |
| "learning_rate": 7.937621226133286e-06, | |
| "loss": 4.781, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "grad_norm": 0.7451574802398682, | |
| "learning_rate": 7.929437183379847e-06, | |
| "loss": 4.7817, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "grad_norm": 1.064371943473816, | |
| "learning_rate": 7.921253140626407e-06, | |
| "loss": 4.7794, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "grad_norm": 1.4085423946380615, | |
| "learning_rate": 7.913069097872967e-06, | |
| "loss": 4.7838, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "grad_norm": 0.9586715698242188, | |
| "learning_rate": 7.904885055119529e-06, | |
| "loss": 4.7799, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "grad_norm": 2.1608307361602783, | |
| "learning_rate": 7.89670101236609e-06, | |
| "loss": 4.7803, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "grad_norm": 0.8091859221458435, | |
| "learning_rate": 7.88851696961265e-06, | |
| "loss": 4.784, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "grad_norm": 0.7846326231956482, | |
| "learning_rate": 7.880332926859212e-06, | |
| "loss": 4.778, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "grad_norm": 2.0327844619750977, | |
| "learning_rate": 7.872148884105772e-06, | |
| "loss": 4.7825, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "grad_norm": 1.0779097080230713, | |
| "learning_rate": 7.863964841352332e-06, | |
| "loss": 4.7765, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "grad_norm": 1.2201029062271118, | |
| "learning_rate": 7.855780798598892e-06, | |
| "loss": 4.776, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 1.4344494342803955, | |
| "learning_rate": 7.847596755845454e-06, | |
| "loss": 4.7781, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 1.114273190498352, | |
| "learning_rate": 7.839412713092015e-06, | |
| "loss": 4.7764, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 0.8567458391189575, | |
| "learning_rate": 7.831228670338575e-06, | |
| "loss": 4.7831, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 4.325285911560059, | |
| "learning_rate": 7.823044627585135e-06, | |
| "loss": 4.7811, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 1.2828031778335571, | |
| "learning_rate": 7.814860584831695e-06, | |
| "loss": 4.7795, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 2.048180341720581, | |
| "learning_rate": 7.806676542078256e-06, | |
| "loss": 4.7782, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 0.9398120045661926, | |
| "learning_rate": 7.798492499324818e-06, | |
| "loss": 4.778, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 0.8492949604988098, | |
| "learning_rate": 7.790308456571378e-06, | |
| "loss": 4.776, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 1.9857730865478516, | |
| "learning_rate": 7.782124413817938e-06, | |
| "loss": 4.7741, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 1.0787758827209473, | |
| "learning_rate": 7.773940371064498e-06, | |
| "loss": 4.7738, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 1.2202094793319702, | |
| "learning_rate": 7.765756328311059e-06, | |
| "loss": 4.777, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 6.543772220611572, | |
| "learning_rate": 7.75757228555762e-06, | |
| "loss": 4.778, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "grad_norm": 0.9749574065208435, | |
| "learning_rate": 7.749388242804181e-06, | |
| "loss": 4.7761, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "grad_norm": 1.2425750494003296, | |
| "learning_rate": 7.741204200050741e-06, | |
| "loss": 4.7799, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "grad_norm": 2.1919734477996826, | |
| "learning_rate": 7.733020157297301e-06, | |
| "loss": 4.7781, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "grad_norm": 1.6858105659484863, | |
| "learning_rate": 7.724836114543863e-06, | |
| "loss": 4.7761, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "grad_norm": 1.082306146621704, | |
| "learning_rate": 7.716652071790424e-06, | |
| "loss": 4.7727, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "grad_norm": 1.4394657611846924, | |
| "learning_rate": 7.708468029036984e-06, | |
| "loss": 4.7773, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "grad_norm": 3.90745210647583, | |
| "learning_rate": 7.700283986283546e-06, | |
| "loss": 4.7771, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "grad_norm": 1.1074409484863281, | |
| "learning_rate": 7.692099943530106e-06, | |
| "loss": 4.7813, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "grad_norm": 0.775972306728363, | |
| "learning_rate": 7.683915900776666e-06, | |
| "loss": 4.7793, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "grad_norm": 0.8545662760734558, | |
| "learning_rate": 7.675731858023227e-06, | |
| "loss": 4.7745, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "grad_norm": 0.9553655385971069, | |
| "learning_rate": 7.667547815269789e-06, | |
| "loss": 4.7741, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "grad_norm": 0.9458415508270264, | |
| "learning_rate": 7.659363772516349e-06, | |
| "loss": 4.7757, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "grad_norm": 1.3447215557098389, | |
| "learning_rate": 7.65117972976291e-06, | |
| "loss": 4.7721, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 0.8614388108253479, | |
| "learning_rate": 7.64299568700947e-06, | |
| "loss": 4.7728, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 1.083814263343811, | |
| "learning_rate": 7.63481164425603e-06, | |
| "loss": 4.7714, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 0.9633229374885559, | |
| "learning_rate": 7.626627601502591e-06, | |
| "loss": 4.7736, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 1.1467323303222656, | |
| "learning_rate": 7.618443558749151e-06, | |
| "loss": 4.7771, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 2.2095165252685547, | |
| "learning_rate": 7.610259515995713e-06, | |
| "loss": 4.7712, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 0.9931008815765381, | |
| "learning_rate": 7.602075473242273e-06, | |
| "loss": 4.7776, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 12.287310600280762, | |
| "learning_rate": 7.593891430488834e-06, | |
| "loss": 4.7797, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 3.100107192993164, | |
| "learning_rate": 7.585707387735394e-06, | |
| "loss": 4.7759, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 2.249577045440674, | |
| "learning_rate": 7.577523344981955e-06, | |
| "loss": 4.7708, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 3.1430749893188477, | |
| "learning_rate": 7.569339302228515e-06, | |
| "loss": 4.7742, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 0.9823663830757141, | |
| "learning_rate": 7.5611552594750755e-06, | |
| "loss": 4.7739, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 4.1920166015625, | |
| "learning_rate": 7.552971216721637e-06, | |
| "loss": 4.778, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 2.579256534576416, | |
| "learning_rate": 7.544787173968198e-06, | |
| "loss": 4.7784, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 1.4403995275497437, | |
| "learning_rate": 7.536603131214758e-06, | |
| "loss": 4.7701, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 3.48757266998291, | |
| "learning_rate": 7.528419088461318e-06, | |
| "loss": 4.7705, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 4.121363162994385, | |
| "learning_rate": 7.52023504570788e-06, | |
| "loss": 4.7704, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 1.439936876296997, | |
| "learning_rate": 7.5120510029544405e-06, | |
| "loss": 4.7711, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 1.546036720275879, | |
| "learning_rate": 7.503866960201001e-06, | |
| "loss": 4.7691, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 5.736770153045654, | |
| "learning_rate": 7.495682917447561e-06, | |
| "loss": 4.7802, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 1.7358049154281616, | |
| "learning_rate": 7.487498874694122e-06, | |
| "loss": 4.771, | |
| "step": 30700 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 0.9707129001617432, | |
| "learning_rate": 7.479314831940682e-06, | |
| "loss": 4.7691, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 2.6308770179748535, | |
| "learning_rate": 7.471130789187243e-06, | |
| "loss": 4.7706, | |
| "step": 30900 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 2.7046618461608887, | |
| "learning_rate": 7.462946746433804e-06, | |
| "loss": 4.7767, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 1.627031922340393, | |
| "learning_rate": 7.454762703680365e-06, | |
| "loss": 4.7743, | |
| "step": 31100 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 1.203337550163269, | |
| "learning_rate": 7.446578660926925e-06, | |
| "loss": 4.7764, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 1.2097506523132324, | |
| "learning_rate": 7.4383946181734854e-06, | |
| "loss": 4.7711, | |
| "step": 31300 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 1.1853944063186646, | |
| "learning_rate": 7.430210575420047e-06, | |
| "loss": 4.7699, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 0.8438019752502441, | |
| "learning_rate": 7.422026532666608e-06, | |
| "loss": 4.7732, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 1.5300862789154053, | |
| "learning_rate": 7.413842489913168e-06, | |
| "loss": 4.7747, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 1.4141496419906616, | |
| "learning_rate": 7.405658447159728e-06, | |
| "loss": 4.7711, | |
| "step": 31700 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 1.673567533493042, | |
| "learning_rate": 7.397474404406289e-06, | |
| "loss": 4.777, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 2.1357908248901367, | |
| "learning_rate": 7.38929036165285e-06, | |
| "loss": 4.7723, | |
| "step": 31900 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 4.316195964813232, | |
| "learning_rate": 7.381106318899411e-06, | |
| "loss": 4.7678, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 1.7443231344223022, | |
| "learning_rate": 7.372922276145971e-06, | |
| "loss": 4.7707, | |
| "step": 32100 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 2.7221803665161133, | |
| "learning_rate": 7.364738233392532e-06, | |
| "loss": 4.7708, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 0.8770394921302795, | |
| "learning_rate": 7.356554190639092e-06, | |
| "loss": 4.7721, | |
| "step": 32300 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "grad_norm": 0.9764662384986877, | |
| "learning_rate": 7.348370147885653e-06, | |
| "loss": 4.7756, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "grad_norm": 3.6692917346954346, | |
| "learning_rate": 7.3401861051322146e-06, | |
| "loss": 4.7773, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "grad_norm": 1.8451597690582275, | |
| "learning_rate": 7.332002062378775e-06, | |
| "loss": 4.7742, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "grad_norm": 0.9092561602592468, | |
| "learning_rate": 7.323818019625335e-06, | |
| "loss": 4.7666, | |
| "step": 32700 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "grad_norm": 0.9218481779098511, | |
| "learning_rate": 7.315633976871895e-06, | |
| "loss": 4.7717, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "grad_norm": 0.8461436033248901, | |
| "learning_rate": 7.3074499341184565e-06, | |
| "loss": 4.7752, | |
| "step": 32900 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "grad_norm": 0.879395067691803, | |
| "learning_rate": 7.299265891365017e-06, | |
| "loss": 4.7736, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "grad_norm": 0.9056565165519714, | |
| "learning_rate": 7.291081848611578e-06, | |
| "loss": 4.7671, | |
| "step": 33100 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "grad_norm": 2.0318729877471924, | |
| "learning_rate": 7.282897805858138e-06, | |
| "loss": 4.7672, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "grad_norm": 1.0115817785263062, | |
| "learning_rate": 7.274713763104699e-06, | |
| "loss": 4.7782, | |
| "step": 33300 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "grad_norm": 1.3754358291625977, | |
| "learning_rate": 7.2665297203512595e-06, | |
| "loss": 4.7711, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "grad_norm": 1.6245019435882568, | |
| "learning_rate": 7.25834567759782e-06, | |
| "loss": 4.7716, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "grad_norm": 1.5871042013168335, | |
| "learning_rate": 7.250161634844382e-06, | |
| "loss": 4.7689, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 1.6681900024414062, | |
| "learning_rate": 7.241977592090942e-06, | |
| "loss": 4.7734, | |
| "step": 33700 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 1.2739856243133545, | |
| "learning_rate": 7.233793549337502e-06, | |
| "loss": 4.7746, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 1.8256975412368774, | |
| "learning_rate": 7.2256095065840625e-06, | |
| "loss": 4.7683, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 2.5301151275634766, | |
| "learning_rate": 7.217425463830624e-06, | |
| "loss": 4.7715, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 1.9835234880447388, | |
| "learning_rate": 7.209241421077185e-06, | |
| "loss": 4.7675, | |
| "step": 34100 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 1.1152873039245605, | |
| "learning_rate": 7.201057378323745e-06, | |
| "loss": 4.767, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 4.025820732116699, | |
| "learning_rate": 7.192873335570305e-06, | |
| "loss": 4.7739, | |
| "step": 34300 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 4.386086463928223, | |
| "learning_rate": 7.184689292816866e-06, | |
| "loss": 4.7716, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 3.9003477096557617, | |
| "learning_rate": 7.176505250063427e-06, | |
| "loss": 4.7691, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 1.3681395053863525, | |
| "learning_rate": 7.168321207309987e-06, | |
| "loss": 4.7747, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 12.061843872070312, | |
| "learning_rate": 7.160137164556549e-06, | |
| "loss": 4.7714, | |
| "step": 34700 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 1.0302884578704834, | |
| "learning_rate": 7.151953121803109e-06, | |
| "loss": 4.7711, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "grad_norm": 1.5776879787445068, | |
| "learning_rate": 7.1437690790496694e-06, | |
| "loss": 4.7682, | |
| "step": 34900 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "grad_norm": 1.52390718460083, | |
| "learning_rate": 7.13558503629623e-06, | |
| "loss": 4.7712, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "grad_norm": 2.443223237991333, | |
| "learning_rate": 7.127400993542791e-06, | |
| "loss": 4.7696, | |
| "step": 35100 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "grad_norm": 1.4894499778747559, | |
| "learning_rate": 7.119216950789352e-06, | |
| "loss": 4.7739, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "grad_norm": 1.0729478597640991, | |
| "learning_rate": 7.111032908035912e-06, | |
| "loss": 4.7728, | |
| "step": 35300 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "grad_norm": 1.0306514501571655, | |
| "learning_rate": 7.1028488652824725e-06, | |
| "loss": 4.7739, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "grad_norm": 2.0036280155181885, | |
| "learning_rate": 7.094664822529034e-06, | |
| "loss": 4.7714, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "grad_norm": 0.9095188975334167, | |
| "learning_rate": 7.086480779775594e-06, | |
| "loss": 4.7647, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "grad_norm": 1.0899231433868408, | |
| "learning_rate": 7.078296737022154e-06, | |
| "loss": 4.7637, | |
| "step": 35700 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "grad_norm": 5.961045742034912, | |
| "learning_rate": 7.070112694268716e-06, | |
| "loss": 4.7722, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "grad_norm": 0.9270179867744446, | |
| "learning_rate": 7.061928651515276e-06, | |
| "loss": 4.7682, | |
| "step": 35900 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "grad_norm": 1.7026699781417847, | |
| "learning_rate": 7.053744608761837e-06, | |
| "loss": 4.7697, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 1.5568283796310425, | |
| "learning_rate": 7.045560566008397e-06, | |
| "loss": 4.7695, | |
| "step": 36100 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 4.148453712463379, | |
| "learning_rate": 7.037376523254959e-06, | |
| "loss": 4.7667, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 2.0565524101257324, | |
| "learning_rate": 7.029192480501519e-06, | |
| "loss": 4.7728, | |
| "step": 36300 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 1.3457672595977783, | |
| "learning_rate": 7.021008437748079e-06, | |
| "loss": 4.7719, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 1.2144137620925903, | |
| "learning_rate": 7.01282439499464e-06, | |
| "loss": 4.7714, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 0.7697350978851318, | |
| "learning_rate": 7.004640352241201e-06, | |
| "loss": 4.7712, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 0.9844958782196045, | |
| "learning_rate": 6.996456309487761e-06, | |
| "loss": 4.7723, | |
| "step": 36700 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 3.750422716140747, | |
| "learning_rate": 6.988272266734321e-06, | |
| "loss": 4.7717, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 2.6416289806365967, | |
| "learning_rate": 6.980088223980883e-06, | |
| "loss": 4.7696, | |
| "step": 36900 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 1.745283842086792, | |
| "learning_rate": 6.9719041812274435e-06, | |
| "loss": 4.7681, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 1.1269015073776245, | |
| "learning_rate": 6.963720138474004e-06, | |
| "loss": 4.7659, | |
| "step": 37100 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 4.63193941116333, | |
| "learning_rate": 6.955536095720564e-06, | |
| "loss": 4.7676, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "grad_norm": 1.586788296699524, | |
| "learning_rate": 6.947352052967126e-06, | |
| "loss": 4.7729, | |
| "step": 37300 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "grad_norm": 1.2502408027648926, | |
| "learning_rate": 6.939168010213686e-06, | |
| "loss": 4.7718, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "grad_norm": 1.620895504951477, | |
| "learning_rate": 6.9309839674602465e-06, | |
| "loss": 4.7703, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "grad_norm": 0.9904155731201172, | |
| "learning_rate": 6.922799924706807e-06, | |
| "loss": 4.77, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "grad_norm": 1.9440988302230835, | |
| "learning_rate": 6.914615881953368e-06, | |
| "loss": 4.7688, | |
| "step": 37700 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "grad_norm": 2.9490699768066406, | |
| "learning_rate": 6.906431839199928e-06, | |
| "loss": 4.7718, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "grad_norm": 1.7235488891601562, | |
| "learning_rate": 6.898247796446489e-06, | |
| "loss": 4.7685, | |
| "step": 37900 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "grad_norm": 1.076762318611145, | |
| "learning_rate": 6.89006375369305e-06, | |
| "loss": 4.7708, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "grad_norm": 0.8281159400939941, | |
| "learning_rate": 6.881879710939611e-06, | |
| "loss": 4.7697, | |
| "step": 38100 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "grad_norm": 1.1257106065750122, | |
| "learning_rate": 6.873695668186171e-06, | |
| "loss": 4.7727, | |
| "step": 38200 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "grad_norm": 4.7161760330200195, | |
| "learning_rate": 6.865511625432731e-06, | |
| "loss": 4.7695, | |
| "step": 38300 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "grad_norm": 2.1901695728302, | |
| "learning_rate": 6.857327582679293e-06, | |
| "loss": 4.7712, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 1.988418459892273, | |
| "learning_rate": 6.8491435399258535e-06, | |
| "loss": 4.7691, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 1.4924893379211426, | |
| "learning_rate": 6.840959497172414e-06, | |
| "loss": 4.7705, | |
| "step": 38600 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 2.154937744140625, | |
| "learning_rate": 6.832775454418974e-06, | |
| "loss": 4.7652, | |
| "step": 38700 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 1.1525272130966187, | |
| "learning_rate": 6.824591411665535e-06, | |
| "loss": 4.7705, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 0.9818894863128662, | |
| "learning_rate": 6.816407368912095e-06, | |
| "loss": 4.767, | |
| "step": 38900 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 4.012678146362305, | |
| "learning_rate": 6.8082233261586565e-06, | |
| "loss": 4.7657, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 3.9307897090911865, | |
| "learning_rate": 6.800039283405218e-06, | |
| "loss": 4.7629, | |
| "step": 39100 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 2.6684534549713135, | |
| "learning_rate": 6.791855240651778e-06, | |
| "loss": 4.7695, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 3.2453012466430664, | |
| "learning_rate": 6.783671197898338e-06, | |
| "loss": 4.7733, | |
| "step": 39300 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 1.1828510761260986, | |
| "learning_rate": 6.775487155144898e-06, | |
| "loss": 4.7696, | |
| "step": 39400 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 1.2635380029678345, | |
| "learning_rate": 6.76730311239146e-06, | |
| "loss": 4.7726, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 0.8589321970939636, | |
| "learning_rate": 6.759119069638021e-06, | |
| "loss": 4.768, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 1.4459155797958374, | |
| "learning_rate": 6.750935026884581e-06, | |
| "loss": 4.7664, | |
| "step": 39700 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "grad_norm": 3.3713083267211914, | |
| "learning_rate": 6.742750984131141e-06, | |
| "loss": 4.7684, | |
| "step": 39800 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "grad_norm": 3.4334757328033447, | |
| "learning_rate": 6.734566941377702e-06, | |
| "loss": 4.767, | |
| "step": 39900 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "grad_norm": 1.072906255722046, | |
| "learning_rate": 6.726382898624263e-06, | |
| "loss": 4.7623, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "grad_norm": 1.8414703607559204, | |
| "learning_rate": 6.718198855870824e-06, | |
| "loss": 4.7657, | |
| "step": 40100 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "grad_norm": 1.423050045967102, | |
| "learning_rate": 6.710014813117385e-06, | |
| "loss": 4.7684, | |
| "step": 40200 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "grad_norm": 1.1521214246749878, | |
| "learning_rate": 6.701830770363945e-06, | |
| "loss": 4.7682, | |
| "step": 40300 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "grad_norm": 1.2841377258300781, | |
| "learning_rate": 6.693646727610505e-06, | |
| "loss": 4.7661, | |
| "step": 40400 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "grad_norm": 2.815016508102417, | |
| "learning_rate": 6.6854626848570656e-06, | |
| "loss": 4.7724, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "grad_norm": 0.9150479435920715, | |
| "learning_rate": 6.6772786421036275e-06, | |
| "loss": 4.7667, | |
| "step": 40600 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "grad_norm": 1.2387914657592773, | |
| "learning_rate": 6.669094599350188e-06, | |
| "loss": 4.7695, | |
| "step": 40700 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "grad_norm": 3.5044503211975098, | |
| "learning_rate": 6.660910556596748e-06, | |
| "loss": 4.7671, | |
| "step": 40800 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "grad_norm": 1.1371254920959473, | |
| "learning_rate": 6.652726513843308e-06, | |
| "loss": 4.7652, | |
| "step": 40900 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "grad_norm": 1.3063691854476929, | |
| "learning_rate": 6.6445424710898694e-06, | |
| "loss": 4.7674, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "grad_norm": 1.7822604179382324, | |
| "learning_rate": 6.6363584283364306e-06, | |
| "loss": 4.7692, | |
| "step": 41100 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "grad_norm": 0.9057841300964355, | |
| "learning_rate": 6.628174385582991e-06, | |
| "loss": 4.7676, | |
| "step": 41200 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "grad_norm": 1.9072014093399048, | |
| "learning_rate": 6.619990342829552e-06, | |
| "loss": 4.7625, | |
| "step": 41300 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "grad_norm": 0.9912985563278198, | |
| "learning_rate": 6.611806300076112e-06, | |
| "loss": 4.7709, | |
| "step": 41400 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "grad_norm": 2.278571605682373, | |
| "learning_rate": 6.6036222573226725e-06, | |
| "loss": 4.7725, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "grad_norm": 2.2209765911102295, | |
| "learning_rate": 6.595438214569233e-06, | |
| "loss": 4.7693, | |
| "step": 41600 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "grad_norm": 1.4683972597122192, | |
| "learning_rate": 6.587254171815795e-06, | |
| "loss": 4.7715, | |
| "step": 41700 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "grad_norm": 2.1982457637786865, | |
| "learning_rate": 6.579070129062355e-06, | |
| "loss": 4.7629, | |
| "step": 41800 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "grad_norm": 3.916114568710327, | |
| "learning_rate": 6.570886086308915e-06, | |
| "loss": 4.7628, | |
| "step": 41900 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "grad_norm": 4.219468116760254, | |
| "learning_rate": 6.5627020435554755e-06, | |
| "loss": 4.766, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "grad_norm": 1.3876959085464478, | |
| "learning_rate": 6.5545180008020375e-06, | |
| "loss": 4.7689, | |
| "step": 42100 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "grad_norm": 4.21665620803833, | |
| "learning_rate": 6.546333958048598e-06, | |
| "loss": 4.767, | |
| "step": 42200 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "grad_norm": 0.8205769658088684, | |
| "learning_rate": 6.538149915295158e-06, | |
| "loss": 4.7673, | |
| "step": 42300 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "grad_norm": 2.474163293838501, | |
| "learning_rate": 6.529965872541719e-06, | |
| "loss": 4.7662, | |
| "step": 42400 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "grad_norm": 2.4474871158599854, | |
| "learning_rate": 6.521781829788279e-06, | |
| "loss": 4.7672, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "grad_norm": 1.422839879989624, | |
| "learning_rate": 6.51359778703484e-06, | |
| "loss": 4.7639, | |
| "step": 42600 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "grad_norm": 1.002898097038269, | |
| "learning_rate": 6.5054137442814e-06, | |
| "loss": 4.7707, | |
| "step": 42700 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "grad_norm": 1.21475350856781, | |
| "learning_rate": 6.497229701527962e-06, | |
| "loss": 4.7643, | |
| "step": 42800 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "grad_norm": 1.273319959640503, | |
| "learning_rate": 6.489045658774522e-06, | |
| "loss": 4.764, | |
| "step": 42900 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "grad_norm": 1.963934063911438, | |
| "learning_rate": 6.480861616021082e-06, | |
| "loss": 4.7675, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "grad_norm": 2.1018524169921875, | |
| "learning_rate": 6.472677573267643e-06, | |
| "loss": 4.7722, | |
| "step": 43100 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "grad_norm": 1.1742087602615356, | |
| "learning_rate": 6.464493530514205e-06, | |
| "loss": 4.7697, | |
| "step": 43200 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "grad_norm": 1.3663380146026611, | |
| "learning_rate": 6.456309487760765e-06, | |
| "loss": 4.7654, | |
| "step": 43300 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 1.2402204275131226, | |
| "learning_rate": 6.448125445007325e-06, | |
| "loss": 4.7657, | |
| "step": 43400 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 1.8193938732147217, | |
| "learning_rate": 6.439941402253886e-06, | |
| "loss": 4.7631, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 0.9568284749984741, | |
| "learning_rate": 6.4317573595004465e-06, | |
| "loss": 4.7687, | |
| "step": 43600 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 0.9550923109054565, | |
| "learning_rate": 6.423573316747007e-06, | |
| "loss": 4.766, | |
| "step": 43700 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 1.496541976928711, | |
| "learning_rate": 6.415389273993567e-06, | |
| "loss": 4.7674, | |
| "step": 43800 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 7.160891532897949, | |
| "learning_rate": 6.407205231240129e-06, | |
| "loss": 4.7667, | |
| "step": 43900 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 1.1409087181091309, | |
| "learning_rate": 6.399021188486689e-06, | |
| "loss": 4.7657, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 1.3777464628219604, | |
| "learning_rate": 6.3908371457332496e-06, | |
| "loss": 4.7657, | |
| "step": 44100 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 1.171425223350525, | |
| "learning_rate": 6.38265310297981e-06, | |
| "loss": 4.7645, | |
| "step": 44200 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 1.4748115539550781, | |
| "learning_rate": 6.374469060226372e-06, | |
| "loss": 4.7622, | |
| "step": 44300 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 4.880084991455078, | |
| "learning_rate": 6.366285017472932e-06, | |
| "loss": 4.766, | |
| "step": 44400 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 1.557969331741333, | |
| "learning_rate": 6.358100974719492e-06, | |
| "loss": 4.7688, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "grad_norm": 2.078839063644409, | |
| "learning_rate": 6.3499169319660534e-06, | |
| "loss": 4.7652, | |
| "step": 44600 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "grad_norm": 5.024000644683838, | |
| "learning_rate": 6.341732889212614e-06, | |
| "loss": 4.767, | |
| "step": 44700 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "grad_norm": 1.7246519327163696, | |
| "learning_rate": 6.333548846459174e-06, | |
| "loss": 4.7631, | |
| "step": 44800 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "grad_norm": 0.9058144688606262, | |
| "learning_rate": 6.325364803705735e-06, | |
| "loss": 4.7641, | |
| "step": 44900 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "grad_norm": 1.1066261529922485, | |
| "learning_rate": 6.317180760952296e-06, | |
| "loss": 4.7657, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "grad_norm": 1.048341989517212, | |
| "learning_rate": 6.3089967181988565e-06, | |
| "loss": 4.7573, | |
| "step": 45100 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "grad_norm": 2.8883166313171387, | |
| "learning_rate": 6.300812675445417e-06, | |
| "loss": 4.7681, | |
| "step": 45200 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "grad_norm": 3.5398635864257812, | |
| "learning_rate": 6.292628632691977e-06, | |
| "loss": 4.7622, | |
| "step": 45300 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "grad_norm": 4.646157741546631, | |
| "learning_rate": 6.284444589938539e-06, | |
| "loss": 4.7675, | |
| "step": 45400 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "grad_norm": 1.9889692068099976, | |
| "learning_rate": 6.276260547185099e-06, | |
| "loss": 4.7616, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "grad_norm": 1.8759746551513672, | |
| "learning_rate": 6.2680765044316595e-06, | |
| "loss": 4.7674, | |
| "step": 45600 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "grad_norm": 1.6846829652786255, | |
| "learning_rate": 6.259892461678221e-06, | |
| "loss": 4.762, | |
| "step": 45700 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "grad_norm": 0.9014168381690979, | |
| "learning_rate": 6.251708418924781e-06, | |
| "loss": 4.7671, | |
| "step": 45800 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 2.2552223205566406, | |
| "learning_rate": 6.243524376171341e-06, | |
| "loss": 4.7665, | |
| "step": 45900 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 7.994378566741943, | |
| "learning_rate": 6.235340333417902e-06, | |
| "loss": 4.7679, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 1.071326732635498, | |
| "learning_rate": 6.227156290664463e-06, | |
| "loss": 4.7621, | |
| "step": 46100 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 1.366959810256958, | |
| "learning_rate": 6.218972247911024e-06, | |
| "loss": 4.7714, | |
| "step": 46200 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 1.2230727672576904, | |
| "learning_rate": 6.210788205157584e-06, | |
| "loss": 4.7671, | |
| "step": 46300 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 3.3939905166625977, | |
| "learning_rate": 6.202604162404144e-06, | |
| "loss": 4.7652, | |
| "step": 46400 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 1.4360277652740479, | |
| "learning_rate": 6.194420119650706e-06, | |
| "loss": 4.7613, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 2.0407369136810303, | |
| "learning_rate": 6.186236076897266e-06, | |
| "loss": 4.7704, | |
| "step": 46600 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 0.9247026443481445, | |
| "learning_rate": 6.178052034143827e-06, | |
| "loss": 4.7611, | |
| "step": 46700 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 1.4636540412902832, | |
| "learning_rate": 6.169867991390388e-06, | |
| "loss": 4.7658, | |
| "step": 46800 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 1.443965196609497, | |
| "learning_rate": 6.161683948636948e-06, | |
| "loss": 4.7674, | |
| "step": 46900 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 1.4305676221847534, | |
| "learning_rate": 6.153499905883509e-06, | |
| "loss": 4.7653, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "grad_norm": 0.9233379364013672, | |
| "learning_rate": 6.1453158631300694e-06, | |
| "loss": 4.7685, | |
| "step": 47100 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "grad_norm": 0.9005165100097656, | |
| "learning_rate": 6.1371318203766305e-06, | |
| "loss": 4.7605, | |
| "step": 47200 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "grad_norm": 2.1827316284179688, | |
| "learning_rate": 6.128947777623191e-06, | |
| "loss": 4.7639, | |
| "step": 47300 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "grad_norm": 1.330802083015442, | |
| "learning_rate": 6.120763734869751e-06, | |
| "loss": 4.765, | |
| "step": 47400 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "grad_norm": 1.0128445625305176, | |
| "learning_rate": 6.112579692116311e-06, | |
| "loss": 4.7704, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "grad_norm": 3.6746978759765625, | |
| "learning_rate": 6.104395649362873e-06, | |
| "loss": 4.7645, | |
| "step": 47600 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "grad_norm": 1.546904444694519, | |
| "learning_rate": 6.0962116066094336e-06, | |
| "loss": 4.764, | |
| "step": 47700 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "grad_norm": 3.3686161041259766, | |
| "learning_rate": 6.088027563855994e-06, | |
| "loss": 4.7679, | |
| "step": 47800 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "grad_norm": 1.7452073097229004, | |
| "learning_rate": 6.079843521102555e-06, | |
| "loss": 4.7597, | |
| "step": 47900 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "grad_norm": 1.0575987100601196, | |
| "learning_rate": 6.071659478349115e-06, | |
| "loss": 4.7656, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "grad_norm": 5.366837501525879, | |
| "learning_rate": 6.063475435595676e-06, | |
| "loss": 4.761, | |
| "step": 48100 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "grad_norm": 0.977851927280426, | |
| "learning_rate": 6.055291392842237e-06, | |
| "loss": 4.7644, | |
| "step": 48200 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 4.891844749450684, | |
| "learning_rate": 6.047107350088798e-06, | |
| "loss": 4.7681, | |
| "step": 48300 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 1.2509273290634155, | |
| "learning_rate": 6.038923307335358e-06, | |
| "loss": 4.7587, | |
| "step": 48400 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 1.095798373222351, | |
| "learning_rate": 6.030739264581918e-06, | |
| "loss": 4.7617, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 1.093984842300415, | |
| "learning_rate": 6.0225552218284785e-06, | |
| "loss": 4.7629, | |
| "step": 48600 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 0.8967887759208679, | |
| "learning_rate": 6.0143711790750405e-06, | |
| "loss": 4.7623, | |
| "step": 48700 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 0.8838722705841064, | |
| "learning_rate": 6.006187136321601e-06, | |
| "loss": 4.7658, | |
| "step": 48800 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 1.5173430442810059, | |
| "learning_rate": 5.998003093568161e-06, | |
| "loss": 4.7631, | |
| "step": 48900 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 2.8584578037261963, | |
| "learning_rate": 5.989819050814722e-06, | |
| "loss": 4.7597, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 2.672813892364502, | |
| "learning_rate": 5.981635008061283e-06, | |
| "loss": 4.7614, | |
| "step": 49100 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 1.2858943939208984, | |
| "learning_rate": 5.9734509653078435e-06, | |
| "loss": 4.7585, | |
| "step": 49200 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 2.0808370113372803, | |
| "learning_rate": 5.965266922554404e-06, | |
| "loss": 4.7652, | |
| "step": 49300 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 1.6658849716186523, | |
| "learning_rate": 5.957082879800965e-06, | |
| "loss": 4.7622, | |
| "step": 49400 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "grad_norm": 1.312129259109497, | |
| "learning_rate": 5.948898837047525e-06, | |
| "loss": 4.7643, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "grad_norm": 1.109014868736267, | |
| "learning_rate": 5.940714794294085e-06, | |
| "loss": 4.7635, | |
| "step": 49600 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "grad_norm": 1.059754729270935, | |
| "learning_rate": 5.932530751540646e-06, | |
| "loss": 4.7635, | |
| "step": 49700 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "grad_norm": 1.4219303131103516, | |
| "learning_rate": 5.924346708787208e-06, | |
| "loss": 4.7644, | |
| "step": 49800 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "grad_norm": 4.515329360961914, | |
| "learning_rate": 5.916162666033768e-06, | |
| "loss": 4.764, | |
| "step": 49900 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "grad_norm": 2.7879600524902344, | |
| "learning_rate": 5.907978623280328e-06, | |
| "loss": 4.7629, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "grad_norm": 1.4872534275054932, | |
| "learning_rate": 5.899794580526889e-06, | |
| "loss": 4.7627, | |
| "step": 50100 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "grad_norm": 1.7036062479019165, | |
| "learning_rate": 5.89161053777345e-06, | |
| "loss": 4.7602, | |
| "step": 50200 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "grad_norm": 1.1334501504898071, | |
| "learning_rate": 5.883426495020011e-06, | |
| "loss": 4.7649, | |
| "step": 50300 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "grad_norm": 1.8255513906478882, | |
| "learning_rate": 5.875242452266571e-06, | |
| "loss": 4.7662, | |
| "step": 50400 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "grad_norm": 0.9870157241821289, | |
| "learning_rate": 5.867058409513132e-06, | |
| "loss": 4.7613, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "grad_norm": 1.7012158632278442, | |
| "learning_rate": 5.858874366759692e-06, | |
| "loss": 4.7609, | |
| "step": 50600 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "grad_norm": 0.9148879051208496, | |
| "learning_rate": 5.850690324006253e-06, | |
| "loss": 4.7601, | |
| "step": 50700 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 1.1383265256881714, | |
| "learning_rate": 5.842506281252813e-06, | |
| "loss": 4.7583, | |
| "step": 50800 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 0.8608391284942627, | |
| "learning_rate": 5.834322238499375e-06, | |
| "loss": 4.7639, | |
| "step": 50900 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 2.019380807876587, | |
| "learning_rate": 5.826138195745935e-06, | |
| "loss": 4.7638, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 2.069756031036377, | |
| "learning_rate": 5.817954152992495e-06, | |
| "loss": 4.7691, | |
| "step": 51100 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 1.2474431991577148, | |
| "learning_rate": 5.809770110239057e-06, | |
| "loss": 4.7668, | |
| "step": 51200 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 1.1330208778381348, | |
| "learning_rate": 5.8015860674856176e-06, | |
| "loss": 4.7647, | |
| "step": 51300 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 1.4908925294876099, | |
| "learning_rate": 5.793402024732178e-06, | |
| "loss": 4.759, | |
| "step": 51400 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 1.1022288799285889, | |
| "learning_rate": 5.785217981978738e-06, | |
| "loss": 4.7583, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 1.2714922428131104, | |
| "learning_rate": 5.777033939225299e-06, | |
| "loss": 4.7619, | |
| "step": 51600 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 1.3751145601272583, | |
| "learning_rate": 5.7688498964718595e-06, | |
| "loss": 4.7607, | |
| "step": 51700 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 0.9955260753631592, | |
| "learning_rate": 5.76066585371842e-06, | |
| "loss": 4.7612, | |
| "step": 51800 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 1.48429274559021, | |
| "learning_rate": 5.752481810964981e-06, | |
| "loss": 4.7631, | |
| "step": 51900 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "grad_norm": 3.2120578289031982, | |
| "learning_rate": 5.744297768211542e-06, | |
| "loss": 4.7652, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "grad_norm": 1.1913766860961914, | |
| "learning_rate": 5.736113725458102e-06, | |
| "loss": 4.7651, | |
| "step": 52100 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "grad_norm": 3.3173420429229736, | |
| "learning_rate": 5.7279296827046625e-06, | |
| "loss": 4.7583, | |
| "step": 52200 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "grad_norm": 2.184640407562256, | |
| "learning_rate": 5.7197456399512245e-06, | |
| "loss": 4.7674, | |
| "step": 52300 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "grad_norm": 1.406989574432373, | |
| "learning_rate": 5.711561597197785e-06, | |
| "loss": 4.7624, | |
| "step": 52400 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "grad_norm": 4.420721530914307, | |
| "learning_rate": 5.703377554444345e-06, | |
| "loss": 4.7584, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "grad_norm": 1.6438698768615723, | |
| "learning_rate": 5.695193511690905e-06, | |
| "loss": 4.759, | |
| "step": 52600 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "grad_norm": 1.0008363723754883, | |
| "learning_rate": 5.687009468937466e-06, | |
| "loss": 4.7622, | |
| "step": 52700 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "grad_norm": 0.9954501986503601, | |
| "learning_rate": 5.678825426184027e-06, | |
| "loss": 4.7599, | |
| "step": 52800 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "grad_norm": 2.2024335861206055, | |
| "learning_rate": 5.670641383430587e-06, | |
| "loss": 4.757, | |
| "step": 52900 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "grad_norm": 0.8690518140792847, | |
| "learning_rate": 5.662457340677148e-06, | |
| "loss": 4.7638, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "grad_norm": 4.789288520812988, | |
| "learning_rate": 5.654273297923709e-06, | |
| "loss": 4.7651, | |
| "step": 53100 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 1.9161509275436401, | |
| "learning_rate": 5.646089255170269e-06, | |
| "loss": 4.762, | |
| "step": 53200 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 1.1791253089904785, | |
| "learning_rate": 5.63790521241683e-06, | |
| "loss": 4.7629, | |
| "step": 53300 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 3.780832529067993, | |
| "learning_rate": 5.629721169663392e-06, | |
| "loss": 4.7553, | |
| "step": 53400 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 1.1403292417526245, | |
| "learning_rate": 5.621537126909952e-06, | |
| "loss": 4.7615, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 1.2787580490112305, | |
| "learning_rate": 5.613353084156512e-06, | |
| "loss": 4.7623, | |
| "step": 53600 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 2.774376153945923, | |
| "learning_rate": 5.6051690414030724e-06, | |
| "loss": 4.7626, | |
| "step": 53700 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 3.4365766048431396, | |
| "learning_rate": 5.5969849986496336e-06, | |
| "loss": 4.7573, | |
| "step": 53800 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 1.0744109153747559, | |
| "learning_rate": 5.588800955896194e-06, | |
| "loss": 4.7621, | |
| "step": 53900 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 0.9712745547294617, | |
| "learning_rate": 5.580616913142755e-06, | |
| "loss": 4.7605, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 1.24153470993042, | |
| "learning_rate": 5.572432870389315e-06, | |
| "loss": 4.7654, | |
| "step": 54100 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 1.9112986326217651, | |
| "learning_rate": 5.564248827635876e-06, | |
| "loss": 4.7666, | |
| "step": 54200 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 2.2796552181243896, | |
| "learning_rate": 5.556064784882437e-06, | |
| "loss": 4.7594, | |
| "step": 54300 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "grad_norm": 1.3988897800445557, | |
| "learning_rate": 5.547880742128997e-06, | |
| "loss": 4.7596, | |
| "step": 54400 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "grad_norm": 1.2597512006759644, | |
| "learning_rate": 5.539696699375559e-06, | |
| "loss": 4.7648, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "grad_norm": 2.735841989517212, | |
| "learning_rate": 5.531512656622119e-06, | |
| "loss": 4.766, | |
| "step": 54600 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "grad_norm": 1.2517529726028442, | |
| "learning_rate": 5.523328613868679e-06, | |
| "loss": 4.7585, | |
| "step": 54700 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "grad_norm": 1.7145378589630127, | |
| "learning_rate": 5.51514457111524e-06, | |
| "loss": 4.7575, | |
| "step": 54800 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "grad_norm": 1.1680995225906372, | |
| "learning_rate": 5.506960528361801e-06, | |
| "loss": 4.7611, | |
| "step": 54900 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "grad_norm": 3.5684611797332764, | |
| "learning_rate": 5.498776485608361e-06, | |
| "loss": 4.7579, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "grad_norm": 1.0898152589797974, | |
| "learning_rate": 5.490592442854922e-06, | |
| "loss": 4.7587, | |
| "step": 55100 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "grad_norm": 1.0716261863708496, | |
| "learning_rate": 5.482408400101482e-06, | |
| "loss": 4.7605, | |
| "step": 55200 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "grad_norm": 2.373514413833618, | |
| "learning_rate": 5.4742243573480435e-06, | |
| "loss": 4.7626, | |
| "step": 55300 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "grad_norm": 3.0533201694488525, | |
| "learning_rate": 5.466040314594604e-06, | |
| "loss": 4.7601, | |
| "step": 55400 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "grad_norm": 0.9586790800094604, | |
| "learning_rate": 5.457856271841164e-06, | |
| "loss": 4.7607, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 1.8159968852996826, | |
| "learning_rate": 5.449672229087726e-06, | |
| "loss": 4.7613, | |
| "step": 55600 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 1.0811960697174072, | |
| "learning_rate": 5.441488186334286e-06, | |
| "loss": 4.7613, | |
| "step": 55700 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 1.0583518743515015, | |
| "learning_rate": 5.4333041435808465e-06, | |
| "loss": 4.7558, | |
| "step": 55800 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 0.993607223033905, | |
| "learning_rate": 5.425120100827407e-06, | |
| "loss": 4.7577, | |
| "step": 55900 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 1.3715596199035645, | |
| "learning_rate": 5.416936058073968e-06, | |
| "loss": 4.7565, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 1.3586755990982056, | |
| "learning_rate": 5.408752015320529e-06, | |
| "loss": 4.7627, | |
| "step": 56100 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 1.5538294315338135, | |
| "learning_rate": 5.400567972567089e-06, | |
| "loss": 4.7616, | |
| "step": 56200 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 1.3117858171463013, | |
| "learning_rate": 5.3923839298136495e-06, | |
| "loss": 4.7615, | |
| "step": 56300 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 2.4825961589813232, | |
| "learning_rate": 5.384199887060211e-06, | |
| "loss": 4.7673, | |
| "step": 56400 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 3.6540427207946777, | |
| "learning_rate": 5.376015844306771e-06, | |
| "loss": 4.7583, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 1.217731237411499, | |
| "learning_rate": 5.367831801553331e-06, | |
| "loss": 4.7647, | |
| "step": 56600 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 1.3690531253814697, | |
| "learning_rate": 5.359647758799893e-06, | |
| "loss": 4.7607, | |
| "step": 56700 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 2.986236572265625, | |
| "learning_rate": 5.351463716046453e-06, | |
| "loss": 4.7649, | |
| "step": 56800 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "grad_norm": 4.169172763824463, | |
| "learning_rate": 5.343279673293014e-06, | |
| "loss": 4.7609, | |
| "step": 56900 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "grad_norm": 3.941506862640381, | |
| "learning_rate": 5.335095630539574e-06, | |
| "loss": 4.7588, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "grad_norm": 1.151626467704773, | |
| "learning_rate": 5.326911587786135e-06, | |
| "loss": 4.7626, | |
| "step": 57100 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "grad_norm": 1.1332154273986816, | |
| "learning_rate": 5.318727545032696e-06, | |
| "loss": 4.7638, | |
| "step": 57200 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "grad_norm": 1.0343974828720093, | |
| "learning_rate": 5.3105435022792564e-06, | |
| "loss": 4.7591, | |
| "step": 57300 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "grad_norm": 1.0772465467453003, | |
| "learning_rate": 5.302359459525817e-06, | |
| "loss": 4.7594, | |
| "step": 57400 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "grad_norm": 5.96359920501709, | |
| "learning_rate": 5.294175416772378e-06, | |
| "loss": 4.7597, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "grad_norm": 1.451434850692749, | |
| "learning_rate": 5.285991374018938e-06, | |
| "loss": 4.7622, | |
| "step": 57600 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "grad_norm": 3.3634424209594727, | |
| "learning_rate": 5.277807331265498e-06, | |
| "loss": 4.7639, | |
| "step": 57700 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "grad_norm": 1.3753291368484497, | |
| "learning_rate": 5.2696232885120595e-06, | |
| "loss": 4.762, | |
| "step": 57800 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "grad_norm": 1.0764108896255493, | |
| "learning_rate": 5.261439245758621e-06, | |
| "loss": 4.7565, | |
| "step": 57900 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "grad_norm": 1.7822853326797485, | |
| "learning_rate": 5.253255203005181e-06, | |
| "loss": 4.76, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 1.0505635738372803, | |
| "learning_rate": 5.245071160251741e-06, | |
| "loss": 4.7563, | |
| "step": 58100 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 2.2111964225769043, | |
| "learning_rate": 5.236887117498303e-06, | |
| "loss": 4.7613, | |
| "step": 58200 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 1.0773766040802002, | |
| "learning_rate": 5.228703074744863e-06, | |
| "loss": 4.7645, | |
| "step": 58300 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 1.387395977973938, | |
| "learning_rate": 5.220519031991424e-06, | |
| "loss": 4.7581, | |
| "step": 58400 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 1.5407155752182007, | |
| "learning_rate": 5.212334989237984e-06, | |
| "loss": 4.7599, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 1.9045255184173584, | |
| "learning_rate": 5.204150946484545e-06, | |
| "loss": 4.7632, | |
| "step": 58600 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 1.1140589714050293, | |
| "learning_rate": 5.195966903731105e-06, | |
| "loss": 4.758, | |
| "step": 58700 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 3.0775272846221924, | |
| "learning_rate": 5.1877828609776655e-06, | |
| "loss": 4.7591, | |
| "step": 58800 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 1.5358976125717163, | |
| "learning_rate": 5.179598818224227e-06, | |
| "loss": 4.7613, | |
| "step": 58900 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 2.506425380706787, | |
| "learning_rate": 5.171414775470788e-06, | |
| "loss": 4.7584, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 1.562016248703003, | |
| "learning_rate": 5.163230732717348e-06, | |
| "loss": 4.7603, | |
| "step": 59100 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 3.894599437713623, | |
| "learning_rate": 5.155046689963908e-06, | |
| "loss": 4.7582, | |
| "step": 59200 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "grad_norm": 1.7475073337554932, | |
| "learning_rate": 5.14686264721047e-06, | |
| "loss": 4.7552, | |
| "step": 59300 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "grad_norm": 2.168311357498169, | |
| "learning_rate": 5.1386786044570305e-06, | |
| "loss": 4.7546, | |
| "step": 59400 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "grad_norm": 1.3866901397705078, | |
| "learning_rate": 5.130494561703591e-06, | |
| "loss": 4.7595, | |
| "step": 59500 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "grad_norm": 1.5141569375991821, | |
| "learning_rate": 5.122310518950151e-06, | |
| "loss": 4.7594, | |
| "step": 59600 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "grad_norm": 1.22174072265625, | |
| "learning_rate": 5.114126476196712e-06, | |
| "loss": 4.7591, | |
| "step": 59700 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "grad_norm": 1.8501654863357544, | |
| "learning_rate": 5.1059424334432724e-06, | |
| "loss": 4.7566, | |
| "step": 59800 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "grad_norm": 0.8938846588134766, | |
| "learning_rate": 5.0977583906898336e-06, | |
| "loss": 4.7601, | |
| "step": 59900 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "grad_norm": 3.8000481128692627, | |
| "learning_rate": 5.089574347936394e-06, | |
| "loss": 4.7645, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "grad_norm": 2.3883254528045654, | |
| "learning_rate": 5.081390305182955e-06, | |
| "loss": 4.759, | |
| "step": 60100 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "grad_norm": 1.5231989622116089, | |
| "learning_rate": 5.073206262429515e-06, | |
| "loss": 4.7591, | |
| "step": 60200 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "grad_norm": 2.584988832473755, | |
| "learning_rate": 5.0650222196760755e-06, | |
| "loss": 4.7614, | |
| "step": 60300 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "grad_norm": 1.4782294034957886, | |
| "learning_rate": 5.0568381769226374e-06, | |
| "loss": 4.7577, | |
| "step": 60400 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 2.2520744800567627, | |
| "learning_rate": 5.048654134169198e-06, | |
| "loss": 4.7576, | |
| "step": 60500 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 1.1555761098861694, | |
| "learning_rate": 5.040470091415758e-06, | |
| "loss": 4.7613, | |
| "step": 60600 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 1.0871673822402954, | |
| "learning_rate": 5.032286048662318e-06, | |
| "loss": 4.7534, | |
| "step": 60700 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 0.9198378324508667, | |
| "learning_rate": 5.024102005908879e-06, | |
| "loss": 4.7599, | |
| "step": 60800 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 1.8254669904708862, | |
| "learning_rate": 5.01591796315544e-06, | |
| "loss": 4.7577, | |
| "step": 60900 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 2.592374563217163, | |
| "learning_rate": 5.007733920402001e-06, | |
| "loss": 4.7596, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 1.4294451475143433, | |
| "learning_rate": 4.999549877648561e-06, | |
| "loss": 4.7587, | |
| "step": 61100 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 1.3959410190582275, | |
| "learning_rate": 4.991365834895122e-06, | |
| "loss": 4.7587, | |
| "step": 61200 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 1.02007257938385, | |
| "learning_rate": 4.983181792141682e-06, | |
| "loss": 4.7591, | |
| "step": 61300 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 1.9636856317520142, | |
| "learning_rate": 4.9749977493882435e-06, | |
| "loss": 4.7586, | |
| "step": 61400 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 1.2003675699234009, | |
| "learning_rate": 4.966813706634804e-06, | |
| "loss": 4.7578, | |
| "step": 61500 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 1.2069259881973267, | |
| "learning_rate": 4.958629663881365e-06, | |
| "loss": 4.7594, | |
| "step": 61600 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 1.160438895225525, | |
| "learning_rate": 4.950445621127925e-06, | |
| "loss": 4.7537, | |
| "step": 61700 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "grad_norm": 1.4509081840515137, | |
| "learning_rate": 4.942261578374486e-06, | |
| "loss": 4.7566, | |
| "step": 61800 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "grad_norm": 1.4008055925369263, | |
| "learning_rate": 4.9340775356210465e-06, | |
| "loss": 4.7596, | |
| "step": 61900 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "grad_norm": 2.445732593536377, | |
| "learning_rate": 4.925893492867608e-06, | |
| "loss": 4.7585, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "grad_norm": 2.155773162841797, | |
| "learning_rate": 4.917709450114168e-06, | |
| "loss": 4.7594, | |
| "step": 62100 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "grad_norm": 1.3300435543060303, | |
| "learning_rate": 4.909525407360728e-06, | |
| "loss": 4.7599, | |
| "step": 62200 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "grad_norm": 1.1474816799163818, | |
| "learning_rate": 4.901341364607289e-06, | |
| "loss": 4.7575, | |
| "step": 62300 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "grad_norm": 1.1090396642684937, | |
| "learning_rate": 4.8931573218538495e-06, | |
| "loss": 4.7556, | |
| "step": 62400 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "grad_norm": 1.4400147199630737, | |
| "learning_rate": 4.884973279100411e-06, | |
| "loss": 4.7543, | |
| "step": 62500 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "grad_norm": 1.534568190574646, | |
| "learning_rate": 4.876789236346971e-06, | |
| "loss": 4.7542, | |
| "step": 62600 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "grad_norm": 1.1177995204925537, | |
| "learning_rate": 4.868605193593532e-06, | |
| "loss": 4.7557, | |
| "step": 62700 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "grad_norm": 1.2250655889511108, | |
| "learning_rate": 4.860421150840092e-06, | |
| "loss": 4.7588, | |
| "step": 62800 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "grad_norm": 1.2332854270935059, | |
| "learning_rate": 4.852237108086653e-06, | |
| "loss": 4.7623, | |
| "step": 62900 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 1.0365347862243652, | |
| "learning_rate": 4.844053065333214e-06, | |
| "loss": 4.7591, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 3.2265894412994385, | |
| "learning_rate": 4.835869022579775e-06, | |
| "loss": 4.7589, | |
| "step": 63100 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 1.374605417251587, | |
| "learning_rate": 4.827684979826335e-06, | |
| "loss": 4.7574, | |
| "step": 63200 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 1.239890694618225, | |
| "learning_rate": 4.819500937072895e-06, | |
| "loss": 4.7622, | |
| "step": 63300 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 4.042061805725098, | |
| "learning_rate": 4.8113168943194564e-06, | |
| "loss": 4.7591, | |
| "step": 63400 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 2.166978597640991, | |
| "learning_rate": 4.803132851566017e-06, | |
| "loss": 4.7594, | |
| "step": 63500 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 1.0814965963363647, | |
| "learning_rate": 4.794948808812578e-06, | |
| "loss": 4.759, | |
| "step": 63600 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 1.7993803024291992, | |
| "learning_rate": 4.786764766059138e-06, | |
| "loss": 4.7574, | |
| "step": 63700 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 1.1397624015808105, | |
| "learning_rate": 4.778580723305699e-06, | |
| "loss": 4.7568, | |
| "step": 63800 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 1.801677942276001, | |
| "learning_rate": 4.7703966805522595e-06, | |
| "loss": 4.7561, | |
| "step": 63900 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 1.5364161729812622, | |
| "learning_rate": 4.762212637798821e-06, | |
| "loss": 4.7584, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 1.0341291427612305, | |
| "learning_rate": 4.754028595045381e-06, | |
| "loss": 4.7581, | |
| "step": 64100 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "grad_norm": 1.0642578601837158, | |
| "learning_rate": 4.745844552291942e-06, | |
| "loss": 4.7566, | |
| "step": 64200 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "grad_norm": 1.6422146558761597, | |
| "learning_rate": 4.737660509538502e-06, | |
| "loss": 4.7583, | |
| "step": 64300 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "grad_norm": 1.8048427104949951, | |
| "learning_rate": 4.7294764667850625e-06, | |
| "loss": 4.7575, | |
| "step": 64400 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "grad_norm": 1.5397706031799316, | |
| "learning_rate": 4.721292424031624e-06, | |
| "loss": 4.7603, | |
| "step": 64500 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "grad_norm": 1.1673585176467896, | |
| "learning_rate": 4.713108381278184e-06, | |
| "loss": 4.7603, | |
| "step": 64600 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "grad_norm": 1.130509376525879, | |
| "learning_rate": 4.704924338524745e-06, | |
| "loss": 4.7582, | |
| "step": 64700 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "grad_norm": 1.8829139471054077, | |
| "learning_rate": 4.696740295771305e-06, | |
| "loss": 4.7559, | |
| "step": 64800 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "grad_norm": 1.014657974243164, | |
| "learning_rate": 4.688556253017866e-06, | |
| "loss": 4.7585, | |
| "step": 64900 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "grad_norm": 1.3047457933425903, | |
| "learning_rate": 4.680372210264427e-06, | |
| "loss": 4.7582, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "grad_norm": 5.515758514404297, | |
| "learning_rate": 4.672188167510988e-06, | |
| "loss": 4.7581, | |
| "step": 65100 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "grad_norm": 1.061341643333435, | |
| "learning_rate": 4.664004124757548e-06, | |
| "loss": 4.7537, | |
| "step": 65200 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "grad_norm": 1.1620299816131592, | |
| "learning_rate": 4.655820082004109e-06, | |
| "loss": 4.7589, | |
| "step": 65300 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 0.9839982390403748, | |
| "learning_rate": 4.647636039250669e-06, | |
| "loss": 4.7602, | |
| "step": 65400 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 1.1345555782318115, | |
| "learning_rate": 4.6394519964972305e-06, | |
| "loss": 4.7595, | |
| "step": 65500 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 1.2723256349563599, | |
| "learning_rate": 4.631267953743791e-06, | |
| "loss": 4.7631, | |
| "step": 65600 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 1.2903733253479004, | |
| "learning_rate": 4.623083910990351e-06, | |
| "loss": 4.7572, | |
| "step": 65700 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 1.849879503250122, | |
| "learning_rate": 4.614899868236912e-06, | |
| "loss": 4.7536, | |
| "step": 65800 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 0.9856821298599243, | |
| "learning_rate": 4.6067158254834724e-06, | |
| "loss": 4.7566, | |
| "step": 65900 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 1.0516011714935303, | |
| "learning_rate": 4.5985317827300335e-06, | |
| "loss": 4.7577, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 1.0833971500396729, | |
| "learning_rate": 4.590347739976594e-06, | |
| "loss": 4.7553, | |
| "step": 66100 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 3.236478805541992, | |
| "learning_rate": 4.582163697223155e-06, | |
| "loss": 4.7524, | |
| "step": 66200 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 4.561278820037842, | |
| "learning_rate": 4.573979654469715e-06, | |
| "loss": 4.7598, | |
| "step": 66300 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 1.0862793922424316, | |
| "learning_rate": 4.565795611716276e-06, | |
| "loss": 4.756, | |
| "step": 66400 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 1.248744249343872, | |
| "learning_rate": 4.5576115689628366e-06, | |
| "loss": 4.7579, | |
| "step": 66500 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "grad_norm": 1.2721776962280273, | |
| "learning_rate": 4.549427526209398e-06, | |
| "loss": 4.7549, | |
| "step": 66600 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "grad_norm": 1.6902192831039429, | |
| "learning_rate": 4.541243483455958e-06, | |
| "loss": 4.7605, | |
| "step": 66700 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "grad_norm": 2.721341609954834, | |
| "learning_rate": 4.533059440702518e-06, | |
| "loss": 4.7619, | |
| "step": 66800 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "grad_norm": 1.5549167394638062, | |
| "learning_rate": 4.524875397949079e-06, | |
| "loss": 4.7581, | |
| "step": 66900 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "grad_norm": 4.686578273773193, | |
| "learning_rate": 4.51669135519564e-06, | |
| "loss": 4.757, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "grad_norm": 1.2544666528701782, | |
| "learning_rate": 4.508507312442201e-06, | |
| "loss": 4.7614, | |
| "step": 67100 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "grad_norm": 1.3165531158447266, | |
| "learning_rate": 4.500323269688761e-06, | |
| "loss": 4.7549, | |
| "step": 67200 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "grad_norm": 1.4608168601989746, | |
| "learning_rate": 4.492139226935322e-06, | |
| "loss": 4.7553, | |
| "step": 67300 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "grad_norm": 1.003299593925476, | |
| "learning_rate": 4.483955184181882e-06, | |
| "loss": 4.7638, | |
| "step": 67400 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "grad_norm": 1.496551752090454, | |
| "learning_rate": 4.4757711414284435e-06, | |
| "loss": 4.759, | |
| "step": 67500 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "grad_norm": 1.3934059143066406, | |
| "learning_rate": 4.467587098675004e-06, | |
| "loss": 4.7566, | |
| "step": 67600 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "grad_norm": 2.459867238998413, | |
| "learning_rate": 4.459403055921565e-06, | |
| "loss": 4.7567, | |
| "step": 67700 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "grad_norm": 1.2848294973373413, | |
| "learning_rate": 4.451219013168125e-06, | |
| "loss": 4.7562, | |
| "step": 67800 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 1.5182512998580933, | |
| "learning_rate": 4.443034970414685e-06, | |
| "loss": 4.7595, | |
| "step": 67900 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 1.2391725778579712, | |
| "learning_rate": 4.4348509276612465e-06, | |
| "loss": 4.7538, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 3.008521318435669, | |
| "learning_rate": 4.426666884907807e-06, | |
| "loss": 4.7568, | |
| "step": 68100 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 1.6599717140197754, | |
| "learning_rate": 4.418482842154368e-06, | |
| "loss": 4.7598, | |
| "step": 68200 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 2.2164254188537598, | |
| "learning_rate": 4.410298799400928e-06, | |
| "loss": 4.7545, | |
| "step": 68300 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 3.473665237426758, | |
| "learning_rate": 4.402114756647489e-06, | |
| "loss": 4.7601, | |
| "step": 68400 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 1.9182640314102173, | |
| "learning_rate": 4.3939307138940495e-06, | |
| "loss": 4.7559, | |
| "step": 68500 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 2.2187399864196777, | |
| "learning_rate": 4.385746671140611e-06, | |
| "loss": 4.7611, | |
| "step": 68600 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 2.2415308952331543, | |
| "learning_rate": 4.377562628387171e-06, | |
| "loss": 4.7572, | |
| "step": 68700 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 1.0853921175003052, | |
| "learning_rate": 4.369378585633732e-06, | |
| "loss": 4.7522, | |
| "step": 68800 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 2.0470669269561768, | |
| "learning_rate": 4.361194542880292e-06, | |
| "loss": 4.7567, | |
| "step": 68900 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 1.5501480102539062, | |
| "learning_rate": 4.353010500126853e-06, | |
| "loss": 4.7548, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "grad_norm": 1.0756503343582153, | |
| "learning_rate": 4.344826457373414e-06, | |
| "loss": 4.7556, | |
| "step": 69100 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "grad_norm": 1.0396485328674316, | |
| "learning_rate": 4.336642414619974e-06, | |
| "loss": 4.756, | |
| "step": 69200 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "grad_norm": 1.5130740404129028, | |
| "learning_rate": 4.328458371866535e-06, | |
| "loss": 4.7538, | |
| "step": 69300 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "grad_norm": 1.2191152572631836, | |
| "learning_rate": 4.320274329113095e-06, | |
| "loss": 4.7594, | |
| "step": 69400 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "grad_norm": 1.1031177043914795, | |
| "learning_rate": 4.3120902863596564e-06, | |
| "loss": 4.7585, | |
| "step": 69500 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "grad_norm": 3.345165967941284, | |
| "learning_rate": 4.303906243606217e-06, | |
| "loss": 4.7601, | |
| "step": 69600 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "grad_norm": 1.058370590209961, | |
| "learning_rate": 4.295722200852778e-06, | |
| "loss": 4.7575, | |
| "step": 69700 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "grad_norm": 1.364247441291809, | |
| "learning_rate": 4.287538158099338e-06, | |
| "loss": 4.7592, | |
| "step": 69800 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "grad_norm": 2.520071029663086, | |
| "learning_rate": 4.279354115345899e-06, | |
| "loss": 4.7597, | |
| "step": 69900 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "grad_norm": 1.4218943119049072, | |
| "learning_rate": 4.2711700725924595e-06, | |
| "loss": 4.7525, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "grad_norm": 2.9582276344299316, | |
| "learning_rate": 4.2629860298390206e-06, | |
| "loss": 4.7611, | |
| "step": 70100 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "grad_norm": 2.1016061305999756, | |
| "learning_rate": 4.254801987085581e-06, | |
| "loss": 4.7557, | |
| "step": 70200 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 1.1501710414886475, | |
| "learning_rate": 4.246617944332141e-06, | |
| "loss": 4.7582, | |
| "step": 70300 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 2.3157947063446045, | |
| "learning_rate": 4.238433901578702e-06, | |
| "loss": 4.7574, | |
| "step": 70400 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 1.0421010255813599, | |
| "learning_rate": 4.2302498588252625e-06, | |
| "loss": 4.7583, | |
| "step": 70500 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 1.3601773977279663, | |
| "learning_rate": 4.222065816071824e-06, | |
| "loss": 4.7567, | |
| "step": 70600 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 1.1386362314224243, | |
| "learning_rate": 4.213881773318384e-06, | |
| "loss": 4.7535, | |
| "step": 70700 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 1.3439152240753174, | |
| "learning_rate": 4.205697730564945e-06, | |
| "loss": 4.7595, | |
| "step": 70800 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 1.9923715591430664, | |
| "learning_rate": 4.197513687811505e-06, | |
| "loss": 4.7561, | |
| "step": 70900 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 1.0728856325149536, | |
| "learning_rate": 4.189329645058066e-06, | |
| "loss": 4.7528, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 1.5504320859909058, | |
| "learning_rate": 4.181145602304627e-06, | |
| "loss": 4.7555, | |
| "step": 71100 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 3.476879358291626, | |
| "learning_rate": 4.172961559551188e-06, | |
| "loss": 4.7571, | |
| "step": 71200 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 1.4305684566497803, | |
| "learning_rate": 4.164777516797748e-06, | |
| "loss": 4.7501, | |
| "step": 71300 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 1.2255500555038452, | |
| "learning_rate": 4.156593474044308e-06, | |
| "loss": 4.7591, | |
| "step": 71400 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "grad_norm": 2.292752742767334, | |
| "learning_rate": 4.148409431290869e-06, | |
| "loss": 4.7576, | |
| "step": 71500 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "grad_norm": 1.9670140743255615, | |
| "learning_rate": 4.14022538853743e-06, | |
| "loss": 4.7605, | |
| "step": 71600 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "grad_norm": 2.035198450088501, | |
| "learning_rate": 4.132041345783991e-06, | |
| "loss": 4.7564, | |
| "step": 71700 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "grad_norm": 1.918428659439087, | |
| "learning_rate": 4.123857303030551e-06, | |
| "loss": 4.7538, | |
| "step": 71800 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "grad_norm": 4.245315074920654, | |
| "learning_rate": 4.115673260277112e-06, | |
| "loss": 4.7585, | |
| "step": 71900 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "grad_norm": 3.4246652126312256, | |
| "learning_rate": 4.107489217523672e-06, | |
| "loss": 4.7507, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "grad_norm": 1.2266836166381836, | |
| "learning_rate": 4.0993051747702335e-06, | |
| "loss": 4.7602, | |
| "step": 72100 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "grad_norm": 1.9559603929519653, | |
| "learning_rate": 4.091121132016794e-06, | |
| "loss": 4.7555, | |
| "step": 72200 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "grad_norm": 2.2520241737365723, | |
| "learning_rate": 4.082937089263355e-06, | |
| "loss": 4.7551, | |
| "step": 72300 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "grad_norm": 2.5236260890960693, | |
| "learning_rate": 4.074753046509915e-06, | |
| "loss": 4.7552, | |
| "step": 72400 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "grad_norm": 1.0424671173095703, | |
| "learning_rate": 4.066569003756476e-06, | |
| "loss": 4.7548, | |
| "step": 72500 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "grad_norm": 1.1566205024719238, | |
| "learning_rate": 4.0583849610030366e-06, | |
| "loss": 4.7522, | |
| "step": 72600 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "grad_norm": 1.9585150480270386, | |
| "learning_rate": 4.050200918249597e-06, | |
| "loss": 4.7609, | |
| "step": 72700 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 6.500349044799805, | |
| "learning_rate": 4.042016875496158e-06, | |
| "loss": 4.7562, | |
| "step": 72800 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 1.1571673154830933, | |
| "learning_rate": 4.033832832742718e-06, | |
| "loss": 4.7571, | |
| "step": 72900 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 1.7180365324020386, | |
| "learning_rate": 4.025648789989279e-06, | |
| "loss": 4.7614, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 1.5343599319458008, | |
| "learning_rate": 4.01746474723584e-06, | |
| "loss": 4.7588, | |
| "step": 73100 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 1.5855658054351807, | |
| "learning_rate": 4.009280704482401e-06, | |
| "loss": 4.7603, | |
| "step": 73200 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 1.0107240676879883, | |
| "learning_rate": 4.001096661728961e-06, | |
| "loss": 4.7576, | |
| "step": 73300 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 1.6505345106124878, | |
| "learning_rate": 3.992912618975522e-06, | |
| "loss": 4.7562, | |
| "step": 73400 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 2.6212563514709473, | |
| "learning_rate": 3.984728576222082e-06, | |
| "loss": 4.7516, | |
| "step": 73500 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 1.5305055379867554, | |
| "learning_rate": 3.9765445334686435e-06, | |
| "loss": 4.7533, | |
| "step": 73600 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 3.195974826812744, | |
| "learning_rate": 3.968360490715204e-06, | |
| "loss": 4.7509, | |
| "step": 73700 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 1.088273286819458, | |
| "learning_rate": 3.960176447961764e-06, | |
| "loss": 4.7559, | |
| "step": 73800 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 2.045375108718872, | |
| "learning_rate": 3.951992405208325e-06, | |
| "loss": 4.7516, | |
| "step": 73900 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "grad_norm": 1.5279570817947388, | |
| "learning_rate": 3.943808362454885e-06, | |
| "loss": 4.7566, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "grad_norm": 1.987199306488037, | |
| "learning_rate": 3.9356243197014465e-06, | |
| "loss": 4.7531, | |
| "step": 74100 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "grad_norm": 1.7594095468521118, | |
| "learning_rate": 3.927440276948007e-06, | |
| "loss": 4.7568, | |
| "step": 74200 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "grad_norm": 1.9795931577682495, | |
| "learning_rate": 3.919256234194568e-06, | |
| "loss": 4.7606, | |
| "step": 74300 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "grad_norm": 1.383016586303711, | |
| "learning_rate": 3.911072191441128e-06, | |
| "loss": 4.7559, | |
| "step": 74400 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "grad_norm": 1.4739179611206055, | |
| "learning_rate": 3.902888148687689e-06, | |
| "loss": 4.7553, | |
| "step": 74500 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "grad_norm": 1.1515618562698364, | |
| "learning_rate": 3.89470410593425e-06, | |
| "loss": 4.7608, | |
| "step": 74600 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "grad_norm": 1.4380580186843872, | |
| "learning_rate": 3.886520063180811e-06, | |
| "loss": 4.7578, | |
| "step": 74700 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "grad_norm": 1.187768578529358, | |
| "learning_rate": 3.878336020427371e-06, | |
| "loss": 4.7533, | |
| "step": 74800 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "grad_norm": 4.768670082092285, | |
| "learning_rate": 3.870151977673931e-06, | |
| "loss": 4.7573, | |
| "step": 74900 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "grad_norm": 1.2797937393188477, | |
| "learning_rate": 3.861967934920492e-06, | |
| "loss": 4.7546, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "grad_norm": 2.6686596870422363, | |
| "learning_rate": 3.8537838921670525e-06, | |
| "loss": 4.7562, | |
| "step": 75100 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "grad_norm": 2.777021646499634, | |
| "learning_rate": 3.845599849413614e-06, | |
| "loss": 4.7578, | |
| "step": 75200 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "grad_norm": 1.4774482250213623, | |
| "learning_rate": 3.837415806660174e-06, | |
| "loss": 4.7553, | |
| "step": 75300 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "grad_norm": 1.154783010482788, | |
| "learning_rate": 3.829231763906735e-06, | |
| "loss": 4.7536, | |
| "step": 75400 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "grad_norm": 1.1363816261291504, | |
| "learning_rate": 3.821047721153295e-06, | |
| "loss": 4.7553, | |
| "step": 75500 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "grad_norm": 1.314833402633667, | |
| "learning_rate": 3.8128636783998564e-06, | |
| "loss": 4.754, | |
| "step": 75600 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "grad_norm": 2.0026655197143555, | |
| "learning_rate": 3.804679635646417e-06, | |
| "loss": 4.7564, | |
| "step": 75700 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "grad_norm": 2.530662775039673, | |
| "learning_rate": 3.7964955928929774e-06, | |
| "loss": 4.7621, | |
| "step": 75800 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "grad_norm": 1.7200578451156616, | |
| "learning_rate": 3.7883115501395385e-06, | |
| "loss": 4.7534, | |
| "step": 75900 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "grad_norm": 1.1230708360671997, | |
| "learning_rate": 3.7801275073860988e-06, | |
| "loss": 4.7573, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "grad_norm": 1.1518919467926025, | |
| "learning_rate": 3.77194346463266e-06, | |
| "loss": 4.756, | |
| "step": 76100 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "grad_norm": 4.6440582275390625, | |
| "learning_rate": 3.76375942187922e-06, | |
| "loss": 4.7538, | |
| "step": 76200 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "grad_norm": 1.2866283655166626, | |
| "learning_rate": 3.755575379125781e-06, | |
| "loss": 4.7572, | |
| "step": 76300 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "grad_norm": 1.0763148069381714, | |
| "learning_rate": 3.7473913363723415e-06, | |
| "loss": 4.7568, | |
| "step": 76400 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "grad_norm": 1.0883269309997559, | |
| "learning_rate": 3.739207293618902e-06, | |
| "loss": 4.7562, | |
| "step": 76500 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "grad_norm": 1.50298011302948, | |
| "learning_rate": 3.7310232508654625e-06, | |
| "loss": 4.7512, | |
| "step": 76600 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "grad_norm": 1.144468069076538, | |
| "learning_rate": 3.7228392081120236e-06, | |
| "loss": 4.7571, | |
| "step": 76700 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "grad_norm": 1.2953712940216064, | |
| "learning_rate": 3.7146551653585843e-06, | |
| "loss": 4.7502, | |
| "step": 76800 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "grad_norm": 3.047788143157959, | |
| "learning_rate": 3.7064711226051445e-06, | |
| "loss": 4.7561, | |
| "step": 76900 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "grad_norm": 1.7507997751235962, | |
| "learning_rate": 3.6982870798517057e-06, | |
| "loss": 4.7547, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "grad_norm": 1.189469814300537, | |
| "learning_rate": 3.690103037098266e-06, | |
| "loss": 4.7554, | |
| "step": 77100 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "grad_norm": 1.2107117176055908, | |
| "learning_rate": 3.681918994344827e-06, | |
| "loss": 4.7544, | |
| "step": 77200 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "grad_norm": 1.0349069833755493, | |
| "learning_rate": 3.6737349515913873e-06, | |
| "loss": 4.7603, | |
| "step": 77300 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "grad_norm": 1.1641030311584473, | |
| "learning_rate": 3.665550908837948e-06, | |
| "loss": 4.7566, | |
| "step": 77400 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "grad_norm": 1.1426669359207153, | |
| "learning_rate": 3.6573668660845087e-06, | |
| "loss": 4.7535, | |
| "step": 77500 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 2.1960771083831787, | |
| "learning_rate": 3.6491828233310694e-06, | |
| "loss": 4.7559, | |
| "step": 77600 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 1.1451727151870728, | |
| "learning_rate": 3.6409987805776296e-06, | |
| "loss": 4.7591, | |
| "step": 77700 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 1.3886533975601196, | |
| "learning_rate": 3.6328147378241908e-06, | |
| "loss": 4.7553, | |
| "step": 77800 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 1.1393433809280396, | |
| "learning_rate": 3.6246306950707514e-06, | |
| "loss": 4.7581, | |
| "step": 77900 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 1.2284306287765503, | |
| "learning_rate": 3.616446652317312e-06, | |
| "loss": 4.7566, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 1.0621302127838135, | |
| "learning_rate": 3.608262609563873e-06, | |
| "loss": 4.7532, | |
| "step": 78100 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 1.1183658838272095, | |
| "learning_rate": 3.600078566810433e-06, | |
| "loss": 4.7583, | |
| "step": 78200 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 1.1688071489334106, | |
| "learning_rate": 3.591894524056994e-06, | |
| "loss": 4.7542, | |
| "step": 78300 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 3.1448984146118164, | |
| "learning_rate": 3.5837104813035545e-06, | |
| "loss": 4.7592, | |
| "step": 78400 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 1.1687928438186646, | |
| "learning_rate": 3.5755264385501156e-06, | |
| "loss": 4.7553, | |
| "step": 78500 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 1.1805286407470703, | |
| "learning_rate": 3.567342395796676e-06, | |
| "loss": 4.7544, | |
| "step": 78600 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 2.4032955169677734, | |
| "learning_rate": 3.5591583530432365e-06, | |
| "loss": 4.7569, | |
| "step": 78700 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 3.784090757369995, | |
| "learning_rate": 3.550974310289797e-06, | |
| "loss": 4.7565, | |
| "step": 78800 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "grad_norm": 1.2469580173492432, | |
| "learning_rate": 3.542790267536358e-06, | |
| "loss": 4.7485, | |
| "step": 78900 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "grad_norm": 2.6100597381591797, | |
| "learning_rate": 3.5346062247829186e-06, | |
| "loss": 4.7561, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "grad_norm": 1.4072147607803345, | |
| "learning_rate": 3.5264221820294793e-06, | |
| "loss": 4.7556, | |
| "step": 79100 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "grad_norm": 1.2158293724060059, | |
| "learning_rate": 3.51823813927604e-06, | |
| "loss": 4.7503, | |
| "step": 79200 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "grad_norm": 1.4874674081802368, | |
| "learning_rate": 3.5100540965226003e-06, | |
| "loss": 4.7547, | |
| "step": 79300 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "grad_norm": 1.221482515335083, | |
| "learning_rate": 3.5018700537691614e-06, | |
| "loss": 4.7559, | |
| "step": 79400 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "grad_norm": 1.1589709520339966, | |
| "learning_rate": 3.4936860110157216e-06, | |
| "loss": 4.7545, | |
| "step": 79500 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "grad_norm": 1.2871575355529785, | |
| "learning_rate": 3.4855019682622828e-06, | |
| "loss": 4.7522, | |
| "step": 79600 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "grad_norm": 1.7240387201309204, | |
| "learning_rate": 3.477317925508843e-06, | |
| "loss": 4.7566, | |
| "step": 79700 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "grad_norm": 6.059484004974365, | |
| "learning_rate": 3.4691338827554037e-06, | |
| "loss": 4.7566, | |
| "step": 79800 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "grad_norm": 1.1639505624771118, | |
| "learning_rate": 3.4609498400019644e-06, | |
| "loss": 4.7521, | |
| "step": 79900 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "grad_norm": 1.1786649227142334, | |
| "learning_rate": 3.452765797248525e-06, | |
| "loss": 4.7475, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "grad_norm": 1.275763988494873, | |
| "learning_rate": 3.4445817544950862e-06, | |
| "loss": 4.7512, | |
| "step": 80100 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "grad_norm": 2.3286848068237305, | |
| "learning_rate": 3.4363977117416465e-06, | |
| "loss": 4.7573, | |
| "step": 80200 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "grad_norm": 1.2990089654922485, | |
| "learning_rate": 3.428213668988207e-06, | |
| "loss": 4.752, | |
| "step": 80300 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "grad_norm": 2.1798534393310547, | |
| "learning_rate": 3.4200296262347674e-06, | |
| "loss": 4.7566, | |
| "step": 80400 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "grad_norm": 2.029482841491699, | |
| "learning_rate": 3.4118455834813286e-06, | |
| "loss": 4.7528, | |
| "step": 80500 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "grad_norm": 1.3646825551986694, | |
| "learning_rate": 3.403661540727889e-06, | |
| "loss": 4.7567, | |
| "step": 80600 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "grad_norm": 1.1107501983642578, | |
| "learning_rate": 3.39547749797445e-06, | |
| "loss": 4.7518, | |
| "step": 80700 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "grad_norm": 1.7238624095916748, | |
| "learning_rate": 3.38729345522101e-06, | |
| "loss": 4.7535, | |
| "step": 80800 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "grad_norm": 1.2147496938705444, | |
| "learning_rate": 3.379109412467571e-06, | |
| "loss": 4.7529, | |
| "step": 80900 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "grad_norm": 1.3453848361968994, | |
| "learning_rate": 3.3709253697141316e-06, | |
| "loss": 4.7549, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "grad_norm": 1.3312301635742188, | |
| "learning_rate": 3.3627413269606923e-06, | |
| "loss": 4.7533, | |
| "step": 81100 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "grad_norm": 1.0629857778549194, | |
| "learning_rate": 3.3545572842072534e-06, | |
| "loss": 4.754, | |
| "step": 81200 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "grad_norm": 1.145863652229309, | |
| "learning_rate": 3.3463732414538136e-06, | |
| "loss": 4.7561, | |
| "step": 81300 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "grad_norm": 1.1477543115615845, | |
| "learning_rate": 3.3381891987003743e-06, | |
| "loss": 4.7546, | |
| "step": 81400 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "grad_norm": 2.111903190612793, | |
| "learning_rate": 3.330005155946935e-06, | |
| "loss": 4.7523, | |
| "step": 81500 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "grad_norm": 2.4378135204315186, | |
| "learning_rate": 3.3218211131934957e-06, | |
| "loss": 4.7524, | |
| "step": 81600 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "grad_norm": 1.0920718908309937, | |
| "learning_rate": 3.313637070440056e-06, | |
| "loss": 4.7579, | |
| "step": 81700 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "grad_norm": 1.142166018486023, | |
| "learning_rate": 3.305453027686617e-06, | |
| "loss": 4.751, | |
| "step": 81800 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "grad_norm": 2.497532844543457, | |
| "learning_rate": 3.2972689849331774e-06, | |
| "loss": 4.7587, | |
| "step": 81900 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "grad_norm": 1.1811760663986206, | |
| "learning_rate": 3.2890849421797385e-06, | |
| "loss": 4.7565, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "grad_norm": 1.4381294250488281, | |
| "learning_rate": 3.2809008994262987e-06, | |
| "loss": 4.7523, | |
| "step": 82100 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "grad_norm": 1.1105141639709473, | |
| "learning_rate": 3.2727168566728594e-06, | |
| "loss": 4.755, | |
| "step": 82200 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "grad_norm": 1.0709772109985352, | |
| "learning_rate": 3.2645328139194206e-06, | |
| "loss": 4.7518, | |
| "step": 82300 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "grad_norm": 1.3575836420059204, | |
| "learning_rate": 3.256348771165981e-06, | |
| "loss": 4.7538, | |
| "step": 82400 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 1.1453856229782104, | |
| "learning_rate": 3.2481647284125415e-06, | |
| "loss": 4.7567, | |
| "step": 82500 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 1.2590690851211548, | |
| "learning_rate": 3.239980685659102e-06, | |
| "loss": 4.7556, | |
| "step": 82600 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 1.3445689678192139, | |
| "learning_rate": 3.231796642905663e-06, | |
| "loss": 4.7529, | |
| "step": 82700 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 1.1240034103393555, | |
| "learning_rate": 3.223612600152223e-06, | |
| "loss": 4.7595, | |
| "step": 82800 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 1.2913769483566284, | |
| "learning_rate": 3.2154285573987843e-06, | |
| "loss": 4.7523, | |
| "step": 82900 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 1.2136105298995972, | |
| "learning_rate": 3.2072445146453445e-06, | |
| "loss": 4.7548, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 1.0630725622177124, | |
| "learning_rate": 3.1990604718919057e-06, | |
| "loss": 4.7551, | |
| "step": 83100 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 1.495082139968872, | |
| "learning_rate": 3.190876429138466e-06, | |
| "loss": 4.7553, | |
| "step": 83200 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 0.9895689487457275, | |
| "learning_rate": 3.1826923863850266e-06, | |
| "loss": 4.759, | |
| "step": 83300 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 1.4668093919754028, | |
| "learning_rate": 3.1745083436315877e-06, | |
| "loss": 4.7561, | |
| "step": 83400 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 1.5256825685501099, | |
| "learning_rate": 3.166324300878148e-06, | |
| "loss": 4.7573, | |
| "step": 83500 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 3.0631277561187744, | |
| "learning_rate": 3.158140258124709e-06, | |
| "loss": 4.7543, | |
| "step": 83600 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "grad_norm": 1.171787977218628, | |
| "learning_rate": 3.1499562153712694e-06, | |
| "loss": 4.7508, | |
| "step": 83700 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "grad_norm": 7.035879611968994, | |
| "learning_rate": 3.14177217261783e-06, | |
| "loss": 4.7519, | |
| "step": 83800 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "grad_norm": 2.3109359741210938, | |
| "learning_rate": 3.1335881298643903e-06, | |
| "loss": 4.7573, | |
| "step": 83900 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "grad_norm": 2.3658266067504883, | |
| "learning_rate": 3.1254040871109514e-06, | |
| "loss": 4.754, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "grad_norm": 1.6524670124053955, | |
| "learning_rate": 3.1172200443575117e-06, | |
| "loss": 4.7549, | |
| "step": 84100 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "grad_norm": 1.797340989112854, | |
| "learning_rate": 3.109036001604073e-06, | |
| "loss": 4.7499, | |
| "step": 84200 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "grad_norm": 3.3878042697906494, | |
| "learning_rate": 3.100851958850633e-06, | |
| "loss": 4.7518, | |
| "step": 84300 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "grad_norm": 1.5656503438949585, | |
| "learning_rate": 3.0926679160971938e-06, | |
| "loss": 4.7588, | |
| "step": 84400 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "grad_norm": 1.4081205129623413, | |
| "learning_rate": 3.084483873343755e-06, | |
| "loss": 4.7587, | |
| "step": 84500 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "grad_norm": 2.011707305908203, | |
| "learning_rate": 3.076299830590315e-06, | |
| "loss": 4.7525, | |
| "step": 84600 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "grad_norm": 1.1103359460830688, | |
| "learning_rate": 3.0681157878368763e-06, | |
| "loss": 4.7547, | |
| "step": 84700 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "grad_norm": 1.331764578819275, | |
| "learning_rate": 3.0599317450834365e-06, | |
| "loss": 4.7544, | |
| "step": 84800 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "grad_norm": 1.1731749773025513, | |
| "learning_rate": 3.0517477023299972e-06, | |
| "loss": 4.7527, | |
| "step": 84900 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 2.4476029872894287, | |
| "learning_rate": 3.043563659576558e-06, | |
| "loss": 4.7529, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 2.6501026153564453, | |
| "learning_rate": 3.0353796168231186e-06, | |
| "loss": 4.7495, | |
| "step": 85100 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 1.4330114126205444, | |
| "learning_rate": 3.027195574069679e-06, | |
| "loss": 4.7551, | |
| "step": 85200 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 1.794797420501709, | |
| "learning_rate": 3.01901153131624e-06, | |
| "loss": 4.7554, | |
| "step": 85300 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 1.1396609544754028, | |
| "learning_rate": 3.0108274885628003e-06, | |
| "loss": 4.7521, | |
| "step": 85400 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 1.5291541814804077, | |
| "learning_rate": 3.0026434458093614e-06, | |
| "loss": 4.7538, | |
| "step": 85500 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 0.9390245676040649, | |
| "learning_rate": 2.9944594030559216e-06, | |
| "loss": 4.7499, | |
| "step": 85600 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 4.141879558563232, | |
| "learning_rate": 2.9862753603024823e-06, | |
| "loss": 4.7587, | |
| "step": 85700 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 2.151954412460327, | |
| "learning_rate": 2.9780913175490434e-06, | |
| "loss": 4.7525, | |
| "step": 85800 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 1.340173363685608, | |
| "learning_rate": 2.9699072747956037e-06, | |
| "loss": 4.7519, | |
| "step": 85900 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 1.9204574823379517, | |
| "learning_rate": 2.9617232320421644e-06, | |
| "loss": 4.7583, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 1.391839623451233, | |
| "learning_rate": 2.953539189288725e-06, | |
| "loss": 4.751, | |
| "step": 86100 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "grad_norm": 1.4064441919326782, | |
| "learning_rate": 2.9453551465352858e-06, | |
| "loss": 4.7506, | |
| "step": 86200 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "grad_norm": 1.2319107055664062, | |
| "learning_rate": 2.937171103781846e-06, | |
| "loss": 4.7551, | |
| "step": 86300 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "grad_norm": 2.515320301055908, | |
| "learning_rate": 2.928987061028407e-06, | |
| "loss": 4.7517, | |
| "step": 86400 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "grad_norm": 2.4007177352905273, | |
| "learning_rate": 2.9208030182749674e-06, | |
| "loss": 4.7513, | |
| "step": 86500 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "grad_norm": 1.4867286682128906, | |
| "learning_rate": 2.9126189755215285e-06, | |
| "loss": 4.7549, | |
| "step": 86600 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "grad_norm": 1.2570160627365112, | |
| "learning_rate": 2.904434932768089e-06, | |
| "loss": 4.753, | |
| "step": 86700 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "grad_norm": 2.847069025039673, | |
| "learning_rate": 2.8962508900146495e-06, | |
| "loss": 4.7555, | |
| "step": 86800 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "grad_norm": 1.0997235774993896, | |
| "learning_rate": 2.8880668472612106e-06, | |
| "loss": 4.7532, | |
| "step": 86900 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "grad_norm": 1.8394368886947632, | |
| "learning_rate": 2.879882804507771e-06, | |
| "loss": 4.7504, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "grad_norm": 3.6865549087524414, | |
| "learning_rate": 2.871698761754332e-06, | |
| "loss": 4.7567, | |
| "step": 87100 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "grad_norm": 3.022850275039673, | |
| "learning_rate": 2.8635147190008923e-06, | |
| "loss": 4.7509, | |
| "step": 87200 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "grad_norm": 1.7531808614730835, | |
| "learning_rate": 2.855330676247453e-06, | |
| "loss": 4.7527, | |
| "step": 87300 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 2.0469372272491455, | |
| "learning_rate": 2.8471466334940136e-06, | |
| "loss": 4.7564, | |
| "step": 87400 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 1.4322601556777954, | |
| "learning_rate": 2.8389625907405743e-06, | |
| "loss": 4.7552, | |
| "step": 87500 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 1.2034333944320679, | |
| "learning_rate": 2.8307785479871346e-06, | |
| "loss": 4.7555, | |
| "step": 87600 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 1.0759299993515015, | |
| "learning_rate": 2.8225945052336957e-06, | |
| "loss": 4.7508, | |
| "step": 87700 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 1.1701573133468628, | |
| "learning_rate": 2.814410462480256e-06, | |
| "loss": 4.7535, | |
| "step": 87800 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 1.4818124771118164, | |
| "learning_rate": 2.8062264197268167e-06, | |
| "loss": 4.7528, | |
| "step": 87900 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 1.362298846244812, | |
| "learning_rate": 2.7980423769733778e-06, | |
| "loss": 4.7488, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 0.9951609969139099, | |
| "learning_rate": 2.789858334219938e-06, | |
| "loss": 4.7509, | |
| "step": 88100 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 1.2555766105651855, | |
| "learning_rate": 2.781674291466499e-06, | |
| "loss": 4.7559, | |
| "step": 88200 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 1.8623309135437012, | |
| "learning_rate": 2.7734902487130594e-06, | |
| "loss": 4.7489, | |
| "step": 88300 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 1.2883721590042114, | |
| "learning_rate": 2.76530620595962e-06, | |
| "loss": 4.751, | |
| "step": 88400 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 1.1867636442184448, | |
| "learning_rate": 2.757122163206181e-06, | |
| "loss": 4.7524, | |
| "step": 88500 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "grad_norm": 1.4036273956298828, | |
| "learning_rate": 2.7489381204527415e-06, | |
| "loss": 4.755, | |
| "step": 88600 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "grad_norm": 1.2148162126541138, | |
| "learning_rate": 2.7407540776993018e-06, | |
| "loss": 4.7582, | |
| "step": 88700 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "grad_norm": 2.2214956283569336, | |
| "learning_rate": 2.732570034945863e-06, | |
| "loss": 4.7543, | |
| "step": 88800 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "grad_norm": 1.103264331817627, | |
| "learning_rate": 2.724385992192423e-06, | |
| "loss": 4.7468, | |
| "step": 88900 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "grad_norm": 1.3318493366241455, | |
| "learning_rate": 2.7162019494389843e-06, | |
| "loss": 4.7547, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "grad_norm": 1.7869521379470825, | |
| "learning_rate": 2.708017906685545e-06, | |
| "loss": 4.7528, | |
| "step": 89100 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "grad_norm": 1.0730737447738647, | |
| "learning_rate": 2.6998338639321052e-06, | |
| "loss": 4.7554, | |
| "step": 89200 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "grad_norm": 1.677322268486023, | |
| "learning_rate": 2.6916498211786663e-06, | |
| "loss": 4.7574, | |
| "step": 89300 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "grad_norm": 1.7166889905929565, | |
| "learning_rate": 2.6834657784252266e-06, | |
| "loss": 4.7563, | |
| "step": 89400 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "grad_norm": 1.3023245334625244, | |
| "learning_rate": 2.6752817356717877e-06, | |
| "loss": 4.7569, | |
| "step": 89500 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "grad_norm": 1.2815351486206055, | |
| "learning_rate": 2.667097692918348e-06, | |
| "loss": 4.7568, | |
| "step": 89600 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "grad_norm": 1.1161749362945557, | |
| "learning_rate": 2.6589136501649087e-06, | |
| "loss": 4.7536, | |
| "step": 89700 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "grad_norm": 1.4548900127410889, | |
| "learning_rate": 2.650729607411469e-06, | |
| "loss": 4.7566, | |
| "step": 89800 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "grad_norm": 8.324539184570312, | |
| "learning_rate": 2.64254556465803e-06, | |
| "loss": 4.7539, | |
| "step": 89900 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "grad_norm": 2.0228288173675537, | |
| "learning_rate": 2.6343615219045903e-06, | |
| "loss": 4.7514, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "grad_norm": 1.1695142984390259, | |
| "learning_rate": 2.6261774791511514e-06, | |
| "loss": 4.7519, | |
| "step": 90100 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "grad_norm": 1.6865144968032837, | |
| "learning_rate": 2.617993436397712e-06, | |
| "loss": 4.7557, | |
| "step": 90200 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "grad_norm": 0.9601481556892395, | |
| "learning_rate": 2.6098093936442724e-06, | |
| "loss": 4.7518, | |
| "step": 90300 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "grad_norm": 1.0379222631454468, | |
| "learning_rate": 2.6016253508908335e-06, | |
| "loss": 4.7521, | |
| "step": 90400 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "grad_norm": 1.6704763174057007, | |
| "learning_rate": 2.5934413081373938e-06, | |
| "loss": 4.7526, | |
| "step": 90500 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "grad_norm": 1.0544642210006714, | |
| "learning_rate": 2.585257265383955e-06, | |
| "loss": 4.7529, | |
| "step": 90600 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "grad_norm": 1.2152049541473389, | |
| "learning_rate": 2.577073222630515e-06, | |
| "loss": 4.7557, | |
| "step": 90700 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "grad_norm": 1.1299751996994019, | |
| "learning_rate": 2.568889179877076e-06, | |
| "loss": 4.7552, | |
| "step": 90800 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "grad_norm": 1.3130440711975098, | |
| "learning_rate": 2.5607051371236365e-06, | |
| "loss": 4.7512, | |
| "step": 90900 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "grad_norm": 1.1738765239715576, | |
| "learning_rate": 2.5525210943701972e-06, | |
| "loss": 4.7478, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 1.3825798034667969, | |
| "learning_rate": 2.5443370516167575e-06, | |
| "loss": 4.7545, | |
| "step": 91100 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 1.2850853204727173, | |
| "learning_rate": 2.5361530088633186e-06, | |
| "loss": 4.7546, | |
| "step": 91200 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 1.215085506439209, | |
| "learning_rate": 2.5279689661098793e-06, | |
| "loss": 4.7488, | |
| "step": 91300 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 1.4124336242675781, | |
| "learning_rate": 2.5197849233564396e-06, | |
| "loss": 4.7441, | |
| "step": 91400 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 2.5708861351013184, | |
| "learning_rate": 2.5116008806030007e-06, | |
| "loss": 4.7553, | |
| "step": 91500 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 1.9249249696731567, | |
| "learning_rate": 2.503416837849561e-06, | |
| "loss": 4.7565, | |
| "step": 91600 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 1.1398611068725586, | |
| "learning_rate": 2.4952327950961216e-06, | |
| "loss": 4.7513, | |
| "step": 91700 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 2.037564516067505, | |
| "learning_rate": 2.4870487523426827e-06, | |
| "loss": 4.7517, | |
| "step": 91800 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 1.4297902584075928, | |
| "learning_rate": 2.478864709589243e-06, | |
| "loss": 4.7494, | |
| "step": 91900 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 1.369734764099121, | |
| "learning_rate": 2.4706806668358037e-06, | |
| "loss": 4.7511, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 1.1665796041488647, | |
| "learning_rate": 2.4624966240823644e-06, | |
| "loss": 4.7576, | |
| "step": 92100 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 1.085404396057129, | |
| "learning_rate": 2.454312581328925e-06, | |
| "loss": 4.7535, | |
| "step": 92200 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 5.764316082000732, | |
| "learning_rate": 2.4461285385754858e-06, | |
| "loss": 4.7497, | |
| "step": 92300 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 1.3492110967636108, | |
| "learning_rate": 2.4379444958220465e-06, | |
| "loss": 4.7558, | |
| "step": 92400 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 1.0760524272918701, | |
| "learning_rate": 2.429760453068607e-06, | |
| "loss": 4.7525, | |
| "step": 92500 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 1.2596811056137085, | |
| "learning_rate": 2.4215764103151674e-06, | |
| "loss": 4.7532, | |
| "step": 92600 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 1.0836505889892578, | |
| "learning_rate": 2.413392367561728e-06, | |
| "loss": 4.7506, | |
| "step": 92700 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 2.759760618209839, | |
| "learning_rate": 2.405208324808289e-06, | |
| "loss": 4.7493, | |
| "step": 92800 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 1.3454488515853882, | |
| "learning_rate": 2.39702428205485e-06, | |
| "loss": 4.7539, | |
| "step": 92900 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 1.2812906503677368, | |
| "learning_rate": 2.3888402393014106e-06, | |
| "loss": 4.7509, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 1.247383952140808, | |
| "learning_rate": 2.380656196547971e-06, | |
| "loss": 4.7493, | |
| "step": 93100 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 1.803625226020813, | |
| "learning_rate": 2.3724721537945316e-06, | |
| "loss": 4.7527, | |
| "step": 93200 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 2.4045066833496094, | |
| "learning_rate": 2.3642881110410922e-06, | |
| "loss": 4.7578, | |
| "step": 93300 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 2.0578811168670654, | |
| "learning_rate": 2.356104068287653e-06, | |
| "loss": 4.7539, | |
| "step": 93400 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "grad_norm": 2.907444477081299, | |
| "learning_rate": 2.3479200255342136e-06, | |
| "loss": 4.7534, | |
| "step": 93500 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "grad_norm": 1.3155359029769897, | |
| "learning_rate": 2.3397359827807743e-06, | |
| "loss": 4.7543, | |
| "step": 93600 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "grad_norm": 1.2676302194595337, | |
| "learning_rate": 2.331551940027335e-06, | |
| "loss": 4.7501, | |
| "step": 93700 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "grad_norm": 1.1166573762893677, | |
| "learning_rate": 2.3233678972738953e-06, | |
| "loss": 4.7518, | |
| "step": 93800 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "grad_norm": 1.3180181980133057, | |
| "learning_rate": 2.315183854520456e-06, | |
| "loss": 4.7548, | |
| "step": 93900 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "grad_norm": 2.867478132247925, | |
| "learning_rate": 2.306999811767017e-06, | |
| "loss": 4.7551, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "grad_norm": 1.3548469543457031, | |
| "learning_rate": 2.2988157690135778e-06, | |
| "loss": 4.7524, | |
| "step": 94100 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "grad_norm": 3.94809627532959, | |
| "learning_rate": 2.290631726260138e-06, | |
| "loss": 4.7516, | |
| "step": 94200 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "grad_norm": 1.0845712423324585, | |
| "learning_rate": 2.2824476835066987e-06, | |
| "loss": 4.7549, | |
| "step": 94300 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "grad_norm": 0.9430265426635742, | |
| "learning_rate": 2.2742636407532594e-06, | |
| "loss": 4.7552, | |
| "step": 94400 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "grad_norm": 1.1491626501083374, | |
| "learning_rate": 2.26607959799982e-06, | |
| "loss": 4.7533, | |
| "step": 94500 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "grad_norm": 1.323564887046814, | |
| "learning_rate": 2.257895555246381e-06, | |
| "loss": 4.7502, | |
| "step": 94600 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 1.2415287494659424, | |
| "learning_rate": 2.2497115124929415e-06, | |
| "loss": 4.754, | |
| "step": 94700 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 1.1996134519577026, | |
| "learning_rate": 2.241527469739502e-06, | |
| "loss": 4.7475, | |
| "step": 94800 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 1.3265007734298706, | |
| "learning_rate": 2.2333434269860624e-06, | |
| "loss": 4.7504, | |
| "step": 94900 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 2.0656216144561768, | |
| "learning_rate": 2.225159384232623e-06, | |
| "loss": 4.7536, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 1.7077275514602661, | |
| "learning_rate": 2.2169753414791843e-06, | |
| "loss": 4.7559, | |
| "step": 95100 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 0.9614852070808411, | |
| "learning_rate": 2.208791298725745e-06, | |
| "loss": 4.753, | |
| "step": 95200 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 1.010793685913086, | |
| "learning_rate": 2.2006072559723056e-06, | |
| "loss": 4.7531, | |
| "step": 95300 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 1.7269645929336548, | |
| "learning_rate": 2.192423213218866e-06, | |
| "loss": 4.7525, | |
| "step": 95400 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 1.1839239597320557, | |
| "learning_rate": 2.1842391704654266e-06, | |
| "loss": 4.7516, | |
| "step": 95500 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 1.0646226406097412, | |
| "learning_rate": 2.1760551277119873e-06, | |
| "loss": 4.7489, | |
| "step": 95600 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 1.2255668640136719, | |
| "learning_rate": 2.167871084958548e-06, | |
| "loss": 4.7525, | |
| "step": 95700 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 1.5146337747573853, | |
| "learning_rate": 2.1596870422051087e-06, | |
| "loss": 4.7524, | |
| "step": 95800 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 2.578728437423706, | |
| "learning_rate": 2.1515029994516693e-06, | |
| "loss": 4.7537, | |
| "step": 95900 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "grad_norm": 1.3910084962844849, | |
| "learning_rate": 2.14331895669823e-06, | |
| "loss": 4.7557, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "grad_norm": 1.6304432153701782, | |
| "learning_rate": 2.1351349139447903e-06, | |
| "loss": 4.7509, | |
| "step": 96100 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "grad_norm": 1.6290279626846313, | |
| "learning_rate": 2.1269508711913514e-06, | |
| "loss": 4.7499, | |
| "step": 96200 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "grad_norm": 1.312935471534729, | |
| "learning_rate": 2.118766828437912e-06, | |
| "loss": 4.7512, | |
| "step": 96300 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "grad_norm": 2.8677687644958496, | |
| "learning_rate": 2.110582785684473e-06, | |
| "loss": 4.7507, | |
| "step": 96400 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "grad_norm": 2.544320583343506, | |
| "learning_rate": 2.1023987429310335e-06, | |
| "loss": 4.7503, | |
| "step": 96500 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "grad_norm": 2.5052340030670166, | |
| "learning_rate": 2.0942147001775938e-06, | |
| "loss": 4.7543, | |
| "step": 96600 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "grad_norm": 2.0886638164520264, | |
| "learning_rate": 2.0860306574241544e-06, | |
| "loss": 4.7513, | |
| "step": 96700 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "grad_norm": 1.1290991306304932, | |
| "learning_rate": 2.077846614670715e-06, | |
| "loss": 4.7486, | |
| "step": 96800 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "grad_norm": 0.968976616859436, | |
| "learning_rate": 2.069662571917276e-06, | |
| "loss": 4.7549, | |
| "step": 96900 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "grad_norm": 1.1029621362686157, | |
| "learning_rate": 2.0614785291638365e-06, | |
| "loss": 4.7545, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "grad_norm": 1.5654712915420532, | |
| "learning_rate": 2.053294486410397e-06, | |
| "loss": 4.7516, | |
| "step": 97100 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 1.3423889875411987, | |
| "learning_rate": 2.045110443656958e-06, | |
| "loss": 4.7529, | |
| "step": 97200 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 1.2194217443466187, | |
| "learning_rate": 2.0369264009035186e-06, | |
| "loss": 4.7503, | |
| "step": 97300 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 1.0679503679275513, | |
| "learning_rate": 2.0287423581500793e-06, | |
| "loss": 4.7515, | |
| "step": 97400 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 1.2756659984588623, | |
| "learning_rate": 2.02055831539664e-06, | |
| "loss": 4.752, | |
| "step": 97500 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 1.315800428390503, | |
| "learning_rate": 2.0123742726432007e-06, | |
| "loss": 4.7554, | |
| "step": 97600 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 1.2954620122909546, | |
| "learning_rate": 2.0041902298897614e-06, | |
| "loss": 4.7525, | |
| "step": 97700 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 1.1520215272903442, | |
| "learning_rate": 1.9960061871363216e-06, | |
| "loss": 4.7548, | |
| "step": 97800 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 1.7471413612365723, | |
| "learning_rate": 1.9878221443828823e-06, | |
| "loss": 4.7549, | |
| "step": 97900 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 1.0936230421066284, | |
| "learning_rate": 1.979638101629443e-06, | |
| "loss": 4.753, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 1.110677719116211, | |
| "learning_rate": 1.9714540588760037e-06, | |
| "loss": 4.7524, | |
| "step": 98100 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 1.094068169593811, | |
| "learning_rate": 1.9632700161225644e-06, | |
| "loss": 4.7508, | |
| "step": 98200 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 1.3435810804367065, | |
| "learning_rate": 1.955085973369125e-06, | |
| "loss": 4.7504, | |
| "step": 98300 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "grad_norm": 1.7671642303466797, | |
| "learning_rate": 1.9469019306156858e-06, | |
| "loss": 4.7504, | |
| "step": 98400 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "grad_norm": 2.0996792316436768, | |
| "learning_rate": 1.9387178878622464e-06, | |
| "loss": 4.7468, | |
| "step": 98500 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "grad_norm": 1.256888508796692, | |
| "learning_rate": 1.930533845108807e-06, | |
| "loss": 4.7501, | |
| "step": 98600 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "grad_norm": 1.4650033712387085, | |
| "learning_rate": 1.922349802355368e-06, | |
| "loss": 4.7493, | |
| "step": 98700 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "grad_norm": 1.3852843046188354, | |
| "learning_rate": 1.9141657596019285e-06, | |
| "loss": 4.7553, | |
| "step": 98800 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "grad_norm": 1.2050074338912964, | |
| "learning_rate": 1.905981716848489e-06, | |
| "loss": 4.7481, | |
| "step": 98900 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "grad_norm": 2.955382823944092, | |
| "learning_rate": 1.8977976740950497e-06, | |
| "loss": 4.7498, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "grad_norm": 1.6441978216171265, | |
| "learning_rate": 1.8896136313416102e-06, | |
| "loss": 4.7498, | |
| "step": 99100 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "grad_norm": 1.577948808670044, | |
| "learning_rate": 1.8814295885881709e-06, | |
| "loss": 4.7552, | |
| "step": 99200 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "grad_norm": 1.3677524328231812, | |
| "learning_rate": 1.8732455458347315e-06, | |
| "loss": 4.7555, | |
| "step": 99300 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "grad_norm": 1.4369767904281616, | |
| "learning_rate": 1.8650615030812922e-06, | |
| "loss": 4.7499, | |
| "step": 99400 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "grad_norm": 1.5186824798583984, | |
| "learning_rate": 1.8568774603278531e-06, | |
| "loss": 4.7539, | |
| "step": 99500 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 1.2422914505004883, | |
| "learning_rate": 1.8486934175744136e-06, | |
| "loss": 4.7557, | |
| "step": 99600 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 1.3044426441192627, | |
| "learning_rate": 1.8405093748209743e-06, | |
| "loss": 4.7525, | |
| "step": 99700 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 3.3080742359161377, | |
| "learning_rate": 1.832325332067535e-06, | |
| "loss": 4.7509, | |
| "step": 99800 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 1.1785410642623901, | |
| "learning_rate": 1.8241412893140955e-06, | |
| "loss": 4.7519, | |
| "step": 99900 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 1.4587723016738892, | |
| "learning_rate": 1.8159572465606562e-06, | |
| "loss": 4.7484, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 3.0926313400268555, | |
| "learning_rate": 1.8077732038072169e-06, | |
| "loss": 4.7562, | |
| "step": 100100 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 1.2200719118118286, | |
| "learning_rate": 1.7995891610537775e-06, | |
| "loss": 4.7584, | |
| "step": 100200 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 1.3386414051055908, | |
| "learning_rate": 1.791405118300338e-06, | |
| "loss": 4.7458, | |
| "step": 100300 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 1.484997034072876, | |
| "learning_rate": 1.7832210755468987e-06, | |
| "loss": 4.7449, | |
| "step": 100400 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 2.519181489944458, | |
| "learning_rate": 1.7750370327934594e-06, | |
| "loss": 4.7529, | |
| "step": 100500 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 2.090131998062134, | |
| "learning_rate": 1.7668529900400199e-06, | |
| "loss": 4.756, | |
| "step": 100600 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 1.105173110961914, | |
| "learning_rate": 1.7586689472865808e-06, | |
| "loss": 4.754, | |
| "step": 100700 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 1.1731809377670288, | |
| "learning_rate": 1.7504849045331415e-06, | |
| "loss": 4.7607, | |
| "step": 100800 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "grad_norm": 1.1397889852523804, | |
| "learning_rate": 1.7423008617797022e-06, | |
| "loss": 4.7506, | |
| "step": 100900 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "grad_norm": 1.1067900657653809, | |
| "learning_rate": 1.7341168190262629e-06, | |
| "loss": 4.7521, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "grad_norm": 1.0581586360931396, | |
| "learning_rate": 1.7259327762728233e-06, | |
| "loss": 4.7531, | |
| "step": 101100 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "grad_norm": 1.9792087078094482, | |
| "learning_rate": 1.717748733519384e-06, | |
| "loss": 4.7541, | |
| "step": 101200 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "grad_norm": 1.133318305015564, | |
| "learning_rate": 1.7095646907659447e-06, | |
| "loss": 4.7554, | |
| "step": 101300 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "grad_norm": 1.241073489189148, | |
| "learning_rate": 1.7013806480125052e-06, | |
| "loss": 4.7493, | |
| "step": 101400 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "grad_norm": 1.2004437446594238, | |
| "learning_rate": 1.6931966052590659e-06, | |
| "loss": 4.7515, | |
| "step": 101500 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "grad_norm": 1.545440912246704, | |
| "learning_rate": 1.6850125625056266e-06, | |
| "loss": 4.7498, | |
| "step": 101600 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "grad_norm": 1.2501575946807861, | |
| "learning_rate": 1.6768285197521873e-06, | |
| "loss": 4.7457, | |
| "step": 101700 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "grad_norm": 1.2254008054733276, | |
| "learning_rate": 1.6686444769987482e-06, | |
| "loss": 4.754, | |
| "step": 101800 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "grad_norm": 1.8597551584243774, | |
| "learning_rate": 1.6604604342453086e-06, | |
| "loss": 4.7545, | |
| "step": 101900 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "grad_norm": 1.5887017250061035, | |
| "learning_rate": 1.6522763914918693e-06, | |
| "loss": 4.7491, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 1.3773962259292603, | |
| "learning_rate": 1.64409234873843e-06, | |
| "loss": 4.753, | |
| "step": 102100 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 1.1974895000457764, | |
| "learning_rate": 1.6359083059849907e-06, | |
| "loss": 4.7563, | |
| "step": 102200 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 1.3141651153564453, | |
| "learning_rate": 1.6277242632315512e-06, | |
| "loss": 4.7483, | |
| "step": 102300 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 2.256546974182129, | |
| "learning_rate": 1.6195402204781119e-06, | |
| "loss": 4.747, | |
| "step": 102400 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 2.344313859939575, | |
| "learning_rate": 1.6113561777246726e-06, | |
| "loss": 4.7458, | |
| "step": 102500 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 1.533346176147461, | |
| "learning_rate": 1.603172134971233e-06, | |
| "loss": 4.7488, | |
| "step": 102600 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 1.1802254915237427, | |
| "learning_rate": 1.5949880922177937e-06, | |
| "loss": 4.7503, | |
| "step": 102700 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 1.1822803020477295, | |
| "learning_rate": 1.5868040494643544e-06, | |
| "loss": 4.7526, | |
| "step": 102800 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 1.3468492031097412, | |
| "learning_rate": 1.5786200067109153e-06, | |
| "loss": 4.7511, | |
| "step": 102900 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 4.000704765319824, | |
| "learning_rate": 1.570435963957476e-06, | |
| "loss": 4.754, | |
| "step": 103000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 1.139367699623108, | |
| "learning_rate": 1.5622519212040365e-06, | |
| "loss": 4.7488, | |
| "step": 103100 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 4.506742000579834, | |
| "learning_rate": 1.5540678784505972e-06, | |
| "loss": 4.7518, | |
| "step": 103200 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "grad_norm": 1.9105794429779053, | |
| "learning_rate": 1.5458838356971579e-06, | |
| "loss": 4.751, | |
| "step": 103300 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "grad_norm": 1.203366994857788, | |
| "learning_rate": 1.5376997929437184e-06, | |
| "loss": 4.7505, | |
| "step": 103400 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "grad_norm": 1.2069025039672852, | |
| "learning_rate": 1.529515750190279e-06, | |
| "loss": 4.7502, | |
| "step": 103500 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "grad_norm": 1.0046311616897583, | |
| "learning_rate": 1.5213317074368397e-06, | |
| "loss": 4.7522, | |
| "step": 103600 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "grad_norm": 2.022199869155884, | |
| "learning_rate": 1.5131476646834004e-06, | |
| "loss": 4.7543, | |
| "step": 103700 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "grad_norm": 1.0690085887908936, | |
| "learning_rate": 1.504963621929961e-06, | |
| "loss": 4.7497, | |
| "step": 103800 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "grad_norm": 1.2978872060775757, | |
| "learning_rate": 1.4967795791765216e-06, | |
| "loss": 4.7511, | |
| "step": 103900 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "grad_norm": 1.110472321510315, | |
| "learning_rate": 1.4885955364230825e-06, | |
| "loss": 4.7504, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "grad_norm": 2.129612684249878, | |
| "learning_rate": 1.4804114936696432e-06, | |
| "loss": 4.7538, | |
| "step": 104100 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "grad_norm": 1.1347908973693848, | |
| "learning_rate": 1.4722274509162037e-06, | |
| "loss": 4.7535, | |
| "step": 104200 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "grad_norm": 1.1420745849609375, | |
| "learning_rate": 1.4640434081627644e-06, | |
| "loss": 4.7491, | |
| "step": 104300 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "grad_norm": 2.80501127243042, | |
| "learning_rate": 1.455859365409325e-06, | |
| "loss": 4.7507, | |
| "step": 104400 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 1.399776816368103, | |
| "learning_rate": 1.4476753226558857e-06, | |
| "loss": 4.7523, | |
| "step": 104500 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 1.2114007472991943, | |
| "learning_rate": 1.4394912799024462e-06, | |
| "loss": 4.7522, | |
| "step": 104600 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 1.078600525856018, | |
| "learning_rate": 1.431307237149007e-06, | |
| "loss": 4.7535, | |
| "step": 104700 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 1.3322091102600098, | |
| "learning_rate": 1.4231231943955676e-06, | |
| "loss": 4.7511, | |
| "step": 104800 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 1.2436057329177856, | |
| "learning_rate": 1.4149391516421283e-06, | |
| "loss": 4.7501, | |
| "step": 104900 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 1.163930058479309, | |
| "learning_rate": 1.4067551088886888e-06, | |
| "loss": 4.7533, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 1.1139936447143555, | |
| "learning_rate": 1.3985710661352497e-06, | |
| "loss": 4.7474, | |
| "step": 105100 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 1.704499363899231, | |
| "learning_rate": 1.3903870233818104e-06, | |
| "loss": 4.7524, | |
| "step": 105200 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 1.2708555459976196, | |
| "learning_rate": 1.382202980628371e-06, | |
| "loss": 4.7558, | |
| "step": 105300 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 2.6546807289123535, | |
| "learning_rate": 1.3740189378749315e-06, | |
| "loss": 4.7514, | |
| "step": 105400 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 1.196606159210205, | |
| "learning_rate": 1.3658348951214922e-06, | |
| "loss": 4.7479, | |
| "step": 105500 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 2.2983286380767822, | |
| "learning_rate": 1.357650852368053e-06, | |
| "loss": 4.7532, | |
| "step": 105600 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "grad_norm": 1.0857946872711182, | |
| "learning_rate": 1.3494668096146136e-06, | |
| "loss": 4.7531, | |
| "step": 105700 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "grad_norm": 1.606785535812378, | |
| "learning_rate": 1.341282766861174e-06, | |
| "loss": 4.7506, | |
| "step": 105800 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "grad_norm": 1.9557284116744995, | |
| "learning_rate": 1.3330987241077348e-06, | |
| "loss": 4.7553, | |
| "step": 105900 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "grad_norm": 2.19726824760437, | |
| "learning_rate": 1.3249146813542955e-06, | |
| "loss": 4.7524, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "grad_norm": 1.0980935096740723, | |
| "learning_rate": 1.316730638600856e-06, | |
| "loss": 4.7515, | |
| "step": 106100 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "grad_norm": 1.3451943397521973, | |
| "learning_rate": 1.3085465958474168e-06, | |
| "loss": 4.7547, | |
| "step": 106200 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "grad_norm": 1.2886918783187866, | |
| "learning_rate": 1.3003625530939775e-06, | |
| "loss": 4.7505, | |
| "step": 106300 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "grad_norm": 1.1479195356369019, | |
| "learning_rate": 1.2921785103405382e-06, | |
| "loss": 4.7538, | |
| "step": 106400 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "grad_norm": 1.7975718975067139, | |
| "learning_rate": 1.283994467587099e-06, | |
| "loss": 4.7472, | |
| "step": 106500 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "grad_norm": 1.536818027496338, | |
| "learning_rate": 1.2758104248336594e-06, | |
| "loss": 4.7498, | |
| "step": 106600 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "grad_norm": 1.1835073232650757, | |
| "learning_rate": 1.26762638208022e-06, | |
| "loss": 4.751, | |
| "step": 106700 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "grad_norm": 1.418748378753662, | |
| "learning_rate": 1.2594423393267808e-06, | |
| "loss": 4.7506, | |
| "step": 106800 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "grad_norm": 1.7083241939544678, | |
| "learning_rate": 1.2512582965733413e-06, | |
| "loss": 4.757, | |
| "step": 106900 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 1.9533259868621826, | |
| "learning_rate": 1.243074253819902e-06, | |
| "loss": 4.7491, | |
| "step": 107000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 1.549060344696045, | |
| "learning_rate": 1.2348902110664629e-06, | |
| "loss": 4.7502, | |
| "step": 107100 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 1.22414231300354, | |
| "learning_rate": 1.2267061683130233e-06, | |
| "loss": 4.7512, | |
| "step": 107200 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 2.5019216537475586, | |
| "learning_rate": 1.218522125559584e-06, | |
| "loss": 4.7551, | |
| "step": 107300 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 1.4612125158309937, | |
| "learning_rate": 1.2103380828061447e-06, | |
| "loss": 4.7517, | |
| "step": 107400 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 1.41428542137146, | |
| "learning_rate": 1.2021540400527052e-06, | |
| "loss": 4.7522, | |
| "step": 107500 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 1.1158181428909302, | |
| "learning_rate": 1.193969997299266e-06, | |
| "loss": 4.7534, | |
| "step": 107600 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 1.4456719160079956, | |
| "learning_rate": 1.1857859545458268e-06, | |
| "loss": 4.7515, | |
| "step": 107700 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 2.0484957695007324, | |
| "learning_rate": 1.1776019117923873e-06, | |
| "loss": 4.7475, | |
| "step": 107800 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 1.0839197635650635, | |
| "learning_rate": 1.169417869038948e-06, | |
| "loss": 4.7474, | |
| "step": 107900 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 1.4242494106292725, | |
| "learning_rate": 1.1612338262855086e-06, | |
| "loss": 4.7516, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 1.1142181158065796, | |
| "learning_rate": 1.1530497835320691e-06, | |
| "loss": 4.7513, | |
| "step": 108100 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "grad_norm": 1.2992855310440063, | |
| "learning_rate": 1.14486574077863e-06, | |
| "loss": 4.7497, | |
| "step": 108200 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "grad_norm": 1.1050403118133545, | |
| "learning_rate": 1.1366816980251905e-06, | |
| "loss": 4.7478, | |
| "step": 108300 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "grad_norm": 1.6111624240875244, | |
| "learning_rate": 1.1284976552717512e-06, | |
| "loss": 4.7521, | |
| "step": 108400 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "grad_norm": 1.6379482746124268, | |
| "learning_rate": 1.1203136125183119e-06, | |
| "loss": 4.7456, | |
| "step": 108500 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "grad_norm": 1.8351396322250366, | |
| "learning_rate": 1.1121295697648726e-06, | |
| "loss": 4.7518, | |
| "step": 108600 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "grad_norm": 1.1721076965332031, | |
| "learning_rate": 1.1039455270114333e-06, | |
| "loss": 4.7557, | |
| "step": 108700 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "grad_norm": 1.4993492364883423, | |
| "learning_rate": 1.095761484257994e-06, | |
| "loss": 4.7519, | |
| "step": 108800 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "grad_norm": 1.1917214393615723, | |
| "learning_rate": 1.0875774415045544e-06, | |
| "loss": 4.748, | |
| "step": 108900 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "grad_norm": 1.0404828786849976, | |
| "learning_rate": 1.0793933987511151e-06, | |
| "loss": 4.7565, | |
| "step": 109000 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "grad_norm": 1.5994240045547485, | |
| "learning_rate": 1.0712093559976758e-06, | |
| "loss": 4.7499, | |
| "step": 109100 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "grad_norm": 1.197583556175232, | |
| "learning_rate": 1.0630253132442365e-06, | |
| "loss": 4.7537, | |
| "step": 109200 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "grad_norm": 1.6032483577728271, | |
| "learning_rate": 1.0548412704907972e-06, | |
| "loss": 4.7542, | |
| "step": 109300 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 1.39584481716156, | |
| "learning_rate": 1.0466572277373579e-06, | |
| "loss": 4.7493, | |
| "step": 109400 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 1.410801649093628, | |
| "learning_rate": 1.0384731849839184e-06, | |
| "loss": 4.7559, | |
| "step": 109500 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 1.246910810470581, | |
| "learning_rate": 1.030289142230479e-06, | |
| "loss": 4.751, | |
| "step": 109600 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 4.328908920288086, | |
| "learning_rate": 1.0221050994770397e-06, | |
| "loss": 4.7496, | |
| "step": 109700 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 1.5280972719192505, | |
| "learning_rate": 1.0139210567236004e-06, | |
| "loss": 4.7539, | |
| "step": 109800 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 2.1216630935668945, | |
| "learning_rate": 1.0057370139701611e-06, | |
| "loss": 4.7523, | |
| "step": 109900 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 1.4128057956695557, | |
| "learning_rate": 9.975529712167218e-07, | |
| "loss": 4.7492, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 1.2564375400543213, | |
| "learning_rate": 9.893689284632823e-07, | |
| "loss": 4.754, | |
| "step": 110100 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 2.3144404888153076, | |
| "learning_rate": 9.81184885709843e-07, | |
| "loss": 4.7473, | |
| "step": 110200 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 0.9776962399482727, | |
| "learning_rate": 9.730008429564037e-07, | |
| "loss": 4.7479, | |
| "step": 110300 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 2.479701519012451, | |
| "learning_rate": 9.648168002029644e-07, | |
| "loss": 4.7514, | |
| "step": 110400 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 3.217172145843506, | |
| "learning_rate": 9.56632757449525e-07, | |
| "loss": 4.7535, | |
| "step": 110500 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "grad_norm": 1.103346824645996, | |
| "learning_rate": 9.484487146960856e-07, | |
| "loss": 4.7474, | |
| "step": 110600 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "grad_norm": 1.1965771913528442, | |
| "learning_rate": 9.402646719426463e-07, | |
| "loss": 4.7475, | |
| "step": 110700 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "grad_norm": 1.2940635681152344, | |
| "learning_rate": 9.320806291892069e-07, | |
| "loss": 4.7484, | |
| "step": 110800 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "grad_norm": 1.06132972240448, | |
| "learning_rate": 9.238965864357677e-07, | |
| "loss": 4.7547, | |
| "step": 110900 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "grad_norm": 1.8715641498565674, | |
| "learning_rate": 9.157125436823283e-07, | |
| "loss": 4.749, | |
| "step": 111000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "grad_norm": 1.1907116174697876, | |
| "learning_rate": 9.07528500928889e-07, | |
| "loss": 4.7539, | |
| "step": 111100 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "grad_norm": 1.5867308378219604, | |
| "learning_rate": 8.993444581754496e-07, | |
| "loss": 4.7562, | |
| "step": 111200 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "grad_norm": 1.2849870920181274, | |
| "learning_rate": 8.911604154220103e-07, | |
| "loss": 4.7512, | |
| "step": 111300 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "grad_norm": 1.3407094478607178, | |
| "learning_rate": 8.829763726685708e-07, | |
| "loss": 4.7543, | |
| "step": 111400 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "grad_norm": 1.0691750049591064, | |
| "learning_rate": 8.747923299151316e-07, | |
| "loss": 4.7451, | |
| "step": 111500 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "grad_norm": 1.0635693073272705, | |
| "learning_rate": 8.666082871616922e-07, | |
| "loss": 4.7521, | |
| "step": 111600 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "grad_norm": 1.5273666381835938, | |
| "learning_rate": 8.584242444082529e-07, | |
| "loss": 4.7551, | |
| "step": 111700 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "grad_norm": 1.7429158687591553, | |
| "learning_rate": 8.502402016548135e-07, | |
| "loss": 4.7544, | |
| "step": 111800 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 1.0581636428833008, | |
| "learning_rate": 8.420561589013741e-07, | |
| "loss": 4.7532, | |
| "step": 111900 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 1.3187443017959595, | |
| "learning_rate": 8.338721161479348e-07, | |
| "loss": 4.7551, | |
| "step": 112000 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 1.2842453718185425, | |
| "learning_rate": 8.256880733944956e-07, | |
| "loss": 4.7482, | |
| "step": 112100 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 1.264115571975708, | |
| "learning_rate": 8.175040306410561e-07, | |
| "loss": 4.7475, | |
| "step": 112200 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 1.338619589805603, | |
| "learning_rate": 8.093199878876168e-07, | |
| "loss": 4.7465, | |
| "step": 112300 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 1.2081668376922607, | |
| "learning_rate": 8.011359451341774e-07, | |
| "loss": 4.75, | |
| "step": 112400 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 1.475716471672058, | |
| "learning_rate": 7.92951902380738e-07, | |
| "loss": 4.7475, | |
| "step": 112500 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 1.1391123533248901, | |
| "learning_rate": 7.847678596272988e-07, | |
| "loss": 4.7529, | |
| "step": 112600 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 1.5139949321746826, | |
| "learning_rate": 7.765838168738595e-07, | |
| "loss": 4.7457, | |
| "step": 112700 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 1.250877857208252, | |
| "learning_rate": 7.683997741204201e-07, | |
| "loss": 4.7512, | |
| "step": 112800 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 1.3660330772399902, | |
| "learning_rate": 7.602157313669807e-07, | |
| "loss": 4.7519, | |
| "step": 112900 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 1.3007818460464478, | |
| "learning_rate": 7.520316886135414e-07, | |
| "loss": 4.7539, | |
| "step": 113000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "grad_norm": 1.1533290147781372, | |
| "learning_rate": 7.438476458601019e-07, | |
| "loss": 4.7522, | |
| "step": 113100 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "grad_norm": 1.1087945699691772, | |
| "learning_rate": 7.356636031066627e-07, | |
| "loss": 4.7549, | |
| "step": 113200 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "grad_norm": 1.3991641998291016, | |
| "learning_rate": 7.274795603532233e-07, | |
| "loss": 4.7538, | |
| "step": 113300 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "grad_norm": 1.820162296295166, | |
| "learning_rate": 7.19295517599784e-07, | |
| "loss": 4.7521, | |
| "step": 113400 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "grad_norm": 1.1187483072280884, | |
| "learning_rate": 7.111114748463446e-07, | |
| "loss": 4.7577, | |
| "step": 113500 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "grad_norm": 1.0411303043365479, | |
| "learning_rate": 7.029274320929053e-07, | |
| "loss": 4.7514, | |
| "step": 113600 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "grad_norm": 1.2369052171707153, | |
| "learning_rate": 6.947433893394661e-07, | |
| "loss": 4.7497, | |
| "step": 113700 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "grad_norm": 1.505008578300476, | |
| "learning_rate": 6.865593465860267e-07, | |
| "loss": 4.7505, | |
| "step": 113800 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "grad_norm": 1.2643870115280151, | |
| "learning_rate": 6.783753038325872e-07, | |
| "loss": 4.7499, | |
| "step": 113900 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "grad_norm": 1.095914363861084, | |
| "learning_rate": 6.701912610791479e-07, | |
| "loss": 4.7478, | |
| "step": 114000 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "grad_norm": 1.4800920486450195, | |
| "learning_rate": 6.620072183257085e-07, | |
| "loss": 4.748, | |
| "step": 114100 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "grad_norm": 1.6267393827438354, | |
| "learning_rate": 6.538231755722692e-07, | |
| "loss": 4.7523, | |
| "step": 114200 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 1.7788121700286865, | |
| "learning_rate": 6.456391328188299e-07, | |
| "loss": 4.7548, | |
| "step": 114300 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 1.2876346111297607, | |
| "learning_rate": 6.374550900653906e-07, | |
| "loss": 4.7555, | |
| "step": 114400 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 1.0011918544769287, | |
| "learning_rate": 6.292710473119512e-07, | |
| "loss": 4.7487, | |
| "step": 114500 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 2.096606731414795, | |
| "learning_rate": 6.210870045585119e-07, | |
| "loss": 4.7527, | |
| "step": 114600 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 1.2112175226211548, | |
| "learning_rate": 6.129029618050726e-07, | |
| "loss": 4.7482, | |
| "step": 114700 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 1.4160529375076294, | |
| "learning_rate": 6.047189190516331e-07, | |
| "loss": 4.7567, | |
| "step": 114800 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 1.36955988407135, | |
| "learning_rate": 5.965348762981938e-07, | |
| "loss": 4.7562, | |
| "step": 114900 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 1.577446460723877, | |
| "learning_rate": 5.883508335447545e-07, | |
| "loss": 4.7534, | |
| "step": 115000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 1.2918033599853516, | |
| "learning_rate": 5.801667907913151e-07, | |
| "loss": 4.7472, | |
| "step": 115100 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 1.07747220993042, | |
| "learning_rate": 5.719827480378758e-07, | |
| "loss": 4.7495, | |
| "step": 115200 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 1.4656786918640137, | |
| "learning_rate": 5.637987052844365e-07, | |
| "loss": 4.7501, | |
| "step": 115300 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 1.2017009258270264, | |
| "learning_rate": 5.556146625309971e-07, | |
| "loss": 4.7482, | |
| "step": 115400 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "grad_norm": 1.3578641414642334, | |
| "learning_rate": 5.474306197775578e-07, | |
| "loss": 4.7467, | |
| "step": 115500 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "grad_norm": 1.246904969215393, | |
| "learning_rate": 5.392465770241185e-07, | |
| "loss": 4.7564, | |
| "step": 115600 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "grad_norm": 1.289212942123413, | |
| "learning_rate": 5.31062534270679e-07, | |
| "loss": 4.7516, | |
| "step": 115700 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "grad_norm": 2.1466667652130127, | |
| "learning_rate": 5.228784915172397e-07, | |
| "loss": 4.7485, | |
| "step": 115800 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "grad_norm": 1.6222789287567139, | |
| "learning_rate": 5.146944487638004e-07, | |
| "loss": 4.7535, | |
| "step": 115900 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "grad_norm": 1.2257126569747925, | |
| "learning_rate": 5.06510406010361e-07, | |
| "loss": 4.7537, | |
| "step": 116000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "grad_norm": 1.0743142366409302, | |
| "learning_rate": 4.983263632569217e-07, | |
| "loss": 4.7468, | |
| "step": 116100 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "grad_norm": 1.4326200485229492, | |
| "learning_rate": 4.901423205034824e-07, | |
| "loss": 4.7537, | |
| "step": 116200 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "grad_norm": 1.513900876045227, | |
| "learning_rate": 4.81958277750043e-07, | |
| "loss": 4.7492, | |
| "step": 116300 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "grad_norm": 1.3525514602661133, | |
| "learning_rate": 4.7377423499660366e-07, | |
| "loss": 4.7511, | |
| "step": 116400 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "grad_norm": 1.0643374919891357, | |
| "learning_rate": 4.655901922431643e-07, | |
| "loss": 4.756, | |
| "step": 116500 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "grad_norm": 1.0973927974700928, | |
| "learning_rate": 4.57406149489725e-07, | |
| "loss": 4.7512, | |
| "step": 116600 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 1.498604416847229, | |
| "learning_rate": 4.492221067362856e-07, | |
| "loss": 4.7534, | |
| "step": 116700 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 2.6713321208953857, | |
| "learning_rate": 4.4103806398284626e-07, | |
| "loss": 4.7497, | |
| "step": 116800 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 1.2505509853363037, | |
| "learning_rate": 4.3285402122940695e-07, | |
| "loss": 4.7551, | |
| "step": 116900 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 1.1062592267990112, | |
| "learning_rate": 4.246699784759676e-07, | |
| "loss": 4.7536, | |
| "step": 117000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 1.4121407270431519, | |
| "learning_rate": 4.1648593572252823e-07, | |
| "loss": 4.7548, | |
| "step": 117100 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 1.2598930597305298, | |
| "learning_rate": 4.083018929690889e-07, | |
| "loss": 4.7559, | |
| "step": 117200 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 1.1028268337249756, | |
| "learning_rate": 4.0011785021564955e-07, | |
| "loss": 4.7495, | |
| "step": 117300 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 1.5532176494598389, | |
| "learning_rate": 3.919338074622102e-07, | |
| "loss": 4.7543, | |
| "step": 117400 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 1.2546781301498413, | |
| "learning_rate": 3.837497647087709e-07, | |
| "loss": 4.7502, | |
| "step": 117500 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 1.0731525421142578, | |
| "learning_rate": 3.755657219553315e-07, | |
| "loss": 4.7519, | |
| "step": 117600 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 1.3289110660552979, | |
| "learning_rate": 3.6738167920189216e-07, | |
| "loss": 4.7495, | |
| "step": 117700 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 1.2810921669006348, | |
| "learning_rate": 3.5919763644845285e-07, | |
| "loss": 4.7522, | |
| "step": 117800 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 1.0300296545028687, | |
| "learning_rate": 3.510135936950135e-07, | |
| "loss": 4.7491, | |
| "step": 117900 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "grad_norm": 1.9749176502227783, | |
| "learning_rate": 3.428295509415742e-07, | |
| "loss": 4.7558, | |
| "step": 118000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "grad_norm": 1.6398601531982422, | |
| "learning_rate": 3.346455081881348e-07, | |
| "loss": 4.7489, | |
| "step": 118100 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "grad_norm": 1.154733657836914, | |
| "learning_rate": 3.2646146543469545e-07, | |
| "loss": 4.747, | |
| "step": 118200 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "grad_norm": 1.1068767309188843, | |
| "learning_rate": 3.1827742268125614e-07, | |
| "loss": 4.7532, | |
| "step": 118300 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "grad_norm": 1.5782713890075684, | |
| "learning_rate": 3.100933799278168e-07, | |
| "loss": 4.7525, | |
| "step": 118400 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "grad_norm": 1.2185344696044922, | |
| "learning_rate": 3.019093371743774e-07, | |
| "loss": 4.7507, | |
| "step": 118500 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "grad_norm": 1.168750286102295, | |
| "learning_rate": 2.9372529442093805e-07, | |
| "loss": 4.7503, | |
| "step": 118600 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "grad_norm": 1.3794721364974976, | |
| "learning_rate": 2.8554125166749874e-07, | |
| "loss": 4.7539, | |
| "step": 118700 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "grad_norm": 1.0481081008911133, | |
| "learning_rate": 2.773572089140594e-07, | |
| "loss": 4.7493, | |
| "step": 118800 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "grad_norm": 1.1850849390029907, | |
| "learning_rate": 2.6917316616062007e-07, | |
| "loss": 4.752, | |
| "step": 118900 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "grad_norm": 1.3937416076660156, | |
| "learning_rate": 2.609891234071807e-07, | |
| "loss": 4.7518, | |
| "step": 119000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "grad_norm": 1.0362590551376343, | |
| "learning_rate": 2.5280508065374134e-07, | |
| "loss": 4.7494, | |
| "step": 119100 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 1.226879358291626, | |
| "learning_rate": 2.4462103790030203e-07, | |
| "loss": 4.7541, | |
| "step": 119200 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 1.289158582687378, | |
| "learning_rate": 2.3643699514686265e-07, | |
| "loss": 4.7541, | |
| "step": 119300 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 1.2052675485610962, | |
| "learning_rate": 2.282529523934233e-07, | |
| "loss": 4.7494, | |
| "step": 119400 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 1.3836339712142944, | |
| "learning_rate": 2.2006890963998397e-07, | |
| "loss": 4.7501, | |
| "step": 119500 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 1.075812578201294, | |
| "learning_rate": 2.1188486688654464e-07, | |
| "loss": 4.7466, | |
| "step": 119600 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 1.0450024604797363, | |
| "learning_rate": 2.0370082413310527e-07, | |
| "loss": 4.7513, | |
| "step": 119700 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 1.6995435953140259, | |
| "learning_rate": 1.9551678137966594e-07, | |
| "loss": 4.7544, | |
| "step": 119800 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 2.0586297512054443, | |
| "learning_rate": 1.873327386262266e-07, | |
| "loss": 4.7512, | |
| "step": 119900 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 1.734843134880066, | |
| "learning_rate": 1.7914869587278727e-07, | |
| "loss": 4.752, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 1.8307639360427856, | |
| "learning_rate": 1.709646531193479e-07, | |
| "loss": 4.7486, | |
| "step": 120100 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 1.0754849910736084, | |
| "learning_rate": 1.6278061036590857e-07, | |
| "loss": 4.7518, | |
| "step": 120200 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 1.8558402061462402, | |
| "learning_rate": 1.545965676124692e-07, | |
| "loss": 4.7496, | |
| "step": 120300 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "grad_norm": 1.3178366422653198, | |
| "learning_rate": 1.4641252485902987e-07, | |
| "loss": 4.7474, | |
| "step": 120400 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "grad_norm": 1.176018238067627, | |
| "learning_rate": 1.3822848210559053e-07, | |
| "loss": 4.7517, | |
| "step": 120500 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "grad_norm": 1.0331361293792725, | |
| "learning_rate": 1.300444393521512e-07, | |
| "loss": 4.7445, | |
| "step": 120600 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "grad_norm": 1.2724260091781616, | |
| "learning_rate": 1.2186039659871183e-07, | |
| "loss": 4.7519, | |
| "step": 120700 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "grad_norm": 1.1402854919433594, | |
| "learning_rate": 1.1367635384527251e-07, | |
| "loss": 4.7547, | |
| "step": 120800 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "grad_norm": 1.6155171394348145, | |
| "learning_rate": 1.0549231109183315e-07, | |
| "loss": 4.7553, | |
| "step": 120900 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "grad_norm": 1.3488671779632568, | |
| "learning_rate": 9.73082683383938e-08, | |
| "loss": 4.7489, | |
| "step": 121000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "grad_norm": 1.4476072788238525, | |
| "learning_rate": 8.912422558495446e-08, | |
| "loss": 4.7531, | |
| "step": 121100 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "grad_norm": 1.5881561040878296, | |
| "learning_rate": 8.094018283151511e-08, | |
| "loss": 4.7472, | |
| "step": 121200 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "grad_norm": 1.3526599407196045, | |
| "learning_rate": 7.275614007807578e-08, | |
| "loss": 4.7507, | |
| "step": 121300 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "grad_norm": 1.1709797382354736, | |
| "learning_rate": 6.457209732463643e-08, | |
| "loss": 4.7495, | |
| "step": 121400 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "grad_norm": 1.1517142057418823, | |
| "learning_rate": 5.6388054571197084e-08, | |
| "loss": 4.7534, | |
| "step": 121500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 1.6636133193969727, | |
| "learning_rate": 4.820401181775774e-08, | |
| "loss": 4.7524, | |
| "step": 121600 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 1.1265342235565186, | |
| "learning_rate": 4.001996906431839e-08, | |
| "loss": 4.749, | |
| "step": 121700 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 1.4785326719284058, | |
| "learning_rate": 3.183592631087905e-08, | |
| "loss": 4.7507, | |
| "step": 121800 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 3.0491678714752197, | |
| "learning_rate": 2.3651883557439706e-08, | |
| "loss": 4.7502, | |
| "step": 121900 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 1.162163257598877, | |
| "learning_rate": 1.5467840804000363e-08, | |
| "loss": 4.7493, | |
| "step": 122000 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 122189, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "total_flos": 1.4891090143551898e+18, | |
| "train_batch_size": 96, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |