| { | |
| "best_metric": 1.2792317867279053, | |
| "best_model_checkpoint": "saved_model/c2s_sep_2024/checkpoint-4606", | |
| "epoch": 2.9998371777476254, | |
| "eval_steps": 500, | |
| "global_step": 13818, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "grad_norm": null, | |
| "learning_rate": 0.0, | |
| "loss": 77.1448, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "grad_norm": 17.278156280517578, | |
| "learning_rate": 2.5e-06, | |
| "loss": 76.5629, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "grad_norm": 15.856775283813477, | |
| "learning_rate": 7.000000000000001e-06, | |
| "loss": 75.6974, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 15.606675148010254, | |
| "learning_rate": 1.2e-05, | |
| "loss": 74.9514, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 12.968363761901855, | |
| "learning_rate": 1.7000000000000003e-05, | |
| "loss": 72.4643, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 13.329130172729492, | |
| "learning_rate": 2.2000000000000003e-05, | |
| "loss": 69.0552, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "grad_norm": 18.156723022460938, | |
| "learning_rate": 2.7000000000000002e-05, | |
| "loss": 64.2775, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 29.901222229003906, | |
| "learning_rate": 3.2000000000000005e-05, | |
| "loss": 52.1897, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 27.163593292236328, | |
| "learning_rate": 3.65e-05, | |
| "loss": 30.5964, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 13.53585433959961, | |
| "learning_rate": 4.15e-05, | |
| "loss": 12.5007, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 7.8353095054626465, | |
| "learning_rate": 4.6500000000000005e-05, | |
| "loss": 6.4802, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 6.838261127471924, | |
| "learning_rate": 5.1500000000000005e-05, | |
| "loss": 4.7819, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 8.852176666259766, | |
| "learning_rate": 5.65e-05, | |
| "loss": 4.1049, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 7.614436149597168, | |
| "learning_rate": 6.15e-05, | |
| "loss": 3.7732, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 7.756160259246826, | |
| "learning_rate": 6.65e-05, | |
| "loss": 3.6324, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 6.736324310302734, | |
| "learning_rate": 7.15e-05, | |
| "loss": 3.4327, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "grad_norm": 8.393209457397461, | |
| "learning_rate": 7.65e-05, | |
| "loss": 3.4096, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 5.403553485870361, | |
| "learning_rate": 8.15e-05, | |
| "loss": 3.2845, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 5.367032051086426, | |
| "learning_rate": 8.65e-05, | |
| "loss": 3.2462, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 7.965042591094971, | |
| "learning_rate": 9.15e-05, | |
| "loss": 3.1463, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 7.074673175811768, | |
| "learning_rate": 9.65e-05, | |
| "loss": 3.1758, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 6.894763946533203, | |
| "learning_rate": 9.999345835150458e-05, | |
| "loss": 3.0311, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 6.925544738769531, | |
| "learning_rate": 9.997165285651984e-05, | |
| "loss": 3.0684, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 5.285668849945068, | |
| "learning_rate": 9.994984736153511e-05, | |
| "loss": 2.9234, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 6.81157922744751, | |
| "learning_rate": 9.992804186655037e-05, | |
| "loss": 2.8664, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "grad_norm": 6.883147239685059, | |
| "learning_rate": 9.990623637156565e-05, | |
| "loss": 2.9204, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 5.505452632904053, | |
| "learning_rate": 9.988443087658091e-05, | |
| "loss": 2.8818, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 7.352786064147949, | |
| "learning_rate": 9.986262538159616e-05, | |
| "loss": 2.8999, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 6.875962734222412, | |
| "learning_rate": 9.984081988661144e-05, | |
| "loss": 2.8523, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 5.861810684204102, | |
| "learning_rate": 9.98190143916267e-05, | |
| "loss": 2.8062, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 5.396953582763672, | |
| "learning_rate": 9.979720889664196e-05, | |
| "loss": 2.7625, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 6.168801307678223, | |
| "learning_rate": 9.977540340165722e-05, | |
| "loss": 2.7063, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 4.478597640991211, | |
| "learning_rate": 9.975359790667249e-05, | |
| "loss": 2.6539, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 5.2905731201171875, | |
| "learning_rate": 9.973179241168775e-05, | |
| "loss": 2.7406, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "grad_norm": 5.451777935028076, | |
| "learning_rate": 9.970998691670301e-05, | |
| "loss": 2.6599, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 5.45026969909668, | |
| "learning_rate": 9.968818142171828e-05, | |
| "loss": 2.6406, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 4.353079795837402, | |
| "learning_rate": 9.966637592673354e-05, | |
| "loss": 2.5285, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 4.052408218383789, | |
| "learning_rate": 9.96445704317488e-05, | |
| "loss": 2.4025, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 4.303618431091309, | |
| "learning_rate": 9.962276493676407e-05, | |
| "loss": 2.2459, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 3.2505452632904053, | |
| "learning_rate": 9.960095944177933e-05, | |
| "loss": 2.166, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "grad_norm": 2.806292772293091, | |
| "learning_rate": 9.95791539467946e-05, | |
| "loss": 2.0462, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "grad_norm": 2.9824328422546387, | |
| "learning_rate": 9.955734845180987e-05, | |
| "loss": 1.9315, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "grad_norm": 2.7355027198791504, | |
| "learning_rate": 9.953554295682512e-05, | |
| "loss": 1.9072, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "grad_norm": 2.385045051574707, | |
| "learning_rate": 9.951373746184038e-05, | |
| "loss": 1.8667, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 2.4067020416259766, | |
| "learning_rate": 9.949193196685566e-05, | |
| "loss": 1.8179, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 2.6805872917175293, | |
| "learning_rate": 9.947012647187092e-05, | |
| "loss": 1.8208, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 1.9335626363754272, | |
| "learning_rate": 9.944832097688618e-05, | |
| "loss": 1.8092, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 1.7954732179641724, | |
| "learning_rate": 9.942651548190143e-05, | |
| "loss": 1.7698, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 2.2542481422424316, | |
| "learning_rate": 9.940470998691671e-05, | |
| "loss": 1.7359, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "grad_norm": 1.8089336156845093, | |
| "learning_rate": 9.938290449193197e-05, | |
| "loss": 1.7195, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "grad_norm": 2.3044662475585938, | |
| "learning_rate": 9.936109899694724e-05, | |
| "loss": 1.6901, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "grad_norm": 1.8811343908309937, | |
| "learning_rate": 9.93392935019625e-05, | |
| "loss": 1.6757, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "grad_norm": 2.8750667572021484, | |
| "learning_rate": 9.931748800697776e-05, | |
| "loss": 1.6902, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 1.8759925365447998, | |
| "learning_rate": 9.929568251199303e-05, | |
| "loss": 1.6519, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 1.7360563278198242, | |
| "learning_rate": 9.927387701700829e-05, | |
| "loss": 1.6381, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 1.9994693994522095, | |
| "learning_rate": 9.925207152202356e-05, | |
| "loss": 1.6527, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 1.803330659866333, | |
| "learning_rate": 9.923026602703881e-05, | |
| "loss": 1.6453, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "grad_norm": 1.569846272468567, | |
| "learning_rate": 9.920846053205408e-05, | |
| "loss": 1.6689, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "grad_norm": 1.5712964534759521, | |
| "learning_rate": 9.918665503706934e-05, | |
| "loss": 1.6512, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "grad_norm": 1.643431544303894, | |
| "learning_rate": 9.916484954208462e-05, | |
| "loss": 1.5994, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "grad_norm": 1.619866132736206, | |
| "learning_rate": 9.914304404709988e-05, | |
| "loss": 1.6212, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "grad_norm": 1.8739800453186035, | |
| "learning_rate": 9.912123855211514e-05, | |
| "loss": 1.5664, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "grad_norm": 1.9525455236434937, | |
| "learning_rate": 9.909943305713039e-05, | |
| "loss": 1.6108, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 1.5381406545639038, | |
| "learning_rate": 9.907762756214567e-05, | |
| "loss": 1.6004, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 1.5303971767425537, | |
| "learning_rate": 9.905582206716093e-05, | |
| "loss": 1.581, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 1.6467609405517578, | |
| "learning_rate": 9.90340165721762e-05, | |
| "loss": 1.5812, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "grad_norm": 1.6094383001327515, | |
| "learning_rate": 9.901221107719146e-05, | |
| "loss": 1.6027, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "grad_norm": 1.5612354278564453, | |
| "learning_rate": 9.899040558220672e-05, | |
| "loss": 1.5477, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "grad_norm": 1.5925028324127197, | |
| "learning_rate": 9.896860008722198e-05, | |
| "loss": 1.5747, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "grad_norm": 1.434138298034668, | |
| "learning_rate": 9.894679459223725e-05, | |
| "loss": 1.5528, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "grad_norm": 1.6473920345306396, | |
| "learning_rate": 9.892498909725251e-05, | |
| "loss": 1.622, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "grad_norm": 1.599965214729309, | |
| "learning_rate": 9.890318360226777e-05, | |
| "loss": 1.5691, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 1.6525471210479736, | |
| "learning_rate": 9.888137810728304e-05, | |
| "loss": 1.6131, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 1.5170183181762695, | |
| "learning_rate": 9.88595726122983e-05, | |
| "loss": 1.5221, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 1.596643328666687, | |
| "learning_rate": 9.883776711731358e-05, | |
| "loss": 1.545, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 1.5849794149398804, | |
| "learning_rate": 9.881596162232884e-05, | |
| "loss": 1.5654, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 1.4768157005310059, | |
| "learning_rate": 9.879415612734409e-05, | |
| "loss": 1.5345, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "grad_norm": 1.5123172998428345, | |
| "learning_rate": 9.877235063235935e-05, | |
| "loss": 1.5236, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "grad_norm": 1.5827418565750122, | |
| "learning_rate": 9.875054513737463e-05, | |
| "loss": 1.5174, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "grad_norm": 1.8722275495529175, | |
| "learning_rate": 9.872873964238989e-05, | |
| "loss": 1.5256, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "grad_norm": 1.6323179006576538, | |
| "learning_rate": 9.870693414740515e-05, | |
| "loss": 1.4835, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 1.618322491645813, | |
| "learning_rate": 9.868512865242042e-05, | |
| "loss": 1.5214, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 1.6474233865737915, | |
| "learning_rate": 9.866332315743568e-05, | |
| "loss": 1.4811, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 1.4305635690689087, | |
| "learning_rate": 9.864151766245094e-05, | |
| "loss": 1.4727, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 1.6656005382537842, | |
| "learning_rate": 9.86197121674662e-05, | |
| "loss": 1.5373, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "grad_norm": 1.640834927558899, | |
| "learning_rate": 9.859790667248147e-05, | |
| "loss": 1.4811, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "grad_norm": 1.474351167678833, | |
| "learning_rate": 9.857610117749673e-05, | |
| "loss": 1.4819, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "grad_norm": 1.28626549243927, | |
| "learning_rate": 9.8554295682512e-05, | |
| "loss": 1.5221, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "grad_norm": 1.313599944114685, | |
| "learning_rate": 9.853249018752726e-05, | |
| "loss": 1.5221, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "grad_norm": 1.609924554824829, | |
| "learning_rate": 9.851068469254252e-05, | |
| "loss": 1.519, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 1.2516050338745117, | |
| "learning_rate": 9.84888791975578e-05, | |
| "loss": 1.4906, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 1.3122848272323608, | |
| "learning_rate": 9.846707370257305e-05, | |
| "loss": 1.5051, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 1.4828795194625854, | |
| "learning_rate": 9.844526820758831e-05, | |
| "loss": 1.5206, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 1.3761475086212158, | |
| "learning_rate": 9.842346271260357e-05, | |
| "loss": 1.503, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "grad_norm": 1.4912587404251099, | |
| "learning_rate": 9.840165721761885e-05, | |
| "loss": 1.4932, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "grad_norm": 1.2759939432144165, | |
| "learning_rate": 9.837985172263411e-05, | |
| "loss": 1.4843, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "grad_norm": 1.6568008661270142, | |
| "learning_rate": 9.835804622764938e-05, | |
| "loss": 1.5046, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "grad_norm": 1.4292601346969604, | |
| "learning_rate": 9.833624073266463e-05, | |
| "loss": 1.5249, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "grad_norm": 1.4866324663162231, | |
| "learning_rate": 9.83144352376799e-05, | |
| "loss": 1.4959, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "grad_norm": 1.429203748703003, | |
| "learning_rate": 9.829262974269517e-05, | |
| "loss": 1.4725, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 1.3150511980056763, | |
| "learning_rate": 9.827082424771043e-05, | |
| "loss": 1.4644, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 1.2386242151260376, | |
| "learning_rate": 9.824901875272569e-05, | |
| "loss": 1.4956, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 1.74444580078125, | |
| "learning_rate": 9.822721325774095e-05, | |
| "loss": 1.4477, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "grad_norm": 1.21920907497406, | |
| "learning_rate": 9.820540776275622e-05, | |
| "loss": 1.5053, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "grad_norm": 1.172884464263916, | |
| "learning_rate": 9.818360226777148e-05, | |
| "loss": 1.4478, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "grad_norm": 1.3462252616882324, | |
| "learning_rate": 9.816179677278676e-05, | |
| "loss": 1.4749, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "grad_norm": 1.230682373046875, | |
| "learning_rate": 9.8139991277802e-05, | |
| "loss": 1.4608, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "grad_norm": 1.4852972030639648, | |
| "learning_rate": 9.811818578281727e-05, | |
| "loss": 1.5006, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "grad_norm": 1.2698734998703003, | |
| "learning_rate": 9.809638028783253e-05, | |
| "loss": 1.4521, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 1.3210391998291016, | |
| "learning_rate": 9.807457479284781e-05, | |
| "loss": 1.4506, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 1.329473853111267, | |
| "learning_rate": 9.805276929786307e-05, | |
| "loss": 1.4587, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 1.185905933380127, | |
| "learning_rate": 9.803096380287832e-05, | |
| "loss": 1.439, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "grad_norm": 1.1401315927505493, | |
| "learning_rate": 9.800915830789358e-05, | |
| "loss": 1.4934, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 1.2437337636947632, | |
| "learning_rate": 9.798735281290886e-05, | |
| "loss": 1.4771, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 1.231963872909546, | |
| "learning_rate": 9.796554731792412e-05, | |
| "loss": 1.4428, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 1.274877905845642, | |
| "learning_rate": 9.794374182293939e-05, | |
| "loss": 1.4414, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 1.376755952835083, | |
| "learning_rate": 9.792193632795465e-05, | |
| "loss": 1.4366, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 1.0724767446517944, | |
| "learning_rate": 9.790013083296991e-05, | |
| "loss": 1.4817, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 1.3843764066696167, | |
| "learning_rate": 9.787832533798518e-05, | |
| "loss": 1.4986, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 1.327138900756836, | |
| "learning_rate": 9.785651984300044e-05, | |
| "loss": 1.4484, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 1.3678048849105835, | |
| "learning_rate": 9.78347143480157e-05, | |
| "loss": 1.454, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 1.4238979816436768, | |
| "learning_rate": 9.781290885303097e-05, | |
| "loss": 1.4491, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "grad_norm": 1.1681418418884277, | |
| "learning_rate": 9.779110335804623e-05, | |
| "loss": 1.4524, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "grad_norm": 1.2097047567367554, | |
| "learning_rate": 9.776929786306149e-05, | |
| "loss": 1.4562, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "grad_norm": 1.3048409223556519, | |
| "learning_rate": 9.774749236807677e-05, | |
| "loss": 1.4508, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "grad_norm": 1.3852041959762573, | |
| "learning_rate": 9.772568687309203e-05, | |
| "loss": 1.4277, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "grad_norm": 1.179715871810913, | |
| "learning_rate": 9.770388137810728e-05, | |
| "loss": 1.415, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 1.1659610271453857, | |
| "learning_rate": 9.768207588312254e-05, | |
| "loss": 1.4528, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 1.334057331085205, | |
| "learning_rate": 9.766027038813782e-05, | |
| "loss": 1.4525, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 1.5751981735229492, | |
| "learning_rate": 9.763846489315308e-05, | |
| "loss": 1.4427, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 1.1843003034591675, | |
| "learning_rate": 9.761665939816835e-05, | |
| "loss": 1.4427, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "grad_norm": 1.3135390281677246, | |
| "learning_rate": 9.759485390318361e-05, | |
| "loss": 1.4245, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "grad_norm": 1.1618658304214478, | |
| "learning_rate": 9.757304840819887e-05, | |
| "loss": 1.4622, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "grad_norm": 1.159295678138733, | |
| "learning_rate": 9.755124291321414e-05, | |
| "loss": 1.4557, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "grad_norm": 1.209723949432373, | |
| "learning_rate": 9.75294374182294e-05, | |
| "loss": 1.41, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "grad_norm": 1.2520672082901, | |
| "learning_rate": 9.750763192324466e-05, | |
| "loss": 1.4362, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 1.2639249563217163, | |
| "learning_rate": 9.748582642825992e-05, | |
| "loss": 1.4526, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 1.2657458782196045, | |
| "learning_rate": 9.746402093327519e-05, | |
| "loss": 1.4479, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 1.4267339706420898, | |
| "learning_rate": 9.744221543829045e-05, | |
| "loss": 1.4219, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 1.1722772121429443, | |
| "learning_rate": 9.742040994330571e-05, | |
| "loss": 1.448, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "grad_norm": 1.1443181037902832, | |
| "learning_rate": 9.739860444832099e-05, | |
| "loss": 1.4193, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "grad_norm": 1.2879366874694824, | |
| "learning_rate": 9.737679895333624e-05, | |
| "loss": 1.4196, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "grad_norm": 1.2243574857711792, | |
| "learning_rate": 9.73549934583515e-05, | |
| "loss": 1.4296, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "grad_norm": 1.2071127891540527, | |
| "learning_rate": 9.733318796336677e-05, | |
| "loss": 1.4194, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "grad_norm": 1.1925525665283203, | |
| "learning_rate": 9.731138246838204e-05, | |
| "loss": 1.4243, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "grad_norm": 1.2962863445281982, | |
| "learning_rate": 9.72895769733973e-05, | |
| "loss": 1.4371, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 1.0177215337753296, | |
| "learning_rate": 9.726777147841255e-05, | |
| "loss": 1.4237, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 1.4175331592559814, | |
| "learning_rate": 9.724596598342783e-05, | |
| "loss": 1.4107, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 1.0958452224731445, | |
| "learning_rate": 9.72241604884431e-05, | |
| "loss": 1.4176, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 1.1612709760665894, | |
| "learning_rate": 9.720235499345836e-05, | |
| "loss": 1.4051, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "grad_norm": 1.0781750679016113, | |
| "learning_rate": 9.718054949847362e-05, | |
| "loss": 1.4179, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "grad_norm": 1.1481519937515259, | |
| "learning_rate": 9.715874400348888e-05, | |
| "loss": 1.4247, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "grad_norm": 1.155716896057129, | |
| "learning_rate": 9.713693850850415e-05, | |
| "loss": 1.4268, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "grad_norm": 1.0442588329315186, | |
| "learning_rate": 9.711513301351941e-05, | |
| "loss": 1.445, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "grad_norm": 1.0979626178741455, | |
| "learning_rate": 9.709332751853467e-05, | |
| "loss": 1.4149, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "grad_norm": 1.119378685951233, | |
| "learning_rate": 9.707152202354995e-05, | |
| "loss": 1.44, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "grad_norm": 1.2214171886444092, | |
| "learning_rate": 9.70497165285652e-05, | |
| "loss": 1.44, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "grad_norm": 1.1184163093566895, | |
| "learning_rate": 9.702791103358046e-05, | |
| "loss": 1.3981, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "grad_norm": 1.130410075187683, | |
| "learning_rate": 9.700610553859572e-05, | |
| "loss": 1.4296, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "grad_norm": 1.1225483417510986, | |
| "learning_rate": 9.6984300043611e-05, | |
| "loss": 1.4153, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "grad_norm": 1.0556180477142334, | |
| "learning_rate": 9.696249454862626e-05, | |
| "loss": 1.4219, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "grad_norm": 1.2000679969787598, | |
| "learning_rate": 9.694068905364151e-05, | |
| "loss": 1.3892, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "grad_norm": 1.0137077569961548, | |
| "learning_rate": 9.691888355865678e-05, | |
| "loss": 1.3976, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "grad_norm": 1.0124636888504028, | |
| "learning_rate": 9.689707806367205e-05, | |
| "loss": 1.4129, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 1.0647350549697876, | |
| "learning_rate": 9.687527256868732e-05, | |
| "loss": 1.357, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 1.0684030055999756, | |
| "learning_rate": 9.685346707370258e-05, | |
| "loss": 1.4082, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 1.0580588579177856, | |
| "learning_rate": 9.683166157871784e-05, | |
| "loss": 1.3959, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 1.1602911949157715, | |
| "learning_rate": 9.68098560837331e-05, | |
| "loss": 1.3857, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 1.1642051935195923, | |
| "learning_rate": 9.678805058874837e-05, | |
| "loss": 1.4055, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "grad_norm": 1.0410170555114746, | |
| "learning_rate": 9.676624509376363e-05, | |
| "loss": 1.4071, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "grad_norm": 1.067542314529419, | |
| "learning_rate": 9.674443959877891e-05, | |
| "loss": 1.4093, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "grad_norm": 1.2621368169784546, | |
| "learning_rate": 9.672263410379416e-05, | |
| "loss": 1.3814, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "grad_norm": 1.0956709384918213, | |
| "learning_rate": 9.670082860880942e-05, | |
| "loss": 1.4024, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 1.1027687788009644, | |
| "learning_rate": 9.667902311382468e-05, | |
| "loss": 1.3544, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 1.1282079219818115, | |
| "learning_rate": 9.665721761883996e-05, | |
| "loss": 1.3818, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 1.244485855102539, | |
| "learning_rate": 9.663541212385522e-05, | |
| "loss": 1.4024, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 1.2329769134521484, | |
| "learning_rate": 9.661360662887047e-05, | |
| "loss": 1.413, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 1.2671635150909424, | |
| "learning_rate": 9.659180113388574e-05, | |
| "loss": 1.4002, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "grad_norm": 1.2992949485778809, | |
| "learning_rate": 9.656999563890101e-05, | |
| "loss": 1.3972, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "grad_norm": 1.15711510181427, | |
| "learning_rate": 9.654819014391628e-05, | |
| "loss": 1.3882, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "grad_norm": 1.122938632965088, | |
| "learning_rate": 9.652638464893154e-05, | |
| "loss": 1.4222, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "grad_norm": 1.151628851890564, | |
| "learning_rate": 9.650457915394679e-05, | |
| "loss": 1.3898, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 1.0860607624053955, | |
| "learning_rate": 9.648277365896206e-05, | |
| "loss": 1.3745, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 0.9899650812149048, | |
| "learning_rate": 9.646096816397733e-05, | |
| "loss": 1.3985, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 1.019313097000122, | |
| "learning_rate": 9.643916266899259e-05, | |
| "loss": 1.4031, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 1.1719962358474731, | |
| "learning_rate": 9.641735717400785e-05, | |
| "loss": 1.3781, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 1.117961049079895, | |
| "learning_rate": 9.639555167902312e-05, | |
| "loss": 1.3885, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "grad_norm": 1.3950169086456299, | |
| "learning_rate": 9.637374618403838e-05, | |
| "loss": 1.3746, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "grad_norm": 1.1064496040344238, | |
| "learning_rate": 9.635194068905364e-05, | |
| "loss": 1.3764, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "grad_norm": 1.174922227859497, | |
| "learning_rate": 9.63301351940689e-05, | |
| "loss": 1.42, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "grad_norm": 1.3221770524978638, | |
| "learning_rate": 9.630832969908418e-05, | |
| "loss": 1.3712, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "grad_norm": 1.0039620399475098, | |
| "learning_rate": 9.628652420409943e-05, | |
| "loss": 1.3976, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 0.9963878393173218, | |
| "learning_rate": 9.62647187091147e-05, | |
| "loss": 1.3977, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 1.2195067405700684, | |
| "learning_rate": 9.624291321412997e-05, | |
| "loss": 1.3847, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 1.0968499183654785, | |
| "learning_rate": 9.622110771914523e-05, | |
| "loss": 1.3937, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 0.992825448513031, | |
| "learning_rate": 9.61993022241605e-05, | |
| "loss": 1.4082, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "grad_norm": 1.0395129919052124, | |
| "learning_rate": 9.617749672917575e-05, | |
| "loss": 1.3696, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "grad_norm": 1.030629277229309, | |
| "learning_rate": 9.615569123419102e-05, | |
| "loss": 1.4, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "grad_norm": 1.0580593347549438, | |
| "learning_rate": 9.613388573920629e-05, | |
| "loss": 1.3461, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "grad_norm": 1.2588000297546387, | |
| "learning_rate": 9.611208024422155e-05, | |
| "loss": 1.3687, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "grad_norm": 1.1057671308517456, | |
| "learning_rate": 9.609027474923681e-05, | |
| "loss": 1.3876, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 1.1952061653137207, | |
| "learning_rate": 9.606846925425208e-05, | |
| "loss": 1.3821, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 1.105406641960144, | |
| "learning_rate": 9.604666375926734e-05, | |
| "loss": 1.375, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 1.0594791173934937, | |
| "learning_rate": 9.60248582642826e-05, | |
| "loss": 1.3644, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 1.055421233177185, | |
| "learning_rate": 9.600305276929787e-05, | |
| "loss": 1.3938, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "grad_norm": 1.2545115947723389, | |
| "learning_rate": 9.598124727431314e-05, | |
| "loss": 1.3709, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "grad_norm": 0.9864488244056702, | |
| "learning_rate": 9.595944177932839e-05, | |
| "loss": 1.3802, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "grad_norm": 1.0537374019622803, | |
| "learning_rate": 9.593763628434365e-05, | |
| "loss": 1.3847, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "grad_norm": 1.0474879741668701, | |
| "learning_rate": 9.591583078935892e-05, | |
| "loss": 1.3616, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "grad_norm": 1.1384907960891724, | |
| "learning_rate": 9.58940252943742e-05, | |
| "loss": 1.3548, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 1.1582238674163818, | |
| "learning_rate": 9.587221979938946e-05, | |
| "loss": 1.374, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 1.1610651016235352, | |
| "learning_rate": 9.58504143044047e-05, | |
| "loss": 1.3726, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 1.0401073694229126, | |
| "learning_rate": 9.582860880941997e-05, | |
| "loss": 1.3617, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 1.1059417724609375, | |
| "learning_rate": 9.580680331443525e-05, | |
| "loss": 1.3765, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 1.055931806564331, | |
| "learning_rate": 9.578499781945051e-05, | |
| "loss": 1.377, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "grad_norm": 1.1078617572784424, | |
| "learning_rate": 9.576319232446577e-05, | |
| "loss": 1.3714, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "grad_norm": 1.0788148641586304, | |
| "learning_rate": 9.574138682948104e-05, | |
| "loss": 1.3769, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "grad_norm": 1.1252089738845825, | |
| "learning_rate": 9.57195813344963e-05, | |
| "loss": 1.3583, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "grad_norm": 1.0174541473388672, | |
| "learning_rate": 9.569777583951156e-05, | |
| "loss": 1.3665, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 1.0689630508422852, | |
| "learning_rate": 9.567597034452682e-05, | |
| "loss": 1.3571, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 1.1311278343200684, | |
| "learning_rate": 9.565416484954209e-05, | |
| "loss": 1.3475, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 1.082227349281311, | |
| "learning_rate": 9.563235935455735e-05, | |
| "loss": 1.3952, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 1.116151213645935, | |
| "learning_rate": 9.561055385957261e-05, | |
| "loss": 1.3644, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 1.2500598430633545, | |
| "learning_rate": 9.558874836458788e-05, | |
| "loss": 1.3197, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "grad_norm": 1.1783186197280884, | |
| "learning_rate": 9.556694286960315e-05, | |
| "loss": 1.3599, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "grad_norm": 0.964650571346283, | |
| "learning_rate": 9.554513737461842e-05, | |
| "loss": 1.3765, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "grad_norm": 1.1065633296966553, | |
| "learning_rate": 9.552333187963367e-05, | |
| "loss": 1.3605, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "grad_norm": 1.4492055177688599, | |
| "learning_rate": 9.550152638464893e-05, | |
| "loss": 1.3766, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "grad_norm": 0.9989602565765381, | |
| "learning_rate": 9.54797208896642e-05, | |
| "loss": 1.3821, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 1.2991678714752197, | |
| "learning_rate": 9.545791539467947e-05, | |
| "loss": 1.3418, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 1.1501140594482422, | |
| "learning_rate": 9.543610989969473e-05, | |
| "loss": 1.3627, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 0.9911489486694336, | |
| "learning_rate": 9.541430440470998e-05, | |
| "loss": 1.3413, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 1.1046435832977295, | |
| "learning_rate": 9.539249890972526e-05, | |
| "loss": 1.3494, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "grad_norm": 1.0511558055877686, | |
| "learning_rate": 9.537069341474052e-05, | |
| "loss": 1.3347, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "grad_norm": 1.1485401391983032, | |
| "learning_rate": 9.534888791975578e-05, | |
| "loss": 1.3833, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "grad_norm": 1.2908611297607422, | |
| "learning_rate": 9.532708242477105e-05, | |
| "loss": 1.3958, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "grad_norm": 1.0557186603546143, | |
| "learning_rate": 9.530527692978631e-05, | |
| "loss": 1.3455, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "grad_norm": 1.0551774501800537, | |
| "learning_rate": 9.528347143480157e-05, | |
| "loss": 1.3366, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 1.0171273946762085, | |
| "learning_rate": 9.526166593981684e-05, | |
| "loss": 1.3488, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 1.3464566469192505, | |
| "learning_rate": 9.523986044483211e-05, | |
| "loss": 1.3274, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 1.1853042840957642, | |
| "learning_rate": 9.521805494984737e-05, | |
| "loss": 1.3553, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 1.2067043781280518, | |
| "learning_rate": 9.519624945486262e-05, | |
| "loss": 1.358, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "grad_norm": 1.0003714561462402, | |
| "learning_rate": 9.517444395987789e-05, | |
| "loss": 1.3768, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "grad_norm": 1.036536455154419, | |
| "learning_rate": 9.515263846489316e-05, | |
| "loss": 1.325, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "grad_norm": 1.2333424091339111, | |
| "learning_rate": 9.513083296990843e-05, | |
| "loss": 1.3179, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "grad_norm": 1.5285654067993164, | |
| "learning_rate": 9.510902747492369e-05, | |
| "loss": 1.3847, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "grad_norm": 0.9648860096931458, | |
| "learning_rate": 9.508722197993894e-05, | |
| "loss": 1.3624, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 1.0200995206832886, | |
| "learning_rate": 9.506541648495422e-05, | |
| "loss": 1.3604, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 1.0368491411209106, | |
| "learning_rate": 9.504361098996948e-05, | |
| "loss": 1.3778, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 0.9241245985031128, | |
| "learning_rate": 9.502180549498474e-05, | |
| "loss": 1.3751, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 1.0286930799484253, | |
| "learning_rate": 9.5e-05, | |
| "loss": 1.3429, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 1.262276530265808, | |
| "learning_rate": 9.497819450501527e-05, | |
| "loss": 1.3533, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "grad_norm": 1.1345752477645874, | |
| "learning_rate": 9.495638901003053e-05, | |
| "loss": 1.3502, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "grad_norm": 1.025653600692749, | |
| "learning_rate": 9.49345835150458e-05, | |
| "loss": 1.3674, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "grad_norm": 1.0177459716796875, | |
| "learning_rate": 9.491277802006106e-05, | |
| "loss": 1.356, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "grad_norm": 1.1438894271850586, | |
| "learning_rate": 9.489097252507632e-05, | |
| "loss": 1.3488, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 1.133844017982483, | |
| "learning_rate": 9.486916703009158e-05, | |
| "loss": 1.3649, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 1.0228559970855713, | |
| "learning_rate": 9.484736153510685e-05, | |
| "loss": 1.3207, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 1.037307858467102, | |
| "learning_rate": 9.482555604012211e-05, | |
| "loss": 1.3517, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 1.123706340789795, | |
| "learning_rate": 9.480375054513739e-05, | |
| "loss": 1.371, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "grad_norm": 1.0684685707092285, | |
| "learning_rate": 9.478194505015265e-05, | |
| "loss": 1.335, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "grad_norm": 0.9726172089576721, | |
| "learning_rate": 9.47601395551679e-05, | |
| "loss": 1.3588, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "grad_norm": 0.8923851251602173, | |
| "learning_rate": 9.473833406018318e-05, | |
| "loss": 1.3269, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "grad_norm": 1.1655867099761963, | |
| "learning_rate": 9.471652856519844e-05, | |
| "loss": 1.3267, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "grad_norm": 0.9636451005935669, | |
| "learning_rate": 9.46947230702137e-05, | |
| "loss": 1.3545, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 1.1559605598449707, | |
| "learning_rate": 9.467291757522896e-05, | |
| "loss": 1.3276, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 1.1488990783691406, | |
| "learning_rate": 9.465111208024423e-05, | |
| "loss": 1.3312, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 1.0026187896728516, | |
| "learning_rate": 9.462930658525949e-05, | |
| "loss": 1.3574, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 1.0129337310791016, | |
| "learning_rate": 9.460750109027475e-05, | |
| "loss": 1.3524, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "grad_norm": 1.1561243534088135, | |
| "learning_rate": 9.458569559529002e-05, | |
| "loss": 1.3467, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "grad_norm": 1.0476332902908325, | |
| "learning_rate": 9.456389010030528e-05, | |
| "loss": 1.3552, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "grad_norm": 1.0199921131134033, | |
| "learning_rate": 9.454208460532054e-05, | |
| "loss": 1.3313, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "grad_norm": 1.2194985151290894, | |
| "learning_rate": 9.45202791103358e-05, | |
| "loss": 1.3134, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "grad_norm": 0.9112060070037842, | |
| "learning_rate": 9.449847361535107e-05, | |
| "loss": 1.3581, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "grad_norm": 1.085046648979187, | |
| "learning_rate": 9.447666812036635e-05, | |
| "loss": 1.3344, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 1.0680015087127686, | |
| "learning_rate": 9.445486262538161e-05, | |
| "loss": 1.3227, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 0.9969652891159058, | |
| "learning_rate": 9.443305713039686e-05, | |
| "loss": 1.3324, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 1.0868465900421143, | |
| "learning_rate": 9.441125163541212e-05, | |
| "loss": 1.3261, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "grad_norm": 1.0380125045776367, | |
| "learning_rate": 9.43894461404274e-05, | |
| "loss": 1.3378, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "grad_norm": 0.9851745367050171, | |
| "learning_rate": 9.436764064544266e-05, | |
| "loss": 1.3171, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "grad_norm": 0.9909139275550842, | |
| "learning_rate": 9.434583515045792e-05, | |
| "loss": 1.3073, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "grad_norm": 1.0225688219070435, | |
| "learning_rate": 9.432402965547317e-05, | |
| "loss": 1.3119, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "grad_norm": 1.018894910812378, | |
| "learning_rate": 9.430222416048845e-05, | |
| "loss": 1.3337, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "grad_norm": 1.0594004392623901, | |
| "learning_rate": 9.428041866550371e-05, | |
| "loss": 1.309, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "grad_norm": 1.0812976360321045, | |
| "learning_rate": 9.425861317051898e-05, | |
| "loss": 1.3403, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "grad_norm": 0.9586821794509888, | |
| "learning_rate": 9.423680767553424e-05, | |
| "loss": 1.3413, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "grad_norm": 0.9033297896385193, | |
| "learning_rate": 9.42150021805495e-05, | |
| "loss": 1.3361, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "grad_norm": 0.976488471031189, | |
| "learning_rate": 9.419319668556476e-05, | |
| "loss": 1.3467, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "grad_norm": 0.9687233567237854, | |
| "learning_rate": 9.417139119058003e-05, | |
| "loss": 1.3089, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "grad_norm": 0.9967139959335327, | |
| "learning_rate": 9.41495856955953e-05, | |
| "loss": 1.3241, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "grad_norm": 0.9404115676879883, | |
| "learning_rate": 9.412778020061055e-05, | |
| "loss": 1.3489, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "grad_norm": 1.038221001625061, | |
| "learning_rate": 9.410597470562582e-05, | |
| "loss": 1.3405, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "grad_norm": 0.9442505240440369, | |
| "learning_rate": 9.408416921064108e-05, | |
| "loss": 1.3733, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 0.8614059090614319, | |
| "learning_rate": 9.406236371565636e-05, | |
| "loss": 1.3369, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 1.0159504413604736, | |
| "learning_rate": 9.404055822067162e-05, | |
| "loss": 1.3473, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 0.9344844222068787, | |
| "learning_rate": 9.401875272568688e-05, | |
| "loss": 1.3191, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 0.9241899251937866, | |
| "learning_rate": 9.399694723070213e-05, | |
| "loss": 1.3074, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 1.0132297277450562, | |
| "learning_rate": 9.397514173571741e-05, | |
| "loss": 1.3345, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "grad_norm": 1.035719633102417, | |
| "learning_rate": 9.395333624073267e-05, | |
| "loss": 1.3241, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "grad_norm": 1.0716739892959595, | |
| "learning_rate": 9.393153074574793e-05, | |
| "loss": 1.3342, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "grad_norm": 1.05617094039917, | |
| "learning_rate": 9.39097252507632e-05, | |
| "loss": 1.3174, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "grad_norm": 1.0201910734176636, | |
| "learning_rate": 9.388791975577846e-05, | |
| "loss": 1.3427, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "grad_norm": 0.9820442199707031, | |
| "learning_rate": 9.386611426079372e-05, | |
| "loss": 1.3187, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "grad_norm": 0.9873951077461243, | |
| "learning_rate": 9.384430876580899e-05, | |
| "loss": 1.311, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "grad_norm": 1.0694694519042969, | |
| "learning_rate": 9.382250327082425e-05, | |
| "loss": 1.3409, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "grad_norm": 0.9933134317398071, | |
| "learning_rate": 9.380069777583951e-05, | |
| "loss": 1.3202, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "grad_norm": 1.0120593309402466, | |
| "learning_rate": 9.377889228085478e-05, | |
| "loss": 1.3243, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "grad_norm": 1.0012543201446533, | |
| "learning_rate": 9.375708678587004e-05, | |
| "loss": 1.3205, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "grad_norm": 0.9940156936645508, | |
| "learning_rate": 9.373528129088532e-05, | |
| "loss": 1.3319, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "grad_norm": 0.9410566687583923, | |
| "learning_rate": 9.371347579590058e-05, | |
| "loss": 1.3377, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "grad_norm": 1.0209511518478394, | |
| "learning_rate": 9.369167030091584e-05, | |
| "loss": 1.3226, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 1.0901682376861572, | |
| "learning_rate": 9.366986480593109e-05, | |
| "loss": 1.3054, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 1.1590335369110107, | |
| "learning_rate": 9.364805931094637e-05, | |
| "loss": 1.333, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 0.9248669147491455, | |
| "learning_rate": 9.362625381596163e-05, | |
| "loss": 1.3195, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 0.9178153276443481, | |
| "learning_rate": 9.36044483209769e-05, | |
| "loss": 1.3411, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "grad_norm": 0.8997146487236023, | |
| "learning_rate": 9.358264282599216e-05, | |
| "loss": 1.3238, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "grad_norm": 0.872699499130249, | |
| "learning_rate": 9.356083733100742e-05, | |
| "loss": 1.311, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "grad_norm": 1.0057190656661987, | |
| "learning_rate": 9.353903183602268e-05, | |
| "loss": 1.3419, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "grad_norm": 0.9421138763427734, | |
| "learning_rate": 9.351722634103795e-05, | |
| "loss": 1.3326, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "grad_norm": 1.072662353515625, | |
| "learning_rate": 9.349542084605321e-05, | |
| "loss": 1.3101, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "grad_norm": 0.9273852109909058, | |
| "learning_rate": 9.347361535106847e-05, | |
| "loss": 1.2917, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 1.056483507156372, | |
| "learning_rate": 9.345180985608373e-05, | |
| "loss": 1.3145, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 1.0562832355499268, | |
| "learning_rate": 9.3430004361099e-05, | |
| "loss": 1.3236, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 0.9665394425392151, | |
| "learning_rate": 9.340819886611426e-05, | |
| "loss": 1.3311, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "grad_norm": 1.1284903287887573, | |
| "learning_rate": 9.338639337112954e-05, | |
| "loss": 1.2955, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "grad_norm": 0.8982547521591187, | |
| "learning_rate": 9.336458787614479e-05, | |
| "loss": 1.3064, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "grad_norm": 0.9506440162658691, | |
| "learning_rate": 9.334278238116005e-05, | |
| "loss": 1.2924, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "grad_norm": 0.990853488445282, | |
| "learning_rate": 9.332097688617531e-05, | |
| "loss": 1.3153, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "grad_norm": 1.048412561416626, | |
| "learning_rate": 9.329917139119059e-05, | |
| "loss": 1.3151, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "grad_norm": 0.9810274243354797, | |
| "learning_rate": 9.327736589620585e-05, | |
| "loss": 1.3106, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 1.2232158184051514, | |
| "learning_rate": 9.325556040122112e-05, | |
| "loss": 1.3269, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 0.9797046780586243, | |
| "learning_rate": 9.323375490623638e-05, | |
| "loss": 1.3237, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 0.9088875651359558, | |
| "learning_rate": 9.321194941125164e-05, | |
| "loss": 1.328, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "grad_norm": 0.9865596294403076, | |
| "learning_rate": 9.31901439162669e-05, | |
| "loss": 1.3245, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "grad_norm": 0.890883207321167, | |
| "learning_rate": 9.316833842128217e-05, | |
| "loss": 1.3078, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "grad_norm": 1.2496368885040283, | |
| "learning_rate": 9.314653292629743e-05, | |
| "loss": 1.2926, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "grad_norm": 0.9493234753608704, | |
| "learning_rate": 9.31247274313127e-05, | |
| "loss": 1.3267, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "grad_norm": 0.9854113459587097, | |
| "learning_rate": 9.310292193632796e-05, | |
| "loss": 1.315, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "grad_norm": 0.9487243294715881, | |
| "learning_rate": 9.308111644134322e-05, | |
| "loss": 1.3089, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "grad_norm": 1.0045417547225952, | |
| "learning_rate": 9.30593109463585e-05, | |
| "loss": 1.3007, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "grad_norm": 0.9876412749290466, | |
| "learning_rate": 9.303750545137375e-05, | |
| "loss": 1.3276, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "grad_norm": 0.9821478724479675, | |
| "learning_rate": 9.301569995638901e-05, | |
| "loss": 1.3276, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "grad_norm": 1.0079724788665771, | |
| "learning_rate": 9.299389446140427e-05, | |
| "loss": 1.3379, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "grad_norm": 1.0058810710906982, | |
| "learning_rate": 9.297208896641955e-05, | |
| "loss": 1.309, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 0.9457936882972717, | |
| "learning_rate": 9.295028347143481e-05, | |
| "loss": 1.3301, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 1.0582879781723022, | |
| "learning_rate": 9.292847797645007e-05, | |
| "loss": 1.3075, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 1.0312747955322266, | |
| "learning_rate": 9.290667248146532e-05, | |
| "loss": 1.3102, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 1.3287076950073242, | |
| "learning_rate": 9.28848669864806e-05, | |
| "loss": 1.2828, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 1.0003306865692139, | |
| "learning_rate": 9.286306149149586e-05, | |
| "loss": 1.3158, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 0.9804103970527649, | |
| "learning_rate": 9.284125599651113e-05, | |
| "loss": 1.3429, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 0.9052048325538635, | |
| "learning_rate": 9.281945050152639e-05, | |
| "loss": 1.3248, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 0.9492114782333374, | |
| "learning_rate": 9.279764500654165e-05, | |
| "loss": 1.3173, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "grad_norm": 0.9319648742675781, | |
| "learning_rate": 9.277583951155692e-05, | |
| "loss": 1.3188, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "grad_norm": 0.9741306900978088, | |
| "learning_rate": 9.275403401657218e-05, | |
| "loss": 1.3263, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "grad_norm": 0.9644444584846497, | |
| "learning_rate": 9.273222852158746e-05, | |
| "loss": 1.3089, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "grad_norm": 0.972549319267273, | |
| "learning_rate": 9.27104230266027e-05, | |
| "loss": 1.3047, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "grad_norm": 1.1472231149673462, | |
| "learning_rate": 9.268861753161797e-05, | |
| "loss": 1.3414, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 1.212759256362915, | |
| "learning_rate": 9.266681203663323e-05, | |
| "loss": 1.2955, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 0.9833585023880005, | |
| "learning_rate": 9.264500654164851e-05, | |
| "loss": 1.3101, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 1.0089327096939087, | |
| "learning_rate": 9.262320104666377e-05, | |
| "loss": 1.3078, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 1.026849627494812, | |
| "learning_rate": 9.260139555167902e-05, | |
| "loss": 1.3062, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "grad_norm": 0.8988268375396729, | |
| "learning_rate": 9.257959005669428e-05, | |
| "loss": 1.2961, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "grad_norm": 1.0766083002090454, | |
| "learning_rate": 9.255778456170956e-05, | |
| "loss": 1.302, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "grad_norm": 1.111632227897644, | |
| "learning_rate": 9.253597906672482e-05, | |
| "loss": 1.3179, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "grad_norm": 0.9569946527481079, | |
| "learning_rate": 9.251417357174009e-05, | |
| "loss": 1.3392, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "grad_norm": 0.9719332456588745, | |
| "learning_rate": 9.249236807675535e-05, | |
| "loss": 1.3019, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "grad_norm": 0.9521161317825317, | |
| "learning_rate": 9.247056258177061e-05, | |
| "loss": 1.3226, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 1.1349732875823975, | |
| "learning_rate": 9.244875708678587e-05, | |
| "loss": 1.3184, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 1.0802345275878906, | |
| "learning_rate": 9.242695159180114e-05, | |
| "loss": 1.3236, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 1.0327568054199219, | |
| "learning_rate": 9.24051460968164e-05, | |
| "loss": 1.3285, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 1.064948320388794, | |
| "learning_rate": 9.238334060183166e-05, | |
| "loss": 1.3158, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "grad_norm": 0.88676518201828, | |
| "learning_rate": 9.236153510684693e-05, | |
| "loss": 1.3066, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "grad_norm": 0.942152202129364, | |
| "learning_rate": 9.233972961186219e-05, | |
| "loss": 1.332, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "grad_norm": 0.9341984987258911, | |
| "learning_rate": 9.231792411687745e-05, | |
| "loss": 1.3147, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "grad_norm": 0.8915871381759644, | |
| "learning_rate": 9.229611862189273e-05, | |
| "loss": 1.3071, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "grad_norm": 0.9265626668930054, | |
| "learning_rate": 9.227431312690798e-05, | |
| "loss": 1.3083, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 0.9003929495811462, | |
| "learning_rate": 9.225250763192324e-05, | |
| "loss": 1.3101, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 1.004757285118103, | |
| "learning_rate": 9.223070213693852e-05, | |
| "loss": 1.3324, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 0.9720560908317566, | |
| "learning_rate": 9.220889664195378e-05, | |
| "loss": 1.3074, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 1.0125725269317627, | |
| "learning_rate": 9.218709114696904e-05, | |
| "loss": 1.295, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 0.9948697686195374, | |
| "learning_rate": 9.21652856519843e-05, | |
| "loss": 1.3072, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "grad_norm": 0.8904112577438354, | |
| "learning_rate": 9.214348015699957e-05, | |
| "loss": 1.2879, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "grad_norm": 0.9827283620834351, | |
| "learning_rate": 9.212167466201483e-05, | |
| "loss": 1.2859, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "grad_norm": 0.9134978652000427, | |
| "learning_rate": 9.20998691670301e-05, | |
| "loss": 1.2996, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "grad_norm": 0.9517325162887573, | |
| "learning_rate": 9.207806367204536e-05, | |
| "loss": 1.2764, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 0.9537093043327332, | |
| "learning_rate": 9.205625817706062e-05, | |
| "loss": 1.3112, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 1.011399269104004, | |
| "learning_rate": 9.203445268207589e-05, | |
| "loss": 1.3008, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 1.0325734615325928, | |
| "learning_rate": 9.201264718709115e-05, | |
| "loss": 1.3032, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 0.9590222239494324, | |
| "learning_rate": 9.199084169210641e-05, | |
| "loss": 1.3002, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 0.984958827495575, | |
| "learning_rate": 9.196903619712169e-05, | |
| "loss": 1.3011, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "grad_norm": 1.1154364347457886, | |
| "learning_rate": 9.194723070213694e-05, | |
| "loss": 1.3065, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "grad_norm": 1.0203578472137451, | |
| "learning_rate": 9.19254252071522e-05, | |
| "loss": 1.3193, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "grad_norm": 1.0204946994781494, | |
| "learning_rate": 9.190361971216746e-05, | |
| "loss": 1.3048, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "grad_norm": 0.9758703708648682, | |
| "learning_rate": 9.188181421718274e-05, | |
| "loss": 1.2933, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 1.0854405164718628, | |
| "learning_rate": 9.1860008722198e-05, | |
| "loss": 1.2947, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 1.0030591487884521, | |
| "learning_rate": 9.183820322721325e-05, | |
| "loss": 1.2882, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 0.9652947187423706, | |
| "learning_rate": 9.181639773222852e-05, | |
| "loss": 1.2779, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 1.0450283288955688, | |
| "learning_rate": 9.179459223724379e-05, | |
| "loss": 1.2807, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 1.0894801616668701, | |
| "learning_rate": 9.177278674225906e-05, | |
| "loss": 1.3072, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "grad_norm": 1.0392231941223145, | |
| "learning_rate": 9.175098124727432e-05, | |
| "loss": 1.3119, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "grad_norm": 0.9792558550834656, | |
| "learning_rate": 9.172917575228958e-05, | |
| "loss": 1.3062, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "grad_norm": 1.015689492225647, | |
| "learning_rate": 9.170737025730485e-05, | |
| "loss": 1.3075, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "grad_norm": 1.0359702110290527, | |
| "learning_rate": 9.168556476232011e-05, | |
| "loss": 1.3022, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "grad_norm": 0.9113004803657532, | |
| "learning_rate": 9.166375926733537e-05, | |
| "loss": 1.3298, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 1.0571136474609375, | |
| "learning_rate": 9.164195377235065e-05, | |
| "loss": 1.2898, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 0.9297426342964172, | |
| "learning_rate": 9.16201482773659e-05, | |
| "loss": 1.2895, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 1.0925400257110596, | |
| "learning_rate": 9.159834278238116e-05, | |
| "loss": 1.2998, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 0.9070808291435242, | |
| "learning_rate": 9.157653728739642e-05, | |
| "loss": 1.2998, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "grad_norm": 1.1315734386444092, | |
| "learning_rate": 9.15547317924117e-05, | |
| "loss": 1.2867, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "grad_norm": 1.0597316026687622, | |
| "learning_rate": 9.153292629742696e-05, | |
| "loss": 1.2931, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "grad_norm": 0.9442005157470703, | |
| "learning_rate": 9.151112080244221e-05, | |
| "loss": 1.2805, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "grad_norm": 1.3041001558303833, | |
| "learning_rate": 9.148931530745748e-05, | |
| "loss": 1.2934, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "grad_norm": 0.9306684136390686, | |
| "learning_rate": 9.146750981247275e-05, | |
| "loss": 1.2933, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 0.9480651021003723, | |
| "learning_rate": 9.144570431748802e-05, | |
| "loss": 1.3147, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 0.98679119348526, | |
| "learning_rate": 9.142389882250328e-05, | |
| "loss": 1.3063, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 0.9486891627311707, | |
| "learning_rate": 9.140209332751853e-05, | |
| "loss": 1.2644, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 0.9325621724128723, | |
| "learning_rate": 9.13802878325338e-05, | |
| "loss": 1.2718, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "grad_norm": 0.9871125221252441, | |
| "learning_rate": 9.135848233754907e-05, | |
| "loss": 1.2943, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "grad_norm": 0.9043755531311035, | |
| "learning_rate": 9.133667684256433e-05, | |
| "loss": 1.3015, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "grad_norm": 0.9878096580505371, | |
| "learning_rate": 9.13148713475796e-05, | |
| "loss": 1.2524, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "grad_norm": 0.925841748714447, | |
| "learning_rate": 9.129306585259486e-05, | |
| "loss": 1.2881, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "grad_norm": 0.8888818025588989, | |
| "learning_rate": 9.127126035761012e-05, | |
| "loss": 1.3057, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 1.1273852586746216, | |
| "learning_rate": 9.124945486262538e-05, | |
| "loss": 1.3068, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 1.078979253768921, | |
| "learning_rate": 9.122764936764066e-05, | |
| "loss": 1.311, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 1.139224648475647, | |
| "learning_rate": 9.120584387265592e-05, | |
| "loss": 1.2961, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 0.9568941593170166, | |
| "learning_rate": 9.118403837767117e-05, | |
| "loss": 1.3335, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 0.8990288972854614, | |
| "learning_rate": 9.116223288268643e-05, | |
| "loss": 1.2983, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "grad_norm": 1.0404481887817383, | |
| "learning_rate": 9.114042738770171e-05, | |
| "loss": 1.2867, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "grad_norm": 0.970191240310669, | |
| "learning_rate": 9.111862189271697e-05, | |
| "loss": 1.2923, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "grad_norm": 0.9285945296287537, | |
| "learning_rate": 9.109681639773224e-05, | |
| "loss": 1.296, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "grad_norm": 1.0113970041275024, | |
| "learning_rate": 9.107501090274749e-05, | |
| "loss": 1.2861, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 1.0101959705352783, | |
| "learning_rate": 9.105320540776276e-05, | |
| "loss": 1.2958, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 0.9014917612075806, | |
| "learning_rate": 9.103139991277803e-05, | |
| "loss": 1.2735, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 1.4451045989990234, | |
| "learning_rate": 9.100959441779329e-05, | |
| "loss": 1.3111, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 0.9970597624778748, | |
| "learning_rate": 9.098778892280855e-05, | |
| "loss": 1.2725, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 0.9795159101486206, | |
| "learning_rate": 9.096598342782382e-05, | |
| "loss": 1.286, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "grad_norm": 1.1754708290100098, | |
| "learning_rate": 9.094417793283908e-05, | |
| "loss": 1.2903, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "grad_norm": 1.02108895778656, | |
| "learning_rate": 9.092237243785434e-05, | |
| "loss": 1.2865, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "grad_norm": 0.9269696474075317, | |
| "learning_rate": 9.09005669428696e-05, | |
| "loss": 1.3163, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "grad_norm": 0.9824286103248596, | |
| "learning_rate": 9.087876144788488e-05, | |
| "loss": 1.2713, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 1.2137070894241333, | |
| "learning_rate": 9.085695595290013e-05, | |
| "loss": 1.313, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 1.0218490362167358, | |
| "learning_rate": 9.08351504579154e-05, | |
| "loss": 1.2864, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 1.0295207500457764, | |
| "learning_rate": 9.081334496293066e-05, | |
| "loss": 1.2974, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 1.0075607299804688, | |
| "learning_rate": 9.079153946794593e-05, | |
| "loss": 1.3011, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 0.889430820941925, | |
| "learning_rate": 9.07697339729612e-05, | |
| "loss": 1.3112, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "grad_norm": 0.9565015435218811, | |
| "learning_rate": 9.074792847797645e-05, | |
| "loss": 1.3019, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "grad_norm": 1.0241695642471313, | |
| "learning_rate": 9.072612298299172e-05, | |
| "loss": 1.2878, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "grad_norm": 0.9693965315818787, | |
| "learning_rate": 9.070431748800699e-05, | |
| "loss": 1.3009, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "grad_norm": 0.8897150754928589, | |
| "learning_rate": 9.068251199302225e-05, | |
| "loss": 1.2757, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "grad_norm": 1.1614912748336792, | |
| "learning_rate": 9.066070649803751e-05, | |
| "loss": 1.2923, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 0.8832863569259644, | |
| "learning_rate": 9.063890100305277e-05, | |
| "loss": 1.3098, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 0.9805281162261963, | |
| "learning_rate": 9.061709550806804e-05, | |
| "loss": 1.2958, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 1.0199958086013794, | |
| "learning_rate": 9.05952900130833e-05, | |
| "loss": 1.2824, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 0.8528922200202942, | |
| "learning_rate": 9.057348451809856e-05, | |
| "loss": 1.2993, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "grad_norm": 0.9288610816001892, | |
| "learning_rate": 9.055167902311384e-05, | |
| "loss": 1.2758, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "grad_norm": 0.8977848887443542, | |
| "learning_rate": 9.052987352812909e-05, | |
| "loss": 1.2789, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "grad_norm": 0.8637726902961731, | |
| "learning_rate": 9.050806803314435e-05, | |
| "loss": 1.2734, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "grad_norm": 0.9056828022003174, | |
| "learning_rate": 9.048626253815962e-05, | |
| "loss": 1.272, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "grad_norm": 0.9080044627189636, | |
| "learning_rate": 9.046445704317489e-05, | |
| "loss": 1.264, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 0.886441707611084, | |
| "learning_rate": 9.044265154819016e-05, | |
| "loss": 1.2752, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 1.02278470993042, | |
| "learning_rate": 9.04208460532054e-05, | |
| "loss": 1.2819, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 1.2792317867279053, | |
| "eval_runtime": 1502.3325, | |
| "eval_samples_per_second": 257.499, | |
| "eval_steps_per_second": 4.024, | |
| "step": 4606 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 1.08243727684021, | |
| "learning_rate": 9.039904055822067e-05, | |
| "loss": 1.3113, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 1.073258399963379, | |
| "learning_rate": 9.037723506323594e-05, | |
| "loss": 1.3031, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "grad_norm": 0.9962953329086304, | |
| "learning_rate": 9.035542956825121e-05, | |
| "loss": 1.2904, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "grad_norm": 0.9397081136703491, | |
| "learning_rate": 9.033362407326647e-05, | |
| "loss": 1.2672, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "grad_norm": 0.9223260879516602, | |
| "learning_rate": 9.031181857828172e-05, | |
| "loss": 1.2898, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "grad_norm": 1.0643510818481445, | |
| "learning_rate": 9.0290013083297e-05, | |
| "loss": 1.2831, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "grad_norm": 0.9219188094139099, | |
| "learning_rate": 9.026820758831226e-05, | |
| "loss": 1.2651, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "grad_norm": 0.9872779250144958, | |
| "learning_rate": 9.024640209332752e-05, | |
| "loss": 1.2695, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "grad_norm": 0.9516711235046387, | |
| "learning_rate": 9.022459659834279e-05, | |
| "loss": 1.2662, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "grad_norm": 0.9385516047477722, | |
| "learning_rate": 9.020279110335805e-05, | |
| "loss": 1.2744, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "grad_norm": 1.0308866500854492, | |
| "learning_rate": 9.018098560837331e-05, | |
| "loss": 1.2718, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "grad_norm": 0.9456400871276855, | |
| "learning_rate": 9.015918011338857e-05, | |
| "loss": 1.2494, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "grad_norm": 1.1350531578063965, | |
| "learning_rate": 9.013737461840385e-05, | |
| "loss": 1.2607, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "grad_norm": 0.9552891254425049, | |
| "learning_rate": 9.011556912341911e-05, | |
| "loss": 1.2563, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "grad_norm": 0.9082231521606445, | |
| "learning_rate": 9.009376362843436e-05, | |
| "loss": 1.268, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "grad_norm": 1.0419315099716187, | |
| "learning_rate": 9.007195813344963e-05, | |
| "loss": 1.3033, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "grad_norm": 0.827100396156311, | |
| "learning_rate": 9.00501526384649e-05, | |
| "loss": 1.2636, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "grad_norm": 1.0661678314208984, | |
| "learning_rate": 9.002834714348017e-05, | |
| "loss": 1.2487, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "grad_norm": 0.9938476085662842, | |
| "learning_rate": 9.000654164849543e-05, | |
| "loss": 1.2729, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "grad_norm": 1.1281195878982544, | |
| "learning_rate": 8.998473615351068e-05, | |
| "loss": 1.2391, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "grad_norm": 1.1780451536178589, | |
| "learning_rate": 8.996293065852596e-05, | |
| "loss": 1.2985, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "grad_norm": 1.0872817039489746, | |
| "learning_rate": 8.994112516354122e-05, | |
| "loss": 1.2615, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "grad_norm": 0.9712433815002441, | |
| "learning_rate": 8.991931966855648e-05, | |
| "loss": 1.2694, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "grad_norm": 1.2177668809890747, | |
| "learning_rate": 8.989751417357174e-05, | |
| "loss": 1.2726, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "grad_norm": 0.9332715272903442, | |
| "learning_rate": 8.987570867858701e-05, | |
| "loss": 1.2703, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "grad_norm": 0.9567763209342957, | |
| "learning_rate": 8.985390318360227e-05, | |
| "loss": 1.2711, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "grad_norm": 0.9975143074989319, | |
| "learning_rate": 8.983209768861753e-05, | |
| "loss": 1.2947, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "grad_norm": 1.0711029767990112, | |
| "learning_rate": 8.98102921936328e-05, | |
| "loss": 1.2723, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "grad_norm": 0.9394287467002869, | |
| "learning_rate": 8.978848669864807e-05, | |
| "loss": 1.2709, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "grad_norm": 1.0839319229125977, | |
| "learning_rate": 8.976668120366332e-05, | |
| "loss": 1.2892, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "grad_norm": 1.024117112159729, | |
| "learning_rate": 8.974487570867859e-05, | |
| "loss": 1.2627, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "grad_norm": 0.9055659174919128, | |
| "learning_rate": 8.972307021369386e-05, | |
| "loss": 1.2754, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "grad_norm": 0.9383713603019714, | |
| "learning_rate": 8.970126471870913e-05, | |
| "loss": 1.2713, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "grad_norm": 1.087470293045044, | |
| "learning_rate": 8.967945922372439e-05, | |
| "loss": 1.27, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "grad_norm": 0.9602554440498352, | |
| "learning_rate": 8.965765372873964e-05, | |
| "loss": 1.2829, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "grad_norm": 0.9457790851593018, | |
| "learning_rate": 8.963584823375491e-05, | |
| "loss": 1.2757, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "grad_norm": 0.8682853579521179, | |
| "learning_rate": 8.961404273877018e-05, | |
| "loss": 1.2662, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "grad_norm": 1.0000272989273071, | |
| "learning_rate": 8.959223724378544e-05, | |
| "loss": 1.2616, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "grad_norm": 1.0122287273406982, | |
| "learning_rate": 8.95704317488007e-05, | |
| "loss": 1.287, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "grad_norm": 0.9552735090255737, | |
| "learning_rate": 8.954862625381597e-05, | |
| "loss": 1.2782, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "grad_norm": 0.9103166460990906, | |
| "learning_rate": 8.952682075883123e-05, | |
| "loss": 1.2388, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "grad_norm": 1.0033226013183594, | |
| "learning_rate": 8.950501526384649e-05, | |
| "loss": 1.2762, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "grad_norm": 0.9572534561157227, | |
| "learning_rate": 8.948320976886176e-05, | |
| "loss": 1.2801, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "grad_norm": 0.9460912942886353, | |
| "learning_rate": 8.946140427387702e-05, | |
| "loss": 1.2651, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "grad_norm": 1.0236018896102905, | |
| "learning_rate": 8.943959877889228e-05, | |
| "loss": 1.2602, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "grad_norm": 1.0384821891784668, | |
| "learning_rate": 8.941779328390754e-05, | |
| "loss": 1.3027, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "grad_norm": 0.9547539949417114, | |
| "learning_rate": 8.939598778892281e-05, | |
| "loss": 1.2969, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "grad_norm": 0.9478334784507751, | |
| "learning_rate": 8.937418229393808e-05, | |
| "loss": 1.2829, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "grad_norm": 1.0621150732040405, | |
| "learning_rate": 8.935237679895335e-05, | |
| "loss": 1.2601, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "grad_norm": 0.9307476282119751, | |
| "learning_rate": 8.93305713039686e-05, | |
| "loss": 1.2656, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "grad_norm": 1.0189131498336792, | |
| "learning_rate": 8.930876580898386e-05, | |
| "loss": 1.2646, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "grad_norm": 1.1185131072998047, | |
| "learning_rate": 8.928696031399914e-05, | |
| "loss": 1.2785, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "grad_norm": 0.9753584265708923, | |
| "learning_rate": 8.92651548190144e-05, | |
| "loss": 1.2511, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "grad_norm": 1.0418280363082886, | |
| "learning_rate": 8.924334932402966e-05, | |
| "loss": 1.2537, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "grad_norm": 0.9717410802841187, | |
| "learning_rate": 8.922154382904493e-05, | |
| "loss": 1.2687, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "grad_norm": 0.988318681716919, | |
| "learning_rate": 8.919973833406019e-05, | |
| "loss": 1.2599, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "grad_norm": 0.9211105108261108, | |
| "learning_rate": 8.917793283907545e-05, | |
| "loss": 1.2646, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "grad_norm": 0.9481471180915833, | |
| "learning_rate": 8.915612734409071e-05, | |
| "loss": 1.271, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "grad_norm": 0.8939971923828125, | |
| "learning_rate": 8.913432184910598e-05, | |
| "loss": 1.2865, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "grad_norm": 0.9412124156951904, | |
| "learning_rate": 8.911251635412124e-05, | |
| "loss": 1.279, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "grad_norm": 0.9381204843521118, | |
| "learning_rate": 8.90907108591365e-05, | |
| "loss": 1.2813, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "grad_norm": 0.9502457976341248, | |
| "learning_rate": 8.906890536415177e-05, | |
| "loss": 1.2829, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "grad_norm": 1.0576632022857666, | |
| "learning_rate": 8.904709986916704e-05, | |
| "loss": 1.2708, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "grad_norm": 1.0302668809890747, | |
| "learning_rate": 8.902529437418229e-05, | |
| "loss": 1.2893, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "grad_norm": 0.9892765283584595, | |
| "learning_rate": 8.900348887919756e-05, | |
| "loss": 1.2691, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "grad_norm": 1.0383532047271729, | |
| "learning_rate": 8.898168338421282e-05, | |
| "loss": 1.2539, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "grad_norm": 0.9894425868988037, | |
| "learning_rate": 8.89598778892281e-05, | |
| "loss": 1.2838, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "grad_norm": 1.0066653490066528, | |
| "learning_rate": 8.893807239424336e-05, | |
| "loss": 1.2606, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "grad_norm": 1.0619821548461914, | |
| "learning_rate": 8.891626689925862e-05, | |
| "loss": 1.2724, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "grad_norm": 0.9619722962379456, | |
| "learning_rate": 8.889446140427387e-05, | |
| "loss": 1.2783, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "grad_norm": 0.8887227177619934, | |
| "learning_rate": 8.887265590928915e-05, | |
| "loss": 1.264, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "grad_norm": 1.0262665748596191, | |
| "learning_rate": 8.885085041430441e-05, | |
| "loss": 1.2482, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "grad_norm": 1.016381859779358, | |
| "learning_rate": 8.882904491931967e-05, | |
| "loss": 1.2523, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "grad_norm": 0.9932143092155457, | |
| "learning_rate": 8.880723942433494e-05, | |
| "loss": 1.2516, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "grad_norm": 0.9815816283226013, | |
| "learning_rate": 8.87854339293502e-05, | |
| "loss": 1.2574, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "grad_norm": 1.0072325468063354, | |
| "learning_rate": 8.876362843436546e-05, | |
| "loss": 1.2688, | |
| "step": 5360 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "grad_norm": 0.9834664463996887, | |
| "learning_rate": 8.874182293938073e-05, | |
| "loss": 1.2632, | |
| "step": 5370 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "grad_norm": 1.0800156593322754, | |
| "learning_rate": 8.8720017444396e-05, | |
| "loss": 1.2767, | |
| "step": 5380 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "grad_norm": 0.9449285268783569, | |
| "learning_rate": 8.869821194941125e-05, | |
| "loss": 1.2667, | |
| "step": 5390 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "grad_norm": 1.1136956214904785, | |
| "learning_rate": 8.867640645442652e-05, | |
| "loss": 1.2506, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "grad_norm": 0.9061567783355713, | |
| "learning_rate": 8.865460095944178e-05, | |
| "loss": 1.2658, | |
| "step": 5410 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "grad_norm": 1.00759756565094, | |
| "learning_rate": 8.863279546445705e-05, | |
| "loss": 1.285, | |
| "step": 5420 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "grad_norm": 1.0507421493530273, | |
| "learning_rate": 8.861098996947232e-05, | |
| "loss": 1.277, | |
| "step": 5430 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "grad_norm": 1.0796302556991577, | |
| "learning_rate": 8.858918447448758e-05, | |
| "loss": 1.2604, | |
| "step": 5440 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "grad_norm": 1.0264052152633667, | |
| "learning_rate": 8.856737897950283e-05, | |
| "loss": 1.2747, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "grad_norm": 0.9274656176567078, | |
| "learning_rate": 8.854557348451811e-05, | |
| "loss": 1.2617, | |
| "step": 5460 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "grad_norm": 1.0233980417251587, | |
| "learning_rate": 8.852376798953337e-05, | |
| "loss": 1.2787, | |
| "step": 5470 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "grad_norm": 0.9718747138977051, | |
| "learning_rate": 8.850196249454863e-05, | |
| "loss": 1.2511, | |
| "step": 5480 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "grad_norm": 1.0765981674194336, | |
| "learning_rate": 8.84801569995639e-05, | |
| "loss": 1.2794, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "grad_norm": 1.048608660697937, | |
| "learning_rate": 8.845835150457916e-05, | |
| "loss": 1.2597, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 0.9524050354957581, | |
| "learning_rate": 8.843654600959442e-05, | |
| "loss": 1.246, | |
| "step": 5510 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 0.9819397926330566, | |
| "learning_rate": 8.841474051460969e-05, | |
| "loss": 1.2732, | |
| "step": 5520 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 0.914893388748169, | |
| "learning_rate": 8.839293501962495e-05, | |
| "loss": 1.2694, | |
| "step": 5530 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 0.9561071395874023, | |
| "learning_rate": 8.837112952464021e-05, | |
| "loss": 1.2642, | |
| "step": 5540 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "grad_norm": 0.9841814637184143, | |
| "learning_rate": 8.834932402965547e-05, | |
| "loss": 1.2684, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "grad_norm": 0.931611955165863, | |
| "learning_rate": 8.832751853467074e-05, | |
| "loss": 1.2751, | |
| "step": 5560 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "grad_norm": 1.0068223476409912, | |
| "learning_rate": 8.8305713039686e-05, | |
| "loss": 1.2589, | |
| "step": 5570 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "grad_norm": 1.088884711265564, | |
| "learning_rate": 8.828390754470128e-05, | |
| "loss": 1.2606, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "grad_norm": 0.9682032465934753, | |
| "learning_rate": 8.826210204971653e-05, | |
| "loss": 1.2467, | |
| "step": 5590 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "grad_norm": 1.0218122005462646, | |
| "learning_rate": 8.824029655473179e-05, | |
| "loss": 1.2684, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "grad_norm": 0.9690065979957581, | |
| "learning_rate": 8.821849105974707e-05, | |
| "loss": 1.2906, | |
| "step": 5610 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "grad_norm": 0.9736804366111755, | |
| "learning_rate": 8.819668556476233e-05, | |
| "loss": 1.2682, | |
| "step": 5620 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "grad_norm": 1.0571842193603516, | |
| "learning_rate": 8.817488006977759e-05, | |
| "loss": 1.247, | |
| "step": 5630 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "grad_norm": 1.1925692558288574, | |
| "learning_rate": 8.815307457479286e-05, | |
| "loss": 1.28, | |
| "step": 5640 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "grad_norm": 0.8674301505088806, | |
| "learning_rate": 8.813126907980812e-05, | |
| "loss": 1.2699, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "grad_norm": 1.030501127243042, | |
| "learning_rate": 8.810946358482338e-05, | |
| "loss": 1.2455, | |
| "step": 5660 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "grad_norm": 1.0425055027008057, | |
| "learning_rate": 8.808765808983864e-05, | |
| "loss": 1.2802, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 1.23, | |
| "grad_norm": 0.9576709866523743, | |
| "learning_rate": 8.806585259485391e-05, | |
| "loss": 1.2584, | |
| "step": 5680 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "grad_norm": 0.9852989912033081, | |
| "learning_rate": 8.804404709986917e-05, | |
| "loss": 1.2707, | |
| "step": 5690 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "grad_norm": 1.0519157648086548, | |
| "learning_rate": 8.802224160488443e-05, | |
| "loss": 1.2647, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "grad_norm": 1.1391375064849854, | |
| "learning_rate": 8.80004361098997e-05, | |
| "loss": 1.2459, | |
| "step": 5710 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "grad_norm": 1.295246958732605, | |
| "learning_rate": 8.797863061491496e-05, | |
| "loss": 1.2708, | |
| "step": 5720 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "grad_norm": 0.9388042688369751, | |
| "learning_rate": 8.795682511993024e-05, | |
| "loss": 1.2761, | |
| "step": 5730 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "grad_norm": 0.8345937728881836, | |
| "learning_rate": 8.793501962494549e-05, | |
| "loss": 1.2641, | |
| "step": 5740 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "grad_norm": 0.9559466242790222, | |
| "learning_rate": 8.791321412996075e-05, | |
| "loss": 1.2608, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "grad_norm": 0.9135338068008423, | |
| "learning_rate": 8.789140863497601e-05, | |
| "loss": 1.245, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "grad_norm": 1.0820287466049194, | |
| "learning_rate": 8.786960313999129e-05, | |
| "loss": 1.2549, | |
| "step": 5770 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "grad_norm": 1.05925714969635, | |
| "learning_rate": 8.784779764500655e-05, | |
| "loss": 1.2493, | |
| "step": 5780 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "grad_norm": 1.0629942417144775, | |
| "learning_rate": 8.782599215002181e-05, | |
| "loss": 1.2803, | |
| "step": 5790 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "grad_norm": 1.021894097328186, | |
| "learning_rate": 8.780418665503706e-05, | |
| "loss": 1.264, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "grad_norm": 0.9319231510162354, | |
| "learning_rate": 8.778238116005234e-05, | |
| "loss": 1.2757, | |
| "step": 5810 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "grad_norm": 0.9403659701347351, | |
| "learning_rate": 8.77605756650676e-05, | |
| "loss": 1.2601, | |
| "step": 5820 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "grad_norm": 1.0411070585250854, | |
| "learning_rate": 8.773877017008287e-05, | |
| "loss": 1.2747, | |
| "step": 5830 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "grad_norm": 0.9437740445137024, | |
| "learning_rate": 8.771696467509813e-05, | |
| "loss": 1.2771, | |
| "step": 5840 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "grad_norm": 1.0971676111221313, | |
| "learning_rate": 8.769515918011339e-05, | |
| "loss": 1.2631, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "grad_norm": 1.0248700380325317, | |
| "learning_rate": 8.767335368512866e-05, | |
| "loss": 1.255, | |
| "step": 5860 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "grad_norm": 1.1890584230422974, | |
| "learning_rate": 8.765154819014392e-05, | |
| "loss": 1.265, | |
| "step": 5870 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "grad_norm": 1.1310992240905762, | |
| "learning_rate": 8.76297426951592e-05, | |
| "loss": 1.2786, | |
| "step": 5880 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "grad_norm": 0.95496666431427, | |
| "learning_rate": 8.760793720017444e-05, | |
| "loss": 1.2534, | |
| "step": 5890 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "grad_norm": 1.0427186489105225, | |
| "learning_rate": 8.758613170518971e-05, | |
| "loss": 1.2767, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "grad_norm": 0.879298985004425, | |
| "learning_rate": 8.756432621020497e-05, | |
| "loss": 1.2453, | |
| "step": 5910 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "grad_norm": 0.9911447167396545, | |
| "learning_rate": 8.754252071522025e-05, | |
| "loss": 1.248, | |
| "step": 5920 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "grad_norm": 0.9124498963356018, | |
| "learning_rate": 8.752071522023551e-05, | |
| "loss": 1.2588, | |
| "step": 5930 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "grad_norm": 0.9397348761558533, | |
| "learning_rate": 8.749890972525076e-05, | |
| "loss": 1.2822, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "grad_norm": 1.0716569423675537, | |
| "learning_rate": 8.747710423026602e-05, | |
| "loss": 1.2483, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "grad_norm": 0.8869634866714478, | |
| "learning_rate": 8.74552987352813e-05, | |
| "loss": 1.2752, | |
| "step": 5960 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "grad_norm": 0.9538241028785706, | |
| "learning_rate": 8.743349324029656e-05, | |
| "loss": 1.2627, | |
| "step": 5970 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "grad_norm": 0.9991753697395325, | |
| "learning_rate": 8.741168774531183e-05, | |
| "loss": 1.2718, | |
| "step": 5980 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "grad_norm": 1.0785272121429443, | |
| "learning_rate": 8.738988225032709e-05, | |
| "loss": 1.2826, | |
| "step": 5990 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "grad_norm": 1.002681851387024, | |
| "learning_rate": 8.736807675534235e-05, | |
| "loss": 1.2659, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "grad_norm": 0.9270432591438293, | |
| "learning_rate": 8.734627126035761e-05, | |
| "loss": 1.2493, | |
| "step": 6010 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "grad_norm": 1.143751621246338, | |
| "learning_rate": 8.732446576537288e-05, | |
| "loss": 1.2965, | |
| "step": 6020 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "grad_norm": 0.9666625261306763, | |
| "learning_rate": 8.730266027038814e-05, | |
| "loss": 1.2553, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "grad_norm": 0.9400457739830017, | |
| "learning_rate": 8.72808547754034e-05, | |
| "loss": 1.2657, | |
| "step": 6040 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "grad_norm": 0.9232240319252014, | |
| "learning_rate": 8.725904928041867e-05, | |
| "loss": 1.2494, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "grad_norm": 0.9295173287391663, | |
| "learning_rate": 8.723724378543393e-05, | |
| "loss": 1.2496, | |
| "step": 6060 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "grad_norm": 1.293441653251648, | |
| "learning_rate": 8.72154382904492e-05, | |
| "loss": 1.2578, | |
| "step": 6070 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "grad_norm": 0.9575563669204712, | |
| "learning_rate": 8.719363279546447e-05, | |
| "loss": 1.2323, | |
| "step": 6080 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "grad_norm": 1.0204386711120605, | |
| "learning_rate": 8.717182730047972e-05, | |
| "loss": 1.2652, | |
| "step": 6090 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "grad_norm": 0.9446994066238403, | |
| "learning_rate": 8.715002180549498e-05, | |
| "loss": 1.2568, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "grad_norm": 1.0751984119415283, | |
| "learning_rate": 8.712821631051026e-05, | |
| "loss": 1.2806, | |
| "step": 6110 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "grad_norm": 0.9466795921325684, | |
| "learning_rate": 8.710641081552552e-05, | |
| "loss": 1.2416, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "grad_norm": 1.1114068031311035, | |
| "learning_rate": 8.708460532054078e-05, | |
| "loss": 1.2405, | |
| "step": 6130 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "grad_norm": 0.9612728953361511, | |
| "learning_rate": 8.706279982555605e-05, | |
| "loss": 1.2655, | |
| "step": 6140 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "grad_norm": 0.9728400707244873, | |
| "learning_rate": 8.704099433057131e-05, | |
| "loss": 1.2654, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "grad_norm": 1.0217069387435913, | |
| "learning_rate": 8.701918883558657e-05, | |
| "loss": 1.2804, | |
| "step": 6160 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "grad_norm": 0.9358672499656677, | |
| "learning_rate": 8.699738334060184e-05, | |
| "loss": 1.282, | |
| "step": 6170 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "grad_norm": 0.875811755657196, | |
| "learning_rate": 8.69755778456171e-05, | |
| "loss": 1.2974, | |
| "step": 6180 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "grad_norm": 0.9315816760063171, | |
| "learning_rate": 8.695377235063236e-05, | |
| "loss": 1.2515, | |
| "step": 6190 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "grad_norm": 0.9914236664772034, | |
| "learning_rate": 8.693196685564763e-05, | |
| "loss": 1.2438, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "grad_norm": 0.9291836023330688, | |
| "learning_rate": 8.691016136066289e-05, | |
| "loss": 1.2794, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "grad_norm": 1.036189317703247, | |
| "learning_rate": 8.688835586567815e-05, | |
| "loss": 1.2497, | |
| "step": 6220 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "grad_norm": 1.1179789304733276, | |
| "learning_rate": 8.686655037069343e-05, | |
| "loss": 1.2627, | |
| "step": 6230 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "grad_norm": 1.0586695671081543, | |
| "learning_rate": 8.684474487570868e-05, | |
| "loss": 1.2611, | |
| "step": 6240 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "grad_norm": 0.9113835692405701, | |
| "learning_rate": 8.682293938072394e-05, | |
| "loss": 1.2671, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "grad_norm": 0.911665141582489, | |
| "learning_rate": 8.68011338857392e-05, | |
| "loss": 1.2425, | |
| "step": 6260 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "grad_norm": 1.016471266746521, | |
| "learning_rate": 8.677932839075448e-05, | |
| "loss": 1.2672, | |
| "step": 6270 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "grad_norm": 1.0666197538375854, | |
| "learning_rate": 8.675752289576974e-05, | |
| "loss": 1.2647, | |
| "step": 6280 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "grad_norm": 1.042350172996521, | |
| "learning_rate": 8.673571740078499e-05, | |
| "loss": 1.2211, | |
| "step": 6290 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "grad_norm": 0.9714857339859009, | |
| "learning_rate": 8.671391190580027e-05, | |
| "loss": 1.2698, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "grad_norm": 0.9044662714004517, | |
| "learning_rate": 8.669210641081553e-05, | |
| "loss": 1.2753, | |
| "step": 6310 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "grad_norm": 0.8921557664871216, | |
| "learning_rate": 8.66703009158308e-05, | |
| "loss": 1.2528, | |
| "step": 6320 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "grad_norm": 0.9644028544425964, | |
| "learning_rate": 8.664849542084606e-05, | |
| "loss": 1.2642, | |
| "step": 6330 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "grad_norm": 1.0202399492263794, | |
| "learning_rate": 8.662668992586132e-05, | |
| "loss": 1.2473, | |
| "step": 6340 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "grad_norm": 1.0238714218139648, | |
| "learning_rate": 8.660488443087658e-05, | |
| "loss": 1.256, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "grad_norm": 1.1190308332443237, | |
| "learning_rate": 8.658307893589185e-05, | |
| "loss": 1.2579, | |
| "step": 6360 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "grad_norm": 0.9763012528419495, | |
| "learning_rate": 8.656127344090711e-05, | |
| "loss": 1.2607, | |
| "step": 6370 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "grad_norm": 0.9133914709091187, | |
| "learning_rate": 8.653946794592239e-05, | |
| "loss": 1.2685, | |
| "step": 6380 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "grad_norm": 0.9674580693244934, | |
| "learning_rate": 8.651766245093764e-05, | |
| "loss": 1.2533, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "grad_norm": 1.1029064655303955, | |
| "learning_rate": 8.64958569559529e-05, | |
| "loss": 1.2487, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "grad_norm": 0.9458103775978088, | |
| "learning_rate": 8.647405146096816e-05, | |
| "loss": 1.2677, | |
| "step": 6410 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "grad_norm": 1.1092442274093628, | |
| "learning_rate": 8.645224596598344e-05, | |
| "loss": 1.2624, | |
| "step": 6420 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "grad_norm": 1.1490038633346558, | |
| "learning_rate": 8.64304404709987e-05, | |
| "loss": 1.2566, | |
| "step": 6430 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "grad_norm": 0.9747464060783386, | |
| "learning_rate": 8.640863497601395e-05, | |
| "loss": 1.2571, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "grad_norm": 1.1297920942306519, | |
| "learning_rate": 8.638682948102921e-05, | |
| "loss": 1.2327, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "grad_norm": 0.9675096869468689, | |
| "learning_rate": 8.636502398604449e-05, | |
| "loss": 1.2327, | |
| "step": 6460 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "grad_norm": 0.9282464385032654, | |
| "learning_rate": 8.634321849105975e-05, | |
| "loss": 1.2323, | |
| "step": 6470 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "grad_norm": 1.011017918586731, | |
| "learning_rate": 8.632141299607502e-05, | |
| "loss": 1.2429, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "grad_norm": 1.02436363697052, | |
| "learning_rate": 8.629960750109028e-05, | |
| "loss": 1.2382, | |
| "step": 6490 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "grad_norm": 1.0600727796554565, | |
| "learning_rate": 8.627780200610554e-05, | |
| "loss": 1.2689, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "grad_norm": 0.9400041103363037, | |
| "learning_rate": 8.62559965111208e-05, | |
| "loss": 1.2804, | |
| "step": 6510 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "grad_norm": 1.156300663948059, | |
| "learning_rate": 8.623419101613607e-05, | |
| "loss": 1.2596, | |
| "step": 6520 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "grad_norm": 0.9240378141403198, | |
| "learning_rate": 8.621238552115133e-05, | |
| "loss": 1.24, | |
| "step": 6530 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "grad_norm": 0.8798494338989258, | |
| "learning_rate": 8.61905800261666e-05, | |
| "loss": 1.2526, | |
| "step": 6540 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "grad_norm": 0.9512797594070435, | |
| "learning_rate": 8.616877453118186e-05, | |
| "loss": 1.2602, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "grad_norm": 0.9985531568527222, | |
| "learning_rate": 8.614696903619712e-05, | |
| "loss": 1.2616, | |
| "step": 6560 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "grad_norm": 1.134756088256836, | |
| "learning_rate": 8.61251635412124e-05, | |
| "loss": 1.2688, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "grad_norm": 0.9372296333312988, | |
| "learning_rate": 8.610335804622766e-05, | |
| "loss": 1.2538, | |
| "step": 6580 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "grad_norm": 1.011887788772583, | |
| "learning_rate": 8.608155255124291e-05, | |
| "loss": 1.246, | |
| "step": 6590 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "grad_norm": 0.9553661346435547, | |
| "learning_rate": 8.605974705625817e-05, | |
| "loss": 1.2502, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "grad_norm": 0.9924313426017761, | |
| "learning_rate": 8.603794156127345e-05, | |
| "loss": 1.2362, | |
| "step": 6610 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "grad_norm": 1.05217707157135, | |
| "learning_rate": 8.601613606628871e-05, | |
| "loss": 1.2655, | |
| "step": 6620 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "grad_norm": 1.0302504301071167, | |
| "learning_rate": 8.599433057130398e-05, | |
| "loss": 1.2699, | |
| "step": 6630 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "grad_norm": 1.043373942375183, | |
| "learning_rate": 8.597252507631923e-05, | |
| "loss": 1.2532, | |
| "step": 6640 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "grad_norm": 0.9535781145095825, | |
| "learning_rate": 8.59507195813345e-05, | |
| "loss": 1.2586, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "grad_norm": 1.1055347919464111, | |
| "learning_rate": 8.592891408634977e-05, | |
| "loss": 1.2632, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "grad_norm": 1.0888850688934326, | |
| "learning_rate": 8.590710859136503e-05, | |
| "loss": 1.2497, | |
| "step": 6670 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "grad_norm": 0.9970211386680603, | |
| "learning_rate": 8.588530309638029e-05, | |
| "loss": 1.2869, | |
| "step": 6680 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "grad_norm": 1.0836609601974487, | |
| "learning_rate": 8.586349760139555e-05, | |
| "loss": 1.2321, | |
| "step": 6690 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "grad_norm": 0.9511786103248596, | |
| "learning_rate": 8.584169210641082e-05, | |
| "loss": 1.2562, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "grad_norm": 1.088644027709961, | |
| "learning_rate": 8.581988661142608e-05, | |
| "loss": 1.2418, | |
| "step": 6710 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "grad_norm": 1.0465929508209229, | |
| "learning_rate": 8.579808111644134e-05, | |
| "loss": 1.2608, | |
| "step": 6720 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "grad_norm": 1.12638521194458, | |
| "learning_rate": 8.577627562145662e-05, | |
| "loss": 1.2725, | |
| "step": 6730 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "grad_norm": 1.171322226524353, | |
| "learning_rate": 8.575447012647187e-05, | |
| "loss": 1.265, | |
| "step": 6740 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "grad_norm": 0.926113486289978, | |
| "learning_rate": 8.573266463148713e-05, | |
| "loss": 1.2559, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "grad_norm": 0.9716551899909973, | |
| "learning_rate": 8.57108591365024e-05, | |
| "loss": 1.2568, | |
| "step": 6760 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "grad_norm": 1.0213953256607056, | |
| "learning_rate": 8.568905364151767e-05, | |
| "loss": 1.2649, | |
| "step": 6770 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "grad_norm": 0.9643402099609375, | |
| "learning_rate": 8.566724814653294e-05, | |
| "loss": 1.2433, | |
| "step": 6780 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "grad_norm": 1.0367106199264526, | |
| "learning_rate": 8.564544265154819e-05, | |
| "loss": 1.2356, | |
| "step": 6790 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "grad_norm": 0.9655973315238953, | |
| "learning_rate": 8.562363715656346e-05, | |
| "loss": 1.2439, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "grad_norm": 1.0422053337097168, | |
| "learning_rate": 8.560183166157872e-05, | |
| "loss": 1.2528, | |
| "step": 6810 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "grad_norm": 0.9676966071128845, | |
| "learning_rate": 8.558002616659399e-05, | |
| "loss": 1.2577, | |
| "step": 6820 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "grad_norm": 0.9732950329780579, | |
| "learning_rate": 8.555822067160925e-05, | |
| "loss": 1.2513, | |
| "step": 6830 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "grad_norm": 1.0636634826660156, | |
| "learning_rate": 8.553641517662451e-05, | |
| "loss": 1.2694, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "grad_norm": 0.9392173290252686, | |
| "learning_rate": 8.551460968163978e-05, | |
| "loss": 1.2478, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "grad_norm": 0.9402878880500793, | |
| "learning_rate": 8.549280418665504e-05, | |
| "loss": 1.2528, | |
| "step": 6860 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "grad_norm": 1.0256085395812988, | |
| "learning_rate": 8.54709986916703e-05, | |
| "loss": 1.2704, | |
| "step": 6870 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "grad_norm": 1.0600332021713257, | |
| "learning_rate": 8.544919319668558e-05, | |
| "loss": 1.2338, | |
| "step": 6880 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": 1.0218205451965332, | |
| "learning_rate": 8.542738770170083e-05, | |
| "loss": 1.2839, | |
| "step": 6890 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": 0.8786155581474304, | |
| "learning_rate": 8.540558220671609e-05, | |
| "loss": 1.248, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": 0.9721015095710754, | |
| "learning_rate": 8.538377671173136e-05, | |
| "loss": 1.2734, | |
| "step": 6910 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": 0.9734498858451843, | |
| "learning_rate": 8.536197121674663e-05, | |
| "loss": 1.2454, | |
| "step": 6920 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": 0.9616742730140686, | |
| "learning_rate": 8.53401657217619e-05, | |
| "loss": 1.2565, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "grad_norm": 1.153671383857727, | |
| "learning_rate": 8.531836022677714e-05, | |
| "loss": 1.2549, | |
| "step": 6940 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "grad_norm": 0.9344118237495422, | |
| "learning_rate": 8.529655473179241e-05, | |
| "loss": 1.2431, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "grad_norm": 1.0228878259658813, | |
| "learning_rate": 8.527474923680768e-05, | |
| "loss": 1.2276, | |
| "step": 6960 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "grad_norm": 1.088304042816162, | |
| "learning_rate": 8.525294374182295e-05, | |
| "loss": 1.2423, | |
| "step": 6970 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "grad_norm": 0.9886937737464905, | |
| "learning_rate": 8.523113824683821e-05, | |
| "loss": 1.2693, | |
| "step": 6980 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "grad_norm": 0.8818524479866028, | |
| "learning_rate": 8.520933275185346e-05, | |
| "loss": 1.2424, | |
| "step": 6990 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "grad_norm": 0.9912683963775635, | |
| "learning_rate": 8.518752725686874e-05, | |
| "loss": 1.2522, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "grad_norm": 0.9952061176300049, | |
| "learning_rate": 8.5165721761884e-05, | |
| "loss": 1.2519, | |
| "step": 7010 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "grad_norm": 1.035301923751831, | |
| "learning_rate": 8.514391626689926e-05, | |
| "loss": 1.2501, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "grad_norm": 1.0349431037902832, | |
| "learning_rate": 8.512211077191452e-05, | |
| "loss": 1.2451, | |
| "step": 7030 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "grad_norm": 0.9751808643341064, | |
| "learning_rate": 8.510030527692979e-05, | |
| "loss": 1.2381, | |
| "step": 7040 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "grad_norm": 0.896840512752533, | |
| "learning_rate": 8.507849978194505e-05, | |
| "loss": 1.2509, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "grad_norm": 1.074179768562317, | |
| "learning_rate": 8.505669428696031e-05, | |
| "loss": 1.2439, | |
| "step": 7060 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "grad_norm": 1.0536302328109741, | |
| "learning_rate": 8.503488879197559e-05, | |
| "loss": 1.2795, | |
| "step": 7070 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "grad_norm": 0.9011424779891968, | |
| "learning_rate": 8.501308329699085e-05, | |
| "loss": 1.2418, | |
| "step": 7080 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "grad_norm": 0.9322314262390137, | |
| "learning_rate": 8.49912778020061e-05, | |
| "loss": 1.2576, | |
| "step": 7090 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "grad_norm": 0.9793155193328857, | |
| "learning_rate": 8.496947230702137e-05, | |
| "loss": 1.2492, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "grad_norm": 0.9420814514160156, | |
| "learning_rate": 8.494766681203664e-05, | |
| "loss": 1.2373, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "grad_norm": 0.8934997320175171, | |
| "learning_rate": 8.49258613170519e-05, | |
| "loss": 1.2433, | |
| "step": 7120 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "grad_norm": 1.0100373029708862, | |
| "learning_rate": 8.490405582206717e-05, | |
| "loss": 1.2397, | |
| "step": 7130 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "grad_norm": 0.9812464118003845, | |
| "learning_rate": 8.488225032708242e-05, | |
| "loss": 1.2536, | |
| "step": 7140 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "grad_norm": 1.0419830083847046, | |
| "learning_rate": 8.48604448320977e-05, | |
| "loss": 1.2531, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "grad_norm": 1.0287178754806519, | |
| "learning_rate": 8.483863933711296e-05, | |
| "loss": 1.2853, | |
| "step": 7160 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "grad_norm": 0.9258010983467102, | |
| "learning_rate": 8.481683384212822e-05, | |
| "loss": 1.2384, | |
| "step": 7170 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "grad_norm": 1.0923179388046265, | |
| "learning_rate": 8.479502834714348e-05, | |
| "loss": 1.2388, | |
| "step": 7180 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "grad_norm": 1.026920199394226, | |
| "learning_rate": 8.477322285215875e-05, | |
| "loss": 1.2403, | |
| "step": 7190 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "grad_norm": 1.071996808052063, | |
| "learning_rate": 8.475141735717401e-05, | |
| "loss": 1.257, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "grad_norm": 1.0824863910675049, | |
| "learning_rate": 8.472961186218927e-05, | |
| "loss": 1.2358, | |
| "step": 7210 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "grad_norm": 1.006395697593689, | |
| "learning_rate": 8.470780636720454e-05, | |
| "loss": 1.2675, | |
| "step": 7220 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "grad_norm": 0.9629374146461487, | |
| "learning_rate": 8.468600087221981e-05, | |
| "loss": 1.2377, | |
| "step": 7230 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "grad_norm": 0.9439448714256287, | |
| "learning_rate": 8.466419537723506e-05, | |
| "loss": 1.2269, | |
| "step": 7240 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "grad_norm": 0.9413838386535645, | |
| "learning_rate": 8.464238988225033e-05, | |
| "loss": 1.248, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "grad_norm": 0.9353733658790588, | |
| "learning_rate": 8.46205843872656e-05, | |
| "loss": 1.2535, | |
| "step": 7260 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "grad_norm": 1.0403653383255005, | |
| "learning_rate": 8.459877889228086e-05, | |
| "loss": 1.2323, | |
| "step": 7270 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "grad_norm": 0.8675696849822998, | |
| "learning_rate": 8.457697339729613e-05, | |
| "loss": 1.2712, | |
| "step": 7280 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "grad_norm": 0.9282375574111938, | |
| "learning_rate": 8.455516790231138e-05, | |
| "loss": 1.2259, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "grad_norm": 0.9778069853782654, | |
| "learning_rate": 8.453336240732665e-05, | |
| "loss": 1.2499, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "grad_norm": 1.0154436826705933, | |
| "learning_rate": 8.451155691234192e-05, | |
| "loss": 1.2253, | |
| "step": 7310 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "grad_norm": 0.9822314381599426, | |
| "learning_rate": 8.448975141735718e-05, | |
| "loss": 1.2583, | |
| "step": 7320 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "grad_norm": 1.0584256649017334, | |
| "learning_rate": 8.446794592237244e-05, | |
| "loss": 1.2682, | |
| "step": 7330 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "grad_norm": 1.035949945449829, | |
| "learning_rate": 8.44461404273877e-05, | |
| "loss": 1.2604, | |
| "step": 7340 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 0.9688887596130371, | |
| "learning_rate": 8.442433493240297e-05, | |
| "loss": 1.2308, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 1.0668280124664307, | |
| "learning_rate": 8.440252943741823e-05, | |
| "loss": 1.2523, | |
| "step": 7360 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 1.0507837533950806, | |
| "learning_rate": 8.43807239424335e-05, | |
| "loss": 1.2458, | |
| "step": 7370 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 0.9705730676651001, | |
| "learning_rate": 8.435891844744876e-05, | |
| "loss": 1.2623, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 1.1198492050170898, | |
| "learning_rate": 8.433711295246402e-05, | |
| "loss": 1.2263, | |
| "step": 7390 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "grad_norm": 1.090376853942871, | |
| "learning_rate": 8.431530745747928e-05, | |
| "loss": 1.2549, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "grad_norm": 0.9599369764328003, | |
| "learning_rate": 8.429350196249455e-05, | |
| "loss": 1.2453, | |
| "step": 7410 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "grad_norm": 0.9473201036453247, | |
| "learning_rate": 8.427169646750982e-05, | |
| "loss": 1.2449, | |
| "step": 7420 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "grad_norm": 1.0158095359802246, | |
| "learning_rate": 8.424989097252509e-05, | |
| "loss": 1.2395, | |
| "step": 7430 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "grad_norm": 1.1401153802871704, | |
| "learning_rate": 8.422808547754034e-05, | |
| "loss": 1.2426, | |
| "step": 7440 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "grad_norm": 0.9833976030349731, | |
| "learning_rate": 8.42062799825556e-05, | |
| "loss": 1.2238, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "grad_norm": 1.0531307458877563, | |
| "learning_rate": 8.418447448757088e-05, | |
| "loss": 1.2286, | |
| "step": 7460 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "grad_norm": 0.9833014607429504, | |
| "learning_rate": 8.416266899258614e-05, | |
| "loss": 1.2483, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "grad_norm": 1.0215846300125122, | |
| "learning_rate": 8.41408634976014e-05, | |
| "loss": 1.2434, | |
| "step": 7480 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "grad_norm": 0.9338911175727844, | |
| "learning_rate": 8.411905800261667e-05, | |
| "loss": 1.2263, | |
| "step": 7490 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "grad_norm": 0.9091663360595703, | |
| "learning_rate": 8.409725250763193e-05, | |
| "loss": 1.2359, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "grad_norm": 0.9303663969039917, | |
| "learning_rate": 8.407544701264719e-05, | |
| "loss": 1.243, | |
| "step": 7510 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "grad_norm": 0.9787565469741821, | |
| "learning_rate": 8.405364151766245e-05, | |
| "loss": 1.2444, | |
| "step": 7520 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "grad_norm": 1.1064313650131226, | |
| "learning_rate": 8.403183602267772e-05, | |
| "loss": 1.2438, | |
| "step": 7530 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "grad_norm": 0.9433283805847168, | |
| "learning_rate": 8.401003052769298e-05, | |
| "loss": 1.2442, | |
| "step": 7540 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "grad_norm": 0.9914006590843201, | |
| "learning_rate": 8.398822503270824e-05, | |
| "loss": 1.2595, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "grad_norm": 1.1178406476974487, | |
| "learning_rate": 8.39664195377235e-05, | |
| "loss": 1.2223, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "grad_norm": 1.1177582740783691, | |
| "learning_rate": 8.394461404273878e-05, | |
| "loss": 1.2284, | |
| "step": 7570 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "grad_norm": 1.0288305282592773, | |
| "learning_rate": 8.392280854775405e-05, | |
| "loss": 1.2329, | |
| "step": 7580 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "grad_norm": 1.078165054321289, | |
| "learning_rate": 8.39010030527693e-05, | |
| "loss": 1.2149, | |
| "step": 7590 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "grad_norm": 1.0270469188690186, | |
| "learning_rate": 8.387919755778456e-05, | |
| "loss": 1.2453, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "grad_norm": 1.142359733581543, | |
| "learning_rate": 8.385739206279984e-05, | |
| "loss": 1.2115, | |
| "step": 7610 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "grad_norm": 1.066074252128601, | |
| "learning_rate": 8.38355865678151e-05, | |
| "loss": 1.2282, | |
| "step": 7620 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "grad_norm": 0.9854233860969543, | |
| "learning_rate": 8.381378107283036e-05, | |
| "loss": 1.25, | |
| "step": 7630 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "grad_norm": 1.0901075601577759, | |
| "learning_rate": 8.379197557784561e-05, | |
| "loss": 1.2237, | |
| "step": 7640 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "grad_norm": 1.1587127447128296, | |
| "learning_rate": 8.377017008286089e-05, | |
| "loss": 1.219, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "grad_norm": 0.9623563289642334, | |
| "learning_rate": 8.374836458787615e-05, | |
| "loss": 1.2311, | |
| "step": 7660 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "grad_norm": 0.9470689296722412, | |
| "learning_rate": 8.372655909289141e-05, | |
| "loss": 1.2515, | |
| "step": 7670 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "grad_norm": 0.9638876914978027, | |
| "learning_rate": 8.370475359790668e-05, | |
| "loss": 1.2532, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "grad_norm": 1.163567304611206, | |
| "learning_rate": 8.368294810292194e-05, | |
| "loss": 1.2615, | |
| "step": 7690 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "grad_norm": 1.001160979270935, | |
| "learning_rate": 8.36611426079372e-05, | |
| "loss": 1.2472, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "grad_norm": 1.0169782638549805, | |
| "learning_rate": 8.363933711295247e-05, | |
| "loss": 1.2473, | |
| "step": 7710 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "grad_norm": 0.9867805242538452, | |
| "learning_rate": 8.361753161796774e-05, | |
| "loss": 1.2452, | |
| "step": 7720 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "grad_norm": 1.0535905361175537, | |
| "learning_rate": 8.359572612298299e-05, | |
| "loss": 1.2405, | |
| "step": 7730 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "grad_norm": 0.9246835708618164, | |
| "learning_rate": 8.357392062799825e-05, | |
| "loss": 1.2522, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "grad_norm": 1.0927287340164185, | |
| "learning_rate": 8.355211513301352e-05, | |
| "loss": 1.2493, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "grad_norm": 1.054208755493164, | |
| "learning_rate": 8.35303096380288e-05, | |
| "loss": 1.263, | |
| "step": 7760 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "grad_norm": 0.9636792540550232, | |
| "learning_rate": 8.350850414304406e-05, | |
| "loss": 1.2426, | |
| "step": 7770 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "grad_norm": 1.0837719440460205, | |
| "learning_rate": 8.348669864805932e-05, | |
| "loss": 1.2265, | |
| "step": 7780 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "grad_norm": 0.9462710022926331, | |
| "learning_rate": 8.346489315307457e-05, | |
| "loss": 1.2242, | |
| "step": 7790 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "grad_norm": 0.987519383430481, | |
| "learning_rate": 8.344308765808985e-05, | |
| "loss": 1.2261, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "grad_norm": 1.0755093097686768, | |
| "learning_rate": 8.342128216310511e-05, | |
| "loss": 1.2486, | |
| "step": 7810 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "grad_norm": 0.9885231852531433, | |
| "learning_rate": 8.339947666812037e-05, | |
| "loss": 1.2325, | |
| "step": 7820 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "grad_norm": 1.0870469808578491, | |
| "learning_rate": 8.337767117313564e-05, | |
| "loss": 1.2175, | |
| "step": 7830 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "grad_norm": 1.0006695985794067, | |
| "learning_rate": 8.33558656781509e-05, | |
| "loss": 1.2521, | |
| "step": 7840 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "grad_norm": 1.0880390405654907, | |
| "learning_rate": 8.333406018316616e-05, | |
| "loss": 1.2353, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "grad_norm": 0.9993226528167725, | |
| "learning_rate": 8.331225468818142e-05, | |
| "loss": 1.2365, | |
| "step": 7860 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "grad_norm": 0.964745819568634, | |
| "learning_rate": 8.329044919319669e-05, | |
| "loss": 1.2566, | |
| "step": 7870 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "grad_norm": 0.9665801525115967, | |
| "learning_rate": 8.326864369821195e-05, | |
| "loss": 1.2266, | |
| "step": 7880 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "grad_norm": 1.0917197465896606, | |
| "learning_rate": 8.324683820322721e-05, | |
| "loss": 1.2457, | |
| "step": 7890 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "grad_norm": 1.1263692378997803, | |
| "learning_rate": 8.322503270824248e-05, | |
| "loss": 1.2312, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "grad_norm": 0.9168413877487183, | |
| "learning_rate": 8.320322721325774e-05, | |
| "loss": 1.223, | |
| "step": 7910 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "grad_norm": 0.9771096706390381, | |
| "learning_rate": 8.318142171827302e-05, | |
| "loss": 1.2219, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "grad_norm": 0.9901739358901978, | |
| "learning_rate": 8.315961622328828e-05, | |
| "loss": 1.2405, | |
| "step": 7930 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "grad_norm": 1.004320502281189, | |
| "learning_rate": 8.313781072830353e-05, | |
| "loss": 1.2584, | |
| "step": 7940 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "grad_norm": 0.897678554058075, | |
| "learning_rate": 8.31160052333188e-05, | |
| "loss": 1.2359, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "grad_norm": 0.9914141893386841, | |
| "learning_rate": 8.309419973833407e-05, | |
| "loss": 1.2269, | |
| "step": 7960 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "grad_norm": 1.1783164739608765, | |
| "learning_rate": 8.307239424334933e-05, | |
| "loss": 1.2208, | |
| "step": 7970 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "grad_norm": 1.0260601043701172, | |
| "learning_rate": 8.30505887483646e-05, | |
| "loss": 1.2206, | |
| "step": 7980 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "grad_norm": 0.9606086015701294, | |
| "learning_rate": 8.302878325337986e-05, | |
| "loss": 1.246, | |
| "step": 7990 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "grad_norm": 1.0758907794952393, | |
| "learning_rate": 8.300697775839512e-05, | |
| "loss": 1.2386, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "grad_norm": 0.9541261792182922, | |
| "learning_rate": 8.298517226341038e-05, | |
| "loss": 1.2554, | |
| "step": 8010 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "grad_norm": 1.130035161972046, | |
| "learning_rate": 8.296336676842565e-05, | |
| "loss": 1.2292, | |
| "step": 8020 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "grad_norm": 0.9219099879264832, | |
| "learning_rate": 8.294156127344091e-05, | |
| "loss": 1.2486, | |
| "step": 8030 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "grad_norm": 0.9194048643112183, | |
| "learning_rate": 8.291975577845617e-05, | |
| "loss": 1.2065, | |
| "step": 8040 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "grad_norm": 1.0724278688430786, | |
| "learning_rate": 8.289795028347144e-05, | |
| "loss": 1.232, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "grad_norm": 1.0829250812530518, | |
| "learning_rate": 8.28761447884867e-05, | |
| "loss": 1.2374, | |
| "step": 8060 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "grad_norm": 0.9441924691200256, | |
| "learning_rate": 8.285433929350198e-05, | |
| "loss": 1.2248, | |
| "step": 8070 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "grad_norm": 1.0257307291030884, | |
| "learning_rate": 8.283253379851722e-05, | |
| "loss": 1.2356, | |
| "step": 8080 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "grad_norm": 0.8646122813224792, | |
| "learning_rate": 8.281072830353249e-05, | |
| "loss": 1.2497, | |
| "step": 8090 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "grad_norm": 1.100232481956482, | |
| "learning_rate": 8.278892280854775e-05, | |
| "loss": 1.2365, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "grad_norm": 1.0597792863845825, | |
| "learning_rate": 8.276711731356303e-05, | |
| "loss": 1.2403, | |
| "step": 8110 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "grad_norm": 1.0088367462158203, | |
| "learning_rate": 8.274531181857829e-05, | |
| "loss": 1.2281, | |
| "step": 8120 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "grad_norm": 1.0818982124328613, | |
| "learning_rate": 8.272350632359355e-05, | |
| "loss": 1.2427, | |
| "step": 8130 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "grad_norm": 0.9281474947929382, | |
| "learning_rate": 8.27017008286088e-05, | |
| "loss": 1.2595, | |
| "step": 8140 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "grad_norm": 0.9748603105545044, | |
| "learning_rate": 8.267989533362408e-05, | |
| "loss": 1.248, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "grad_norm": 1.027099370956421, | |
| "learning_rate": 8.265808983863934e-05, | |
| "loss": 1.2313, | |
| "step": 8160 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "grad_norm": 1.0615408420562744, | |
| "learning_rate": 8.26362843436546e-05, | |
| "loss": 1.2549, | |
| "step": 8170 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "grad_norm": 0.9190282225608826, | |
| "learning_rate": 8.261447884866987e-05, | |
| "loss": 1.2169, | |
| "step": 8180 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "grad_norm": 0.9824718236923218, | |
| "learning_rate": 8.259267335368513e-05, | |
| "loss": 1.2505, | |
| "step": 8190 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "grad_norm": 0.9848600029945374, | |
| "learning_rate": 8.25708678587004e-05, | |
| "loss": 1.2414, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "grad_norm": 0.9373934268951416, | |
| "learning_rate": 8.254906236371566e-05, | |
| "loss": 1.2294, | |
| "step": 8210 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "grad_norm": 1.0315806865692139, | |
| "learning_rate": 8.252725686873093e-05, | |
| "loss": 1.2259, | |
| "step": 8220 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "grad_norm": 1.0654377937316895, | |
| "learning_rate": 8.250545137374618e-05, | |
| "loss": 1.249, | |
| "step": 8230 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "grad_norm": 1.0188405513763428, | |
| "learning_rate": 8.248364587876145e-05, | |
| "loss": 1.2361, | |
| "step": 8240 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "grad_norm": 0.9202408790588379, | |
| "learning_rate": 8.246184038377671e-05, | |
| "loss": 1.2344, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "grad_norm": 0.953535795211792, | |
| "learning_rate": 8.244003488879199e-05, | |
| "loss": 1.2439, | |
| "step": 8260 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "grad_norm": 0.8910773992538452, | |
| "learning_rate": 8.241822939380725e-05, | |
| "loss": 1.2417, | |
| "step": 8270 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "grad_norm": 1.0123344659805298, | |
| "learning_rate": 8.23964238988225e-05, | |
| "loss": 1.2437, | |
| "step": 8280 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "grad_norm": 0.9692454934120178, | |
| "learning_rate": 8.237461840383776e-05, | |
| "loss": 1.2414, | |
| "step": 8290 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "grad_norm": 1.2110908031463623, | |
| "learning_rate": 8.235281290885304e-05, | |
| "loss": 1.2273, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 1.8, | |
| "grad_norm": 0.9399771690368652, | |
| "learning_rate": 8.23310074138683e-05, | |
| "loss": 1.2305, | |
| "step": 8310 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "grad_norm": 1.0485948324203491, | |
| "learning_rate": 8.230920191888356e-05, | |
| "loss": 1.2243, | |
| "step": 8320 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "grad_norm": 1.1290273666381836, | |
| "learning_rate": 8.228739642389883e-05, | |
| "loss": 1.2647, | |
| "step": 8330 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "grad_norm": 1.113707184791565, | |
| "learning_rate": 8.226559092891409e-05, | |
| "loss": 1.2396, | |
| "step": 8340 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "grad_norm": 1.161978006362915, | |
| "learning_rate": 8.224378543392935e-05, | |
| "loss": 1.2371, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "grad_norm": 1.075077772140503, | |
| "learning_rate": 8.222197993894462e-05, | |
| "loss": 1.2326, | |
| "step": 8360 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "grad_norm": 0.9579611420631409, | |
| "learning_rate": 8.220017444395988e-05, | |
| "loss": 1.2212, | |
| "step": 8370 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "grad_norm": 1.0509251356124878, | |
| "learning_rate": 8.217836894897514e-05, | |
| "loss": 1.2234, | |
| "step": 8380 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "grad_norm": 1.02772057056427, | |
| "learning_rate": 8.21565634539904e-05, | |
| "loss": 1.212, | |
| "step": 8390 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "grad_norm": 1.0468199253082275, | |
| "learning_rate": 8.213475795900567e-05, | |
| "loss": 1.2328, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "grad_norm": 0.9836091995239258, | |
| "learning_rate": 8.211295246402095e-05, | |
| "loss": 1.2368, | |
| "step": 8410 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "grad_norm": 1.0582927465438843, | |
| "learning_rate": 8.209114696903621e-05, | |
| "loss": 1.2466, | |
| "step": 8420 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "grad_norm": 1.039549708366394, | |
| "learning_rate": 8.206934147405146e-05, | |
| "loss": 1.2334, | |
| "step": 8430 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "grad_norm": 0.9211510419845581, | |
| "learning_rate": 8.204753597906672e-05, | |
| "loss": 1.2205, | |
| "step": 8440 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "grad_norm": 1.019851565361023, | |
| "learning_rate": 8.2025730484082e-05, | |
| "loss": 1.2416, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "grad_norm": 1.0609748363494873, | |
| "learning_rate": 8.200392498909726e-05, | |
| "loss": 1.2315, | |
| "step": 8460 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "grad_norm": 1.1158742904663086, | |
| "learning_rate": 8.198211949411252e-05, | |
| "loss": 1.2485, | |
| "step": 8470 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "grad_norm": 0.8996789455413818, | |
| "learning_rate": 8.196031399912779e-05, | |
| "loss": 1.2309, | |
| "step": 8480 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "grad_norm": 0.9898722171783447, | |
| "learning_rate": 8.193850850414305e-05, | |
| "loss": 1.236, | |
| "step": 8490 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "grad_norm": 1.1336474418640137, | |
| "learning_rate": 8.191670300915831e-05, | |
| "loss": 1.2375, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "grad_norm": 0.9630258679389954, | |
| "learning_rate": 8.189489751417358e-05, | |
| "loss": 1.2462, | |
| "step": 8510 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "grad_norm": 0.9450762271881104, | |
| "learning_rate": 8.187309201918884e-05, | |
| "loss": 1.221, | |
| "step": 8520 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "grad_norm": 0.9798605442047119, | |
| "learning_rate": 8.18512865242041e-05, | |
| "loss": 1.2222, | |
| "step": 8530 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "grad_norm": 0.9023801684379578, | |
| "learning_rate": 8.182948102921936e-05, | |
| "loss": 1.2193, | |
| "step": 8540 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "grad_norm": 0.9918519258499146, | |
| "learning_rate": 8.180767553423463e-05, | |
| "loss": 1.2538, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "grad_norm": 1.078640341758728, | |
| "learning_rate": 8.178587003924989e-05, | |
| "loss": 1.2239, | |
| "step": 8560 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "grad_norm": 1.1001946926116943, | |
| "learning_rate": 8.176406454426517e-05, | |
| "loss": 1.2542, | |
| "step": 8570 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "grad_norm": 0.9115540385246277, | |
| "learning_rate": 8.174225904928042e-05, | |
| "loss": 1.2231, | |
| "step": 8580 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "grad_norm": 1.0351630449295044, | |
| "learning_rate": 8.172045355429568e-05, | |
| "loss": 1.2328, | |
| "step": 8590 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "grad_norm": 1.1193772554397583, | |
| "learning_rate": 8.169864805931094e-05, | |
| "loss": 1.2344, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "grad_norm": 0.926569402217865, | |
| "learning_rate": 8.167684256432622e-05, | |
| "loss": 1.2318, | |
| "step": 8610 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "grad_norm": 1.1995497941970825, | |
| "learning_rate": 8.165503706934148e-05, | |
| "loss": 1.2645, | |
| "step": 8620 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "grad_norm": 1.0718098878860474, | |
| "learning_rate": 8.163323157435673e-05, | |
| "loss": 1.2372, | |
| "step": 8630 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "grad_norm": 1.0319968461990356, | |
| "learning_rate": 8.161142607937201e-05, | |
| "loss": 1.222, | |
| "step": 8640 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "grad_norm": 1.0868433713912964, | |
| "learning_rate": 8.158962058438727e-05, | |
| "loss": 1.2381, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "grad_norm": 1.0332001447677612, | |
| "learning_rate": 8.156781508940253e-05, | |
| "loss": 1.208, | |
| "step": 8660 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "grad_norm": 1.050507664680481, | |
| "learning_rate": 8.15460095944178e-05, | |
| "loss": 1.2276, | |
| "step": 8670 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "grad_norm": 0.9764347672462463, | |
| "learning_rate": 8.152420409943306e-05, | |
| "loss": 1.2289, | |
| "step": 8680 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "grad_norm": 0.9142500758171082, | |
| "learning_rate": 8.150239860444832e-05, | |
| "loss": 1.2109, | |
| "step": 8690 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "grad_norm": 1.028554916381836, | |
| "learning_rate": 8.148059310946359e-05, | |
| "loss": 1.2245, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "grad_norm": 1.09976327419281, | |
| "learning_rate": 8.145878761447885e-05, | |
| "loss": 1.2387, | |
| "step": 8710 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "grad_norm": 1.0482656955718994, | |
| "learning_rate": 8.143698211949413e-05, | |
| "loss": 1.2225, | |
| "step": 8720 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "grad_norm": 0.953663170337677, | |
| "learning_rate": 8.141517662450938e-05, | |
| "loss": 1.2605, | |
| "step": 8730 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "grad_norm": 1.0766589641571045, | |
| "learning_rate": 8.139337112952464e-05, | |
| "loss": 1.2348, | |
| "step": 8740 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "grad_norm": 1.1204911470413208, | |
| "learning_rate": 8.13715656345399e-05, | |
| "loss": 1.2248, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "grad_norm": 1.0836663246154785, | |
| "learning_rate": 8.134976013955518e-05, | |
| "loss": 1.2463, | |
| "step": 8760 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "grad_norm": 1.0038310289382935, | |
| "learning_rate": 8.132795464457044e-05, | |
| "loss": 1.2415, | |
| "step": 8770 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "grad_norm": 0.9727823138237, | |
| "learning_rate": 8.130614914958569e-05, | |
| "loss": 1.2291, | |
| "step": 8780 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "grad_norm": 0.9913771748542786, | |
| "learning_rate": 8.128434365460095e-05, | |
| "loss": 1.2374, | |
| "step": 8790 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "grad_norm": 1.0077624320983887, | |
| "learning_rate": 8.126253815961623e-05, | |
| "loss": 1.2126, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "grad_norm": 0.9802316427230835, | |
| "learning_rate": 8.12407326646315e-05, | |
| "loss": 1.2084, | |
| "step": 8810 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "grad_norm": 1.1375538110733032, | |
| "learning_rate": 8.121892716964676e-05, | |
| "loss": 1.231, | |
| "step": 8820 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "grad_norm": 1.0553092956542969, | |
| "learning_rate": 8.119712167466202e-05, | |
| "loss": 1.2132, | |
| "step": 8830 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "grad_norm": 0.9583929777145386, | |
| "learning_rate": 8.117531617967728e-05, | |
| "loss": 1.2492, | |
| "step": 8840 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "grad_norm": 1.1101999282836914, | |
| "learning_rate": 8.115351068469255e-05, | |
| "loss": 1.2381, | |
| "step": 8850 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "grad_norm": 0.9837037920951843, | |
| "learning_rate": 8.113170518970781e-05, | |
| "loss": 1.2122, | |
| "step": 8860 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "grad_norm": 0.9561728835105896, | |
| "learning_rate": 8.110989969472309e-05, | |
| "loss": 1.2371, | |
| "step": 8870 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "grad_norm": 1.0024539232254028, | |
| "learning_rate": 8.108809419973834e-05, | |
| "loss": 1.2421, | |
| "step": 8880 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "grad_norm": 0.8823496103286743, | |
| "learning_rate": 8.10662887047536e-05, | |
| "loss": 1.2221, | |
| "step": 8890 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "grad_norm": 0.9598950743675232, | |
| "learning_rate": 8.104448320976886e-05, | |
| "loss": 1.2043, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "grad_norm": 1.165281057357788, | |
| "learning_rate": 8.102267771478414e-05, | |
| "loss": 1.2261, | |
| "step": 8910 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "grad_norm": 0.9209827184677124, | |
| "learning_rate": 8.10008722197994e-05, | |
| "loss": 1.2196, | |
| "step": 8920 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "grad_norm": 1.023848056793213, | |
| "learning_rate": 8.097906672481465e-05, | |
| "loss": 1.2393, | |
| "step": 8930 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "grad_norm": 1.0043749809265137, | |
| "learning_rate": 8.095726122982991e-05, | |
| "loss": 1.2362, | |
| "step": 8940 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "grad_norm": 0.9257699251174927, | |
| "learning_rate": 8.093545573484519e-05, | |
| "loss": 1.2258, | |
| "step": 8950 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "grad_norm": 1.1696765422821045, | |
| "learning_rate": 8.091365023986045e-05, | |
| "loss": 1.2459, | |
| "step": 8960 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "grad_norm": 0.9257934093475342, | |
| "learning_rate": 8.089184474487572e-05, | |
| "loss": 1.2492, | |
| "step": 8970 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "grad_norm": 1.1503798961639404, | |
| "learning_rate": 8.087003924989097e-05, | |
| "loss": 1.2311, | |
| "step": 8980 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "grad_norm": 1.1405220031738281, | |
| "learning_rate": 8.084823375490624e-05, | |
| "loss": 1.2409, | |
| "step": 8990 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "grad_norm": 0.976625382900238, | |
| "learning_rate": 8.08264282599215e-05, | |
| "loss": 1.2266, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "grad_norm": 0.9233745336532593, | |
| "learning_rate": 8.080462276493677e-05, | |
| "loss": 1.2261, | |
| "step": 9010 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "grad_norm": 1.0994141101837158, | |
| "learning_rate": 8.078281726995203e-05, | |
| "loss": 1.2352, | |
| "step": 9020 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "grad_norm": 0.9999457001686096, | |
| "learning_rate": 8.07610117749673e-05, | |
| "loss": 1.2238, | |
| "step": 9030 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "grad_norm": 1.0037119388580322, | |
| "learning_rate": 8.073920627998256e-05, | |
| "loss": 1.2439, | |
| "step": 9040 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "grad_norm": 0.9493910670280457, | |
| "learning_rate": 8.071740078499782e-05, | |
| "loss": 1.2253, | |
| "step": 9050 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "grad_norm": 1.099271535873413, | |
| "learning_rate": 8.069559529001308e-05, | |
| "loss": 1.211, | |
| "step": 9060 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "grad_norm": 0.9729533791542053, | |
| "learning_rate": 8.067378979502836e-05, | |
| "loss": 1.2257, | |
| "step": 9070 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "grad_norm": 1.112057089805603, | |
| "learning_rate": 8.065198430004361e-05, | |
| "loss": 1.2092, | |
| "step": 9080 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "grad_norm": 0.9645751714706421, | |
| "learning_rate": 8.063017880505887e-05, | |
| "loss": 1.2123, | |
| "step": 9090 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "grad_norm": 1.0263340473175049, | |
| "learning_rate": 8.060837331007415e-05, | |
| "loss": 1.2033, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "grad_norm": 1.1131114959716797, | |
| "learning_rate": 8.058656781508941e-05, | |
| "loss": 1.2303, | |
| "step": 9110 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "grad_norm": 1.1425633430480957, | |
| "learning_rate": 8.056476232010468e-05, | |
| "loss": 1.2166, | |
| "step": 9120 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "grad_norm": 0.9223284721374512, | |
| "learning_rate": 8.054295682511992e-05, | |
| "loss": 1.2588, | |
| "step": 9130 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "grad_norm": 0.9477842450141907, | |
| "learning_rate": 8.05211513301352e-05, | |
| "loss": 1.2028, | |
| "step": 9140 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "grad_norm": 1.0649006366729736, | |
| "learning_rate": 8.049934583515046e-05, | |
| "loss": 1.2238, | |
| "step": 9150 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "grad_norm": 1.0043710470199585, | |
| "learning_rate": 8.047754034016573e-05, | |
| "loss": 1.2301, | |
| "step": 9160 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "grad_norm": 1.0217610597610474, | |
| "learning_rate": 8.045573484518099e-05, | |
| "loss": 1.2406, | |
| "step": 9170 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "grad_norm": 0.9688403606414795, | |
| "learning_rate": 8.043392935019625e-05, | |
| "loss": 1.2364, | |
| "step": 9180 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 1.095987319946289, | |
| "learning_rate": 8.041212385521152e-05, | |
| "loss": 1.241, | |
| "step": 9190 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.9398607611656189, | |
| "learning_rate": 8.039031836022678e-05, | |
| "loss": 1.226, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.9815939664840698, | |
| "learning_rate": 8.036851286524204e-05, | |
| "loss": 1.2181, | |
| "step": 9210 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 1.2817823886871338, | |
| "eval_runtime": 1495.0675, | |
| "eval_samples_per_second": 258.75, | |
| "eval_steps_per_second": 4.043, | |
| "step": 9212 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 1.0157184600830078, | |
| "learning_rate": 8.034670737025732e-05, | |
| "loss": 1.2142, | |
| "step": 9220 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 0.9625092148780823, | |
| "learning_rate": 8.032490187527257e-05, | |
| "loss": 1.2089, | |
| "step": 9230 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "grad_norm": 0.9196017384529114, | |
| "learning_rate": 8.030309638028783e-05, | |
| "loss": 1.2335, | |
| "step": 9240 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "grad_norm": 0.9308544397354126, | |
| "learning_rate": 8.02812908853031e-05, | |
| "loss": 1.2163, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "grad_norm": 1.2144242525100708, | |
| "learning_rate": 8.025948539031837e-05, | |
| "loss": 1.2008, | |
| "step": 9260 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "grad_norm": 0.9780566692352295, | |
| "learning_rate": 8.023767989533363e-05, | |
| "loss": 1.1919, | |
| "step": 9270 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "grad_norm": 0.9934610724449158, | |
| "learning_rate": 8.021587440034888e-05, | |
| "loss": 1.1813, | |
| "step": 9280 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "grad_norm": 1.1047219038009644, | |
| "learning_rate": 8.019406890536415e-05, | |
| "loss": 1.1887, | |
| "step": 9290 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "grad_norm": 1.0617597103118896, | |
| "learning_rate": 8.017226341037942e-05, | |
| "loss": 1.2142, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "grad_norm": 0.9656373858451843, | |
| "learning_rate": 8.015045791539469e-05, | |
| "loss": 1.1962, | |
| "step": 9310 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "grad_norm": 0.9934256076812744, | |
| "learning_rate": 8.012865242040995e-05, | |
| "loss": 1.2093, | |
| "step": 9320 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "grad_norm": 1.0616453886032104, | |
| "learning_rate": 8.010684692542521e-05, | |
| "loss": 1.227, | |
| "step": 9330 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "grad_norm": 1.0761624574661255, | |
| "learning_rate": 8.008504143044048e-05, | |
| "loss": 1.2126, | |
| "step": 9340 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "grad_norm": 1.06252920627594, | |
| "learning_rate": 8.006323593545574e-05, | |
| "loss": 1.1966, | |
| "step": 9350 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "grad_norm": 0.9828883409500122, | |
| "learning_rate": 8.0041430440471e-05, | |
| "loss": 1.2032, | |
| "step": 9360 | |
| }, | |
| { | |
| "epoch": 2.03, | |
| "grad_norm": 1.0415362119674683, | |
| "learning_rate": 8.001962494548628e-05, | |
| "loss": 1.2069, | |
| "step": 9370 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "grad_norm": 0.9932116866111755, | |
| "learning_rate": 7.999781945050153e-05, | |
| "loss": 1.2099, | |
| "step": 9380 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "grad_norm": 1.0453740358352661, | |
| "learning_rate": 7.997601395551679e-05, | |
| "loss": 1.1908, | |
| "step": 9390 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "grad_norm": 0.9478277564048767, | |
| "learning_rate": 7.995420846053205e-05, | |
| "loss": 1.2016, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "grad_norm": 0.9447776079177856, | |
| "learning_rate": 7.993240296554733e-05, | |
| "loss": 1.2163, | |
| "step": 9410 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "grad_norm": 0.9693462252616882, | |
| "learning_rate": 7.991059747056259e-05, | |
| "loss": 1.1871, | |
| "step": 9420 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "grad_norm": 1.2381738424301147, | |
| "learning_rate": 7.988879197557784e-05, | |
| "loss": 1.214, | |
| "step": 9430 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "grad_norm": 0.9551769495010376, | |
| "learning_rate": 7.98669864805931e-05, | |
| "loss": 1.2026, | |
| "step": 9440 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "grad_norm": 1.009376883506775, | |
| "learning_rate": 7.984518098560838e-05, | |
| "loss": 1.1991, | |
| "step": 9450 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "grad_norm": 0.9546257257461548, | |
| "learning_rate": 7.982337549062365e-05, | |
| "loss": 1.2164, | |
| "step": 9460 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "grad_norm": 0.9941860437393188, | |
| "learning_rate": 7.980156999563891e-05, | |
| "loss": 1.2111, | |
| "step": 9470 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "grad_norm": 1.211512565612793, | |
| "learning_rate": 7.977976450065416e-05, | |
| "loss": 1.1795, | |
| "step": 9480 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "grad_norm": 1.004779577255249, | |
| "learning_rate": 7.975795900566943e-05, | |
| "loss": 1.2049, | |
| "step": 9490 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "grad_norm": 1.0823005437850952, | |
| "learning_rate": 7.97361535106847e-05, | |
| "loss": 1.1886, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "grad_norm": 1.0418225526809692, | |
| "learning_rate": 7.971434801569996e-05, | |
| "loss": 1.2105, | |
| "step": 9510 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "grad_norm": 1.1182845830917358, | |
| "learning_rate": 7.969254252071522e-05, | |
| "loss": 1.1897, | |
| "step": 9520 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "grad_norm": 0.946642279624939, | |
| "learning_rate": 7.967073702573049e-05, | |
| "loss": 1.199, | |
| "step": 9530 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "grad_norm": 1.1157629489898682, | |
| "learning_rate": 7.964893153074575e-05, | |
| "loss": 1.2294, | |
| "step": 9540 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "grad_norm": 1.053207516670227, | |
| "learning_rate": 7.962712603576101e-05, | |
| "loss": 1.2412, | |
| "step": 9550 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "grad_norm": 0.9756922721862793, | |
| "learning_rate": 7.960532054077629e-05, | |
| "loss": 1.1976, | |
| "step": 9560 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "grad_norm": 1.049428105354309, | |
| "learning_rate": 7.958351504579155e-05, | |
| "loss": 1.2254, | |
| "step": 9570 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "grad_norm": 0.9671922922134399, | |
| "learning_rate": 7.95617095508068e-05, | |
| "loss": 1.1905, | |
| "step": 9580 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "grad_norm": 1.0883835554122925, | |
| "learning_rate": 7.953990405582206e-05, | |
| "loss": 1.2032, | |
| "step": 9590 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "grad_norm": 1.080729365348816, | |
| "learning_rate": 7.951809856083734e-05, | |
| "loss": 1.216, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "grad_norm": 0.9762791395187378, | |
| "learning_rate": 7.94962930658526e-05, | |
| "loss": 1.2167, | |
| "step": 9610 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "grad_norm": 1.1527519226074219, | |
| "learning_rate": 7.947448757086787e-05, | |
| "loss": 1.1682, | |
| "step": 9620 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "grad_norm": 1.0505051612854004, | |
| "learning_rate": 7.945268207588312e-05, | |
| "loss": 1.211, | |
| "step": 9630 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "grad_norm": 1.1166177988052368, | |
| "learning_rate": 7.94308765808984e-05, | |
| "loss": 1.1763, | |
| "step": 9640 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "grad_norm": 1.038783073425293, | |
| "learning_rate": 7.940907108591366e-05, | |
| "loss": 1.2113, | |
| "step": 9650 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "grad_norm": 1.0138919353485107, | |
| "learning_rate": 7.938726559092892e-05, | |
| "loss": 1.214, | |
| "step": 9660 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "grad_norm": 0.8989730477333069, | |
| "learning_rate": 7.936546009594418e-05, | |
| "loss": 1.1975, | |
| "step": 9670 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "grad_norm": 0.9866936206817627, | |
| "learning_rate": 7.934365460095945e-05, | |
| "loss": 1.2163, | |
| "step": 9680 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "grad_norm": 0.9352193474769592, | |
| "learning_rate": 7.932184910597471e-05, | |
| "loss": 1.1936, | |
| "step": 9690 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "grad_norm": 0.9865077137947083, | |
| "learning_rate": 7.930004361098997e-05, | |
| "loss": 1.2279, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "grad_norm": 0.9269611835479736, | |
| "learning_rate": 7.927823811600523e-05, | |
| "loss": 1.2089, | |
| "step": 9710 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "grad_norm": 1.0865782499313354, | |
| "learning_rate": 7.92564326210205e-05, | |
| "loss": 1.2073, | |
| "step": 9720 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "grad_norm": 1.077241063117981, | |
| "learning_rate": 7.923462712603576e-05, | |
| "loss": 1.1952, | |
| "step": 9730 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "grad_norm": 1.1019902229309082, | |
| "learning_rate": 7.921282163105102e-05, | |
| "loss": 1.1845, | |
| "step": 9740 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "grad_norm": 1.1047565937042236, | |
| "learning_rate": 7.919101613606629e-05, | |
| "loss": 1.2115, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "grad_norm": 1.038865327835083, | |
| "learning_rate": 7.916921064108156e-05, | |
| "loss": 1.1764, | |
| "step": 9760 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "grad_norm": 1.039838194847107, | |
| "learning_rate": 7.914740514609683e-05, | |
| "loss": 1.2061, | |
| "step": 9770 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "grad_norm": 1.1482833623886108, | |
| "learning_rate": 7.912559965111208e-05, | |
| "loss": 1.1819, | |
| "step": 9780 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "grad_norm": 1.2092708349227905, | |
| "learning_rate": 7.910379415612735e-05, | |
| "loss": 1.2204, | |
| "step": 9790 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "grad_norm": 0.9620797634124756, | |
| "learning_rate": 7.908198866114262e-05, | |
| "loss": 1.2282, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "grad_norm": 0.9821200966835022, | |
| "learning_rate": 7.906018316615788e-05, | |
| "loss": 1.1928, | |
| "step": 9810 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "grad_norm": 0.9970041513442993, | |
| "learning_rate": 7.903837767117314e-05, | |
| "loss": 1.2293, | |
| "step": 9820 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "grad_norm": 1.0370044708251953, | |
| "learning_rate": 7.90165721761884e-05, | |
| "loss": 1.2015, | |
| "step": 9830 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "grad_norm": 0.9988645911216736, | |
| "learning_rate": 7.899476668120367e-05, | |
| "loss": 1.1827, | |
| "step": 9840 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "grad_norm": 1.0234349966049194, | |
| "learning_rate": 7.897296118621893e-05, | |
| "loss": 1.2185, | |
| "step": 9850 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "grad_norm": 1.1477036476135254, | |
| "learning_rate": 7.89511556912342e-05, | |
| "loss": 1.2108, | |
| "step": 9860 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "grad_norm": 1.1326051950454712, | |
| "learning_rate": 7.892935019624946e-05, | |
| "loss": 1.1785, | |
| "step": 9870 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "grad_norm": 1.003237009048462, | |
| "learning_rate": 7.890754470126472e-05, | |
| "loss": 1.2082, | |
| "step": 9880 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "grad_norm": 1.0607051849365234, | |
| "learning_rate": 7.888573920627998e-05, | |
| "loss": 1.2112, | |
| "step": 9890 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "grad_norm": 1.0867217779159546, | |
| "learning_rate": 7.886393371129525e-05, | |
| "loss": 1.1845, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "grad_norm": 0.945563018321991, | |
| "learning_rate": 7.884212821631052e-05, | |
| "loss": 1.1925, | |
| "step": 9910 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "grad_norm": 1.0693022012710571, | |
| "learning_rate": 7.882032272132579e-05, | |
| "loss": 1.1956, | |
| "step": 9920 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "grad_norm": 0.9993180632591248, | |
| "learning_rate": 7.879851722634103e-05, | |
| "loss": 1.1965, | |
| "step": 9930 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "grad_norm": 1.010133147239685, | |
| "learning_rate": 7.87767117313563e-05, | |
| "loss": 1.2168, | |
| "step": 9940 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "grad_norm": 1.0953561067581177, | |
| "learning_rate": 7.875490623637157e-05, | |
| "loss": 1.2114, | |
| "step": 9950 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "grad_norm": 0.9444001317024231, | |
| "learning_rate": 7.873310074138684e-05, | |
| "loss": 1.1988, | |
| "step": 9960 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "grad_norm": 0.9980970621109009, | |
| "learning_rate": 7.87112952464021e-05, | |
| "loss": 1.2275, | |
| "step": 9970 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "grad_norm": 1.0584611892700195, | |
| "learning_rate": 7.868948975141735e-05, | |
| "loss": 1.2105, | |
| "step": 9980 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "grad_norm": 1.1327629089355469, | |
| "learning_rate": 7.866768425643263e-05, | |
| "loss": 1.2022, | |
| "step": 9990 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "grad_norm": 0.981350302696228, | |
| "learning_rate": 7.864587876144789e-05, | |
| "loss": 1.2151, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "grad_norm": 1.1142750978469849, | |
| "learning_rate": 7.862407326646315e-05, | |
| "loss": 1.1931, | |
| "step": 10010 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "grad_norm": 1.0601882934570312, | |
| "learning_rate": 7.860226777147842e-05, | |
| "loss": 1.2141, | |
| "step": 10020 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "grad_norm": 0.9991333484649658, | |
| "learning_rate": 7.858046227649368e-05, | |
| "loss": 1.1921, | |
| "step": 10030 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "grad_norm": 1.1021018028259277, | |
| "learning_rate": 7.855865678150894e-05, | |
| "loss": 1.2225, | |
| "step": 10040 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "grad_norm": 1.0568020343780518, | |
| "learning_rate": 7.85368512865242e-05, | |
| "loss": 1.2427, | |
| "step": 10050 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "grad_norm": 0.9811879992485046, | |
| "learning_rate": 7.851504579153948e-05, | |
| "loss": 1.1997, | |
| "step": 10060 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "grad_norm": 1.0988446474075317, | |
| "learning_rate": 7.849324029655473e-05, | |
| "loss": 1.2156, | |
| "step": 10070 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "grad_norm": 1.0393906831741333, | |
| "learning_rate": 7.847143480157e-05, | |
| "loss": 1.2258, | |
| "step": 10080 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "grad_norm": 1.1017202138900757, | |
| "learning_rate": 7.844962930658526e-05, | |
| "loss": 1.2069, | |
| "step": 10090 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "grad_norm": 1.1102749109268188, | |
| "learning_rate": 7.842782381160053e-05, | |
| "loss": 1.2256, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "grad_norm": 1.0270189046859741, | |
| "learning_rate": 7.84060183166158e-05, | |
| "loss": 1.2174, | |
| "step": 10110 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "grad_norm": 1.0221537351608276, | |
| "learning_rate": 7.838421282163106e-05, | |
| "loss": 1.1968, | |
| "step": 10120 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "grad_norm": 0.95604407787323, | |
| "learning_rate": 7.836240732664631e-05, | |
| "loss": 1.213, | |
| "step": 10130 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "grad_norm": 0.9393739700317383, | |
| "learning_rate": 7.834060183166159e-05, | |
| "loss": 1.2182, | |
| "step": 10140 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "grad_norm": 1.014799952507019, | |
| "learning_rate": 7.831879633667685e-05, | |
| "loss": 1.2021, | |
| "step": 10150 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "grad_norm": 1.0287479162216187, | |
| "learning_rate": 7.829699084169211e-05, | |
| "loss": 1.2114, | |
| "step": 10160 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "grad_norm": 1.0790306329727173, | |
| "learning_rate": 7.827736589620584e-05, | |
| "loss": 1.1874, | |
| "step": 10170 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "grad_norm": 0.9588958621025085, | |
| "learning_rate": 7.82555604012211e-05, | |
| "loss": 1.2191, | |
| "step": 10180 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "grad_norm": 0.9004745483398438, | |
| "learning_rate": 7.823375490623638e-05, | |
| "loss": 1.1933, | |
| "step": 10190 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "grad_norm": 1.0742331743240356, | |
| "learning_rate": 7.821194941125164e-05, | |
| "loss": 1.2128, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "grad_norm": 1.072489857673645, | |
| "learning_rate": 7.81901439162669e-05, | |
| "loss": 1.2143, | |
| "step": 10210 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "grad_norm": 0.9534905552864075, | |
| "learning_rate": 7.816833842128217e-05, | |
| "loss": 1.2206, | |
| "step": 10220 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "grad_norm": 1.0694421529769897, | |
| "learning_rate": 7.814653292629743e-05, | |
| "loss": 1.2051, | |
| "step": 10230 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "grad_norm": 0.9729447364807129, | |
| "learning_rate": 7.81247274313127e-05, | |
| "loss": 1.2234, | |
| "step": 10240 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "grad_norm": 1.0395437479019165, | |
| "learning_rate": 7.810292193632796e-05, | |
| "loss": 1.1977, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "grad_norm": 0.999451756477356, | |
| "learning_rate": 7.808111644134322e-05, | |
| "loss": 1.2053, | |
| "step": 10260 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "grad_norm": 1.1238023042678833, | |
| "learning_rate": 7.805931094635848e-05, | |
| "loss": 1.2295, | |
| "step": 10270 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "grad_norm": 1.0689754486083984, | |
| "learning_rate": 7.803750545137375e-05, | |
| "loss": 1.2059, | |
| "step": 10280 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "grad_norm": 0.9754849672317505, | |
| "learning_rate": 7.801569995638901e-05, | |
| "loss": 1.206, | |
| "step": 10290 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "grad_norm": 1.02662193775177, | |
| "learning_rate": 7.799389446140429e-05, | |
| "loss": 1.1967, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "grad_norm": 1.1547129154205322, | |
| "learning_rate": 7.797208896641954e-05, | |
| "loss": 1.211, | |
| "step": 10310 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "grad_norm": 0.9812795519828796, | |
| "learning_rate": 7.79502834714348e-05, | |
| "loss": 1.1928, | |
| "step": 10320 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "grad_norm": 1.0706185102462769, | |
| "learning_rate": 7.792847797645006e-05, | |
| "loss": 1.1914, | |
| "step": 10330 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "grad_norm": 1.0410836935043335, | |
| "learning_rate": 7.790667248146534e-05, | |
| "loss": 1.2002, | |
| "step": 10340 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "grad_norm": 0.9746688008308411, | |
| "learning_rate": 7.78848669864806e-05, | |
| "loss": 1.1863, | |
| "step": 10350 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "grad_norm": 0.8778429627418518, | |
| "learning_rate": 7.786524204099433e-05, | |
| "loss": 1.2383, | |
| "step": 10360 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "grad_norm": 0.969650149345398, | |
| "learning_rate": 7.78434365460096e-05, | |
| "loss": 1.177, | |
| "step": 10370 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "grad_norm": 1.015781283378601, | |
| "learning_rate": 7.782163105102486e-05, | |
| "loss": 1.1838, | |
| "step": 10380 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "grad_norm": 0.8965770602226257, | |
| "learning_rate": 7.779982555604013e-05, | |
| "loss": 1.2175, | |
| "step": 10390 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "grad_norm": 1.007692575454712, | |
| "learning_rate": 7.77780200610554e-05, | |
| "loss": 1.1978, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "grad_norm": 0.9334578514099121, | |
| "learning_rate": 7.775621456607065e-05, | |
| "loss": 1.1887, | |
| "step": 10410 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "grad_norm": 0.9570727348327637, | |
| "learning_rate": 7.773440907108591e-05, | |
| "loss": 1.211, | |
| "step": 10420 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "grad_norm": 1.0146620273590088, | |
| "learning_rate": 7.771260357610119e-05, | |
| "loss": 1.2188, | |
| "step": 10430 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "grad_norm": 1.0868462324142456, | |
| "learning_rate": 7.769079808111645e-05, | |
| "loss": 1.2147, | |
| "step": 10440 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "grad_norm": 1.062110185623169, | |
| "learning_rate": 7.766899258613171e-05, | |
| "loss": 1.2172, | |
| "step": 10450 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "grad_norm": 0.950108528137207, | |
| "learning_rate": 7.764718709114697e-05, | |
| "loss": 1.2077, | |
| "step": 10460 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "grad_norm": 1.029308795928955, | |
| "learning_rate": 7.762538159616224e-05, | |
| "loss": 1.2112, | |
| "step": 10470 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "grad_norm": 0.9809032678604126, | |
| "learning_rate": 7.76035761011775e-05, | |
| "loss": 1.2115, | |
| "step": 10480 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "grad_norm": 1.0070390701293945, | |
| "learning_rate": 7.758177060619276e-05, | |
| "loss": 1.2032, | |
| "step": 10490 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "grad_norm": 1.1221727132797241, | |
| "learning_rate": 7.755996511120803e-05, | |
| "loss": 1.2164, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "grad_norm": 1.013219952583313, | |
| "learning_rate": 7.753815961622329e-05, | |
| "loss": 1.1912, | |
| "step": 10510 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "grad_norm": 1.0602985620498657, | |
| "learning_rate": 7.751635412123855e-05, | |
| "loss": 1.1607, | |
| "step": 10520 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "grad_norm": 1.009325385093689, | |
| "learning_rate": 7.749454862625382e-05, | |
| "loss": 1.1943, | |
| "step": 10530 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "grad_norm": 1.01610267162323, | |
| "learning_rate": 7.747274313126909e-05, | |
| "loss": 1.2036, | |
| "step": 10540 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "grad_norm": 0.9865471720695496, | |
| "learning_rate": 7.745093763628436e-05, | |
| "loss": 1.1951, | |
| "step": 10550 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "grad_norm": 1.1565035581588745, | |
| "learning_rate": 7.74291321412996e-05, | |
| "loss": 1.2132, | |
| "step": 10560 | |
| }, | |
| { | |
| "epoch": 2.29, | |
| "grad_norm": 0.9530940651893616, | |
| "learning_rate": 7.740732664631487e-05, | |
| "loss": 1.191, | |
| "step": 10570 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "grad_norm": 1.1055086851119995, | |
| "learning_rate": 7.738552115133014e-05, | |
| "loss": 1.2292, | |
| "step": 10580 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "grad_norm": 1.0695475339889526, | |
| "learning_rate": 7.736371565634541e-05, | |
| "loss": 1.1937, | |
| "step": 10590 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "grad_norm": 0.991439163684845, | |
| "learning_rate": 7.734191016136067e-05, | |
| "loss": 1.2117, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "grad_norm": 0.9743112921714783, | |
| "learning_rate": 7.732010466637592e-05, | |
| "loss": 1.2275, | |
| "step": 10610 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "grad_norm": 1.030121922492981, | |
| "learning_rate": 7.72982991713912e-05, | |
| "loss": 1.1893, | |
| "step": 10620 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "grad_norm": 1.0691959857940674, | |
| "learning_rate": 7.727649367640646e-05, | |
| "loss": 1.2044, | |
| "step": 10630 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "grad_norm": 1.141326904296875, | |
| "learning_rate": 7.725468818142172e-05, | |
| "loss": 1.2208, | |
| "step": 10640 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "grad_norm": 1.0179444551467896, | |
| "learning_rate": 7.723288268643699e-05, | |
| "loss": 1.1901, | |
| "step": 10650 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "grad_norm": 1.1256074905395508, | |
| "learning_rate": 7.721107719145225e-05, | |
| "loss": 1.2, | |
| "step": 10660 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "grad_norm": 1.0997061729431152, | |
| "learning_rate": 7.718927169646751e-05, | |
| "loss": 1.194, | |
| "step": 10670 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "grad_norm": 1.0382623672485352, | |
| "learning_rate": 7.716746620148277e-05, | |
| "loss": 1.2277, | |
| "step": 10680 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "grad_norm": 1.0295804738998413, | |
| "learning_rate": 7.714566070649805e-05, | |
| "loss": 1.1857, | |
| "step": 10690 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "grad_norm": 1.0594016313552856, | |
| "learning_rate": 7.71238552115133e-05, | |
| "loss": 1.1955, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "grad_norm": 1.0921293497085571, | |
| "learning_rate": 7.710204971652856e-05, | |
| "loss": 1.1836, | |
| "step": 10710 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "grad_norm": 1.0477246046066284, | |
| "learning_rate": 7.708024422154383e-05, | |
| "loss": 1.2023, | |
| "step": 10720 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "grad_norm": 1.0246959924697876, | |
| "learning_rate": 7.70584387265591e-05, | |
| "loss": 1.222, | |
| "step": 10730 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "grad_norm": 1.0640301704406738, | |
| "learning_rate": 7.703663323157437e-05, | |
| "loss": 1.1974, | |
| "step": 10740 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "grad_norm": 1.0652765035629272, | |
| "learning_rate": 7.701482773658963e-05, | |
| "loss": 1.1997, | |
| "step": 10750 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "grad_norm": 0.9220369458198547, | |
| "learning_rate": 7.699302224160488e-05, | |
| "loss": 1.212, | |
| "step": 10760 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "grad_norm": 0.9531814455986023, | |
| "learning_rate": 7.697121674662016e-05, | |
| "loss": 1.1686, | |
| "step": 10770 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "grad_norm": 1.1248044967651367, | |
| "learning_rate": 7.694941125163542e-05, | |
| "loss": 1.1971, | |
| "step": 10780 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "grad_norm": 1.0232545137405396, | |
| "learning_rate": 7.692760575665068e-05, | |
| "loss": 1.194, | |
| "step": 10790 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "grad_norm": 1.0724860429763794, | |
| "learning_rate": 7.690580026166594e-05, | |
| "loss": 1.1936, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "grad_norm": 1.036474347114563, | |
| "learning_rate": 7.688399476668121e-05, | |
| "loss": 1.2078, | |
| "step": 10810 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "grad_norm": 1.0231555700302124, | |
| "learning_rate": 7.686218927169647e-05, | |
| "loss": 1.2056, | |
| "step": 10820 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "grad_norm": 0.9879153370857239, | |
| "learning_rate": 7.684038377671173e-05, | |
| "loss": 1.2191, | |
| "step": 10830 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "grad_norm": 1.0709577798843384, | |
| "learning_rate": 7.6818578281727e-05, | |
| "loss": 1.198, | |
| "step": 10840 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "grad_norm": 1.0138386487960815, | |
| "learning_rate": 7.679677278674226e-05, | |
| "loss": 1.2284, | |
| "step": 10850 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "grad_norm": 1.0676188468933105, | |
| "learning_rate": 7.677496729175752e-05, | |
| "loss": 1.2004, | |
| "step": 10860 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "grad_norm": 1.0372511148452759, | |
| "learning_rate": 7.675316179677279e-05, | |
| "loss": 1.167, | |
| "step": 10870 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "grad_norm": 1.0466020107269287, | |
| "learning_rate": 7.673135630178805e-05, | |
| "loss": 1.1958, | |
| "step": 10880 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "grad_norm": 1.0521596670150757, | |
| "learning_rate": 7.670955080680333e-05, | |
| "loss": 1.2025, | |
| "step": 10890 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "grad_norm": 0.9906710982322693, | |
| "learning_rate": 7.668774531181858e-05, | |
| "loss": 1.188, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "grad_norm": 1.1713993549346924, | |
| "learning_rate": 7.666593981683384e-05, | |
| "loss": 1.1992, | |
| "step": 10910 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "grad_norm": 1.009819507598877, | |
| "learning_rate": 7.664413432184911e-05, | |
| "loss": 1.191, | |
| "step": 10920 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "grad_norm": 1.0150312185287476, | |
| "learning_rate": 7.662232882686438e-05, | |
| "loss": 1.1951, | |
| "step": 10930 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "grad_norm": 0.9645649790763855, | |
| "learning_rate": 7.660052333187964e-05, | |
| "loss": 1.1941, | |
| "step": 10940 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "grad_norm": 1.0158168077468872, | |
| "learning_rate": 7.65787178368949e-05, | |
| "loss": 1.1911, | |
| "step": 10950 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "grad_norm": 1.0730938911437988, | |
| "learning_rate": 7.655691234191017e-05, | |
| "loss": 1.1885, | |
| "step": 10960 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "grad_norm": 1.09099543094635, | |
| "learning_rate": 7.653510684692543e-05, | |
| "loss": 1.195, | |
| "step": 10970 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "grad_norm": 0.982562243938446, | |
| "learning_rate": 7.651330135194069e-05, | |
| "loss": 1.213, | |
| "step": 10980 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "grad_norm": 1.0173815488815308, | |
| "learning_rate": 7.649149585695596e-05, | |
| "loss": 1.1931, | |
| "step": 10990 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "grad_norm": 1.0644387006759644, | |
| "learning_rate": 7.646969036197122e-05, | |
| "loss": 1.2, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "grad_norm": 1.0456851720809937, | |
| "learning_rate": 7.644788486698648e-05, | |
| "loss": 1.2267, | |
| "step": 11010 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "grad_norm": 1.0387489795684814, | |
| "learning_rate": 7.642607937200175e-05, | |
| "loss": 1.1818, | |
| "step": 11020 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "grad_norm": 1.034599781036377, | |
| "learning_rate": 7.640427387701701e-05, | |
| "loss": 1.1972, | |
| "step": 11030 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "grad_norm": 1.005964994430542, | |
| "learning_rate": 7.638246838203228e-05, | |
| "loss": 1.1882, | |
| "step": 11040 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "grad_norm": 1.0190836191177368, | |
| "learning_rate": 7.636066288704753e-05, | |
| "loss": 1.1819, | |
| "step": 11050 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "grad_norm": 1.010334849357605, | |
| "learning_rate": 7.63388573920628e-05, | |
| "loss": 1.2054, | |
| "step": 11060 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "grad_norm": 0.986047089099884, | |
| "learning_rate": 7.631705189707806e-05, | |
| "loss": 1.1831, | |
| "step": 11070 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "grad_norm": 1.0715646743774414, | |
| "learning_rate": 7.629524640209334e-05, | |
| "loss": 1.2143, | |
| "step": 11080 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "grad_norm": 1.0573137998580933, | |
| "learning_rate": 7.62734409071086e-05, | |
| "loss": 1.1765, | |
| "step": 11090 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "grad_norm": 0.9830726385116577, | |
| "learning_rate": 7.625163541212386e-05, | |
| "loss": 1.2195, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "grad_norm": 0.9928615689277649, | |
| "learning_rate": 7.622982991713911e-05, | |
| "loss": 1.2052, | |
| "step": 11110 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "grad_norm": 0.916532039642334, | |
| "learning_rate": 7.620802442215439e-05, | |
| "loss": 1.2161, | |
| "step": 11120 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "grad_norm": 1.024786353111267, | |
| "learning_rate": 7.618621892716965e-05, | |
| "loss": 1.1841, | |
| "step": 11130 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "grad_norm": 0.9942538142204285, | |
| "learning_rate": 7.616441343218491e-05, | |
| "loss": 1.1969, | |
| "step": 11140 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "grad_norm": 0.9637119770050049, | |
| "learning_rate": 7.614260793720018e-05, | |
| "loss": 1.1839, | |
| "step": 11150 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "grad_norm": 1.0759954452514648, | |
| "learning_rate": 7.612080244221544e-05, | |
| "loss": 1.2087, | |
| "step": 11160 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "grad_norm": 1.1083338260650635, | |
| "learning_rate": 7.60989969472307e-05, | |
| "loss": 1.1637, | |
| "step": 11170 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "grad_norm": 0.9280533790588379, | |
| "learning_rate": 7.607719145224597e-05, | |
| "loss": 1.186, | |
| "step": 11180 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "grad_norm": 1.005856990814209, | |
| "learning_rate": 7.605538595726124e-05, | |
| "loss": 1.2096, | |
| "step": 11190 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "grad_norm": 1.0294781923294067, | |
| "learning_rate": 7.603358046227649e-05, | |
| "loss": 1.1933, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "grad_norm": 1.129011631011963, | |
| "learning_rate": 7.601177496729176e-05, | |
| "loss": 1.1975, | |
| "step": 11210 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "grad_norm": 0.9473848938941956, | |
| "learning_rate": 7.598996947230702e-05, | |
| "loss": 1.191, | |
| "step": 11220 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "grad_norm": 1.0725443363189697, | |
| "learning_rate": 7.59681639773223e-05, | |
| "loss": 1.2069, | |
| "step": 11230 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "grad_norm": 1.0083664655685425, | |
| "learning_rate": 7.594635848233756e-05, | |
| "loss": 1.2012, | |
| "step": 11240 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "grad_norm": 1.0504008531570435, | |
| "learning_rate": 7.592455298735281e-05, | |
| "loss": 1.1897, | |
| "step": 11250 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "grad_norm": 1.02128267288208, | |
| "learning_rate": 7.590274749236807e-05, | |
| "loss": 1.193, | |
| "step": 11260 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "grad_norm": 1.043655276298523, | |
| "learning_rate": 7.588094199738335e-05, | |
| "loss": 1.1984, | |
| "step": 11270 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "grad_norm": 1.0775086879730225, | |
| "learning_rate": 7.585913650239861e-05, | |
| "loss": 1.1826, | |
| "step": 11280 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "grad_norm": 1.0672656297683716, | |
| "learning_rate": 7.583733100741387e-05, | |
| "loss": 1.221, | |
| "step": 11290 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "grad_norm": 1.1105164289474487, | |
| "learning_rate": 7.581552551242914e-05, | |
| "loss": 1.2124, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "grad_norm": 0.978393018245697, | |
| "learning_rate": 7.57937200174444e-05, | |
| "loss": 1.1749, | |
| "step": 11310 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "grad_norm": 1.0011403560638428, | |
| "learning_rate": 7.577191452245966e-05, | |
| "loss": 1.1987, | |
| "step": 11320 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "grad_norm": 0.9928615093231201, | |
| "learning_rate": 7.575010902747493e-05, | |
| "loss": 1.1916, | |
| "step": 11330 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "grad_norm": 0.9368339776992798, | |
| "learning_rate": 7.572830353249019e-05, | |
| "loss": 1.2155, | |
| "step": 11340 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "grad_norm": 1.0176599025726318, | |
| "learning_rate": 7.570649803750545e-05, | |
| "loss": 1.2108, | |
| "step": 11350 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "grad_norm": 0.956798255443573, | |
| "learning_rate": 7.568469254252072e-05, | |
| "loss": 1.1951, | |
| "step": 11360 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "grad_norm": 0.9456045627593994, | |
| "learning_rate": 7.566288704753598e-05, | |
| "loss": 1.1939, | |
| "step": 11370 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "grad_norm": 1.1099495887756348, | |
| "learning_rate": 7.564108155255125e-05, | |
| "loss": 1.2113, | |
| "step": 11380 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "grad_norm": 1.0258333683013916, | |
| "learning_rate": 7.561927605756652e-05, | |
| "loss": 1.1723, | |
| "step": 11390 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "grad_norm": 1.0410195589065552, | |
| "learning_rate": 7.559747056258177e-05, | |
| "loss": 1.182, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "grad_norm": 0.9671265482902527, | |
| "learning_rate": 7.557566506759703e-05, | |
| "loss": 1.2038, | |
| "step": 11410 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "grad_norm": 0.9647257328033447, | |
| "learning_rate": 7.555385957261231e-05, | |
| "loss": 1.2078, | |
| "step": 11420 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "grad_norm": 1.0497002601623535, | |
| "learning_rate": 7.553205407762757e-05, | |
| "loss": 1.2053, | |
| "step": 11430 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "grad_norm": 1.080557107925415, | |
| "learning_rate": 7.551024858264283e-05, | |
| "loss": 1.1925, | |
| "step": 11440 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "grad_norm": 0.967833936214447, | |
| "learning_rate": 7.54884430876581e-05, | |
| "loss": 1.2106, | |
| "step": 11450 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "grad_norm": 1.1252259016036987, | |
| "learning_rate": 7.546663759267336e-05, | |
| "loss": 1.2035, | |
| "step": 11460 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "grad_norm": 1.021498203277588, | |
| "learning_rate": 7.544483209768862e-05, | |
| "loss": 1.1748, | |
| "step": 11470 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "grad_norm": 1.1426560878753662, | |
| "learning_rate": 7.542302660270389e-05, | |
| "loss": 1.1916, | |
| "step": 11480 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "grad_norm": 0.9883751273155212, | |
| "learning_rate": 7.540122110771915e-05, | |
| "loss": 1.1808, | |
| "step": 11490 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 0.9893055558204651, | |
| "learning_rate": 7.537941561273441e-05, | |
| "loss": 1.1961, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 1.038801908493042, | |
| "learning_rate": 7.535761011774967e-05, | |
| "loss": 1.1813, | |
| "step": 11510 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 0.9812270998954773, | |
| "learning_rate": 7.533580462276494e-05, | |
| "loss": 1.1873, | |
| "step": 11520 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 1.0793439149856567, | |
| "learning_rate": 7.53139991277802e-05, | |
| "loss": 1.1858, | |
| "step": 11530 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "grad_norm": 1.0743041038513184, | |
| "learning_rate": 7.529219363279548e-05, | |
| "loss": 1.1788, | |
| "step": 11540 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "grad_norm": 1.1196831464767456, | |
| "learning_rate": 7.527038813781073e-05, | |
| "loss": 1.2059, | |
| "step": 11550 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "grad_norm": 1.0126169919967651, | |
| "learning_rate": 7.524858264282599e-05, | |
| "loss": 1.2101, | |
| "step": 11560 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "grad_norm": 1.2069376707077026, | |
| "learning_rate": 7.522677714784125e-05, | |
| "loss": 1.1964, | |
| "step": 11570 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "grad_norm": 0.9865954518318176, | |
| "learning_rate": 7.520497165285653e-05, | |
| "loss": 1.1966, | |
| "step": 11580 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "grad_norm": 0.9862752556800842, | |
| "learning_rate": 7.518316615787179e-05, | |
| "loss": 1.1954, | |
| "step": 11590 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "grad_norm": 1.093674659729004, | |
| "learning_rate": 7.516136066288704e-05, | |
| "loss": 1.1931, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "grad_norm": 1.0402370691299438, | |
| "learning_rate": 7.513955516790232e-05, | |
| "loss": 1.1834, | |
| "step": 11610 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "grad_norm": 0.9660056233406067, | |
| "learning_rate": 7.511774967291758e-05, | |
| "loss": 1.1978, | |
| "step": 11620 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "grad_norm": 1.1045291423797607, | |
| "learning_rate": 7.509594417793284e-05, | |
| "loss": 1.1789, | |
| "step": 11630 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "grad_norm": 1.1806862354278564, | |
| "learning_rate": 7.507413868294811e-05, | |
| "loss": 1.1849, | |
| "step": 11640 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "grad_norm": 1.0600950717926025, | |
| "learning_rate": 7.505233318796337e-05, | |
| "loss": 1.1863, | |
| "step": 11650 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "grad_norm": 1.2518783807754517, | |
| "learning_rate": 7.503052769297863e-05, | |
| "loss": 1.1911, | |
| "step": 11660 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "grad_norm": 1.0559264421463013, | |
| "learning_rate": 7.50087221979939e-05, | |
| "loss": 1.2106, | |
| "step": 11670 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "grad_norm": 0.9558138847351074, | |
| "learning_rate": 7.498691670300916e-05, | |
| "loss": 1.1719, | |
| "step": 11680 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "grad_norm": 1.0867066383361816, | |
| "learning_rate": 7.496511120802444e-05, | |
| "loss": 1.2209, | |
| "step": 11690 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "grad_norm": 0.9424611926078796, | |
| "learning_rate": 7.494330571303969e-05, | |
| "loss": 1.1812, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "grad_norm": 1.04227614402771, | |
| "learning_rate": 7.492150021805495e-05, | |
| "loss": 1.204, | |
| "step": 11710 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "grad_norm": 0.9230485558509827, | |
| "learning_rate": 7.489969472307021e-05, | |
| "loss": 1.1923, | |
| "step": 11720 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "grad_norm": 1.079827070236206, | |
| "learning_rate": 7.487788922808549e-05, | |
| "loss": 1.1633, | |
| "step": 11730 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "grad_norm": 1.0158615112304688, | |
| "learning_rate": 7.485608373310075e-05, | |
| "loss": 1.1828, | |
| "step": 11740 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "grad_norm": 1.0298587083816528, | |
| "learning_rate": 7.4834278238116e-05, | |
| "loss": 1.2046, | |
| "step": 11750 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "grad_norm": 1.1021103858947754, | |
| "learning_rate": 7.481247274313126e-05, | |
| "loss": 1.2369, | |
| "step": 11760 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "grad_norm": 1.0776439905166626, | |
| "learning_rate": 7.479066724814654e-05, | |
| "loss": 1.1884, | |
| "step": 11770 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "grad_norm": 1.0745654106140137, | |
| "learning_rate": 7.47688617531618e-05, | |
| "loss": 1.1915, | |
| "step": 11780 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "grad_norm": 0.9988030791282654, | |
| "learning_rate": 7.474705625817707e-05, | |
| "loss": 1.1783, | |
| "step": 11790 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "grad_norm": 0.9837521910667419, | |
| "learning_rate": 7.472525076319233e-05, | |
| "loss": 1.1859, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "grad_norm": 1.076101541519165, | |
| "learning_rate": 7.470344526820759e-05, | |
| "loss": 1.194, | |
| "step": 11810 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "grad_norm": 1.0141769647598267, | |
| "learning_rate": 7.468163977322286e-05, | |
| "loss": 1.1893, | |
| "step": 11820 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "grad_norm": 0.9962597489356995, | |
| "learning_rate": 7.465983427823812e-05, | |
| "loss": 1.2143, | |
| "step": 11830 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "grad_norm": 1.0923272371292114, | |
| "learning_rate": 7.46380287832534e-05, | |
| "loss": 1.184, | |
| "step": 11840 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "grad_norm": 1.1431857347488403, | |
| "learning_rate": 7.461622328826864e-05, | |
| "loss": 1.1926, | |
| "step": 11850 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "grad_norm": 1.0489574670791626, | |
| "learning_rate": 7.459441779328391e-05, | |
| "loss": 1.1584, | |
| "step": 11860 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "grad_norm": 1.049176812171936, | |
| "learning_rate": 7.457261229829917e-05, | |
| "loss": 1.2145, | |
| "step": 11870 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "grad_norm": 1.0617070198059082, | |
| "learning_rate": 7.455080680331445e-05, | |
| "loss": 1.1821, | |
| "step": 11880 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "grad_norm": 1.1978720426559448, | |
| "learning_rate": 7.452900130832971e-05, | |
| "loss": 1.1832, | |
| "step": 11890 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "grad_norm": 1.0322489738464355, | |
| "learning_rate": 7.450719581334496e-05, | |
| "loss": 1.1978, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "grad_norm": 1.0497206449508667, | |
| "learning_rate": 7.448539031836022e-05, | |
| "loss": 1.1771, | |
| "step": 11910 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "grad_norm": 1.0136041641235352, | |
| "learning_rate": 7.44635848233755e-05, | |
| "loss": 1.198, | |
| "step": 11920 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "grad_norm": 1.0500036478042603, | |
| "learning_rate": 7.444177932839076e-05, | |
| "loss": 1.2019, | |
| "step": 11930 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "grad_norm": 1.0009404420852661, | |
| "learning_rate": 7.441997383340603e-05, | |
| "loss": 1.197, | |
| "step": 11940 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "grad_norm": 1.1604543924331665, | |
| "learning_rate": 7.439816833842127e-05, | |
| "loss": 1.1921, | |
| "step": 11950 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "grad_norm": 1.0473634004592896, | |
| "learning_rate": 7.437636284343655e-05, | |
| "loss": 1.1718, | |
| "step": 11960 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "grad_norm": 1.0517455339431763, | |
| "learning_rate": 7.435455734845181e-05, | |
| "loss": 1.1721, | |
| "step": 11970 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "grad_norm": 1.0030772686004639, | |
| "learning_rate": 7.433275185346708e-05, | |
| "loss": 1.1942, | |
| "step": 11980 | |
| }, | |
| { | |
| "epoch": 2.6, | |
| "grad_norm": 1.067175269126892, | |
| "learning_rate": 7.431094635848234e-05, | |
| "loss": 1.2015, | |
| "step": 11990 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "grad_norm": 1.0570900440216064, | |
| "learning_rate": 7.42891408634976e-05, | |
| "loss": 1.1715, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "grad_norm": 1.0768860578536987, | |
| "learning_rate": 7.426733536851287e-05, | |
| "loss": 1.2118, | |
| "step": 12010 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "grad_norm": 0.9864534139633179, | |
| "learning_rate": 7.424552987352813e-05, | |
| "loss": 1.211, | |
| "step": 12020 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "grad_norm": 0.9961116909980774, | |
| "learning_rate": 7.422372437854339e-05, | |
| "loss": 1.1726, | |
| "step": 12030 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "grad_norm": 1.149584174156189, | |
| "learning_rate": 7.420191888355867e-05, | |
| "loss": 1.2015, | |
| "step": 12040 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "grad_norm": 0.9385210275650024, | |
| "learning_rate": 7.418011338857392e-05, | |
| "loss": 1.1853, | |
| "step": 12050 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "grad_norm": 0.9972238540649414, | |
| "learning_rate": 7.415830789358918e-05, | |
| "loss": 1.1862, | |
| "step": 12060 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "grad_norm": 1.1037793159484863, | |
| "learning_rate": 7.413650239860446e-05, | |
| "loss": 1.2191, | |
| "step": 12070 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "grad_norm": 1.082542896270752, | |
| "learning_rate": 7.411469690361972e-05, | |
| "loss": 1.2079, | |
| "step": 12080 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "grad_norm": 1.103800892829895, | |
| "learning_rate": 7.409289140863498e-05, | |
| "loss": 1.2069, | |
| "step": 12090 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "grad_norm": 1.1348109245300293, | |
| "learning_rate": 7.407108591365023e-05, | |
| "loss": 1.1853, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "grad_norm": 1.0272557735443115, | |
| "learning_rate": 7.404928041866551e-05, | |
| "loss": 1.206, | |
| "step": 12110 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "grad_norm": 1.06856369972229, | |
| "learning_rate": 7.402747492368077e-05, | |
| "loss": 1.2077, | |
| "step": 12120 | |
| }, | |
| { | |
| "epoch": 2.63, | |
| "grad_norm": 0.9664187431335449, | |
| "learning_rate": 7.400566942869604e-05, | |
| "loss": 1.2252, | |
| "step": 12130 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "grad_norm": 1.0753014087677002, | |
| "learning_rate": 7.39838639337113e-05, | |
| "loss": 1.2033, | |
| "step": 12140 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "grad_norm": 1.1803292036056519, | |
| "learning_rate": 7.396205843872656e-05, | |
| "loss": 1.1944, | |
| "step": 12150 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "grad_norm": 0.9899237155914307, | |
| "learning_rate": 7.394025294374183e-05, | |
| "loss": 1.1768, | |
| "step": 12160 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "grad_norm": 1.0693211555480957, | |
| "learning_rate": 7.391844744875709e-05, | |
| "loss": 1.1957, | |
| "step": 12170 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "grad_norm": 1.0212500095367432, | |
| "learning_rate": 7.389664195377235e-05, | |
| "loss": 1.1807, | |
| "step": 12180 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "grad_norm": 0.9626917839050293, | |
| "learning_rate": 7.387483645878763e-05, | |
| "loss": 1.2019, | |
| "step": 12190 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "grad_norm": 1.0324492454528809, | |
| "learning_rate": 7.385303096380288e-05, | |
| "loss": 1.1787, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "grad_norm": 1.0183689594268799, | |
| "learning_rate": 7.383122546881814e-05, | |
| "loss": 1.1718, | |
| "step": 12210 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "grad_norm": 1.03179132938385, | |
| "learning_rate": 7.38094199738334e-05, | |
| "loss": 1.1684, | |
| "step": 12220 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "grad_norm": 1.0151221752166748, | |
| "learning_rate": 7.378761447884868e-05, | |
| "loss": 1.1754, | |
| "step": 12230 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "grad_norm": 1.0675002336502075, | |
| "learning_rate": 7.376580898386394e-05, | |
| "loss": 1.1964, | |
| "step": 12240 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "grad_norm": 0.9424752593040466, | |
| "learning_rate": 7.374400348887919e-05, | |
| "loss": 1.1994, | |
| "step": 12250 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "grad_norm": 1.0181151628494263, | |
| "learning_rate": 7.372219799389446e-05, | |
| "loss": 1.1943, | |
| "step": 12260 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "grad_norm": 1.0865308046340942, | |
| "learning_rate": 7.370039249890973e-05, | |
| "loss": 1.1703, | |
| "step": 12270 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "grad_norm": 1.043016791343689, | |
| "learning_rate": 7.3678587003925e-05, | |
| "loss": 1.1813, | |
| "step": 12280 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "grad_norm": 1.060164213180542, | |
| "learning_rate": 7.365678150894026e-05, | |
| "loss": 1.1769, | |
| "step": 12290 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "grad_norm": 1.0264476537704468, | |
| "learning_rate": 7.363497601395552e-05, | |
| "loss": 1.1895, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "grad_norm": 1.0359675884246826, | |
| "learning_rate": 7.361317051897078e-05, | |
| "loss": 1.1773, | |
| "step": 12310 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "grad_norm": 1.0558348894119263, | |
| "learning_rate": 7.359136502398605e-05, | |
| "loss": 1.2011, | |
| "step": 12320 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "grad_norm": 1.0487242937088013, | |
| "learning_rate": 7.356955952900131e-05, | |
| "loss": 1.2145, | |
| "step": 12330 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "grad_norm": 1.0390251874923706, | |
| "learning_rate": 7.354775403401657e-05, | |
| "loss": 1.1771, | |
| "step": 12340 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "grad_norm": 0.9608905911445618, | |
| "learning_rate": 7.352594853903184e-05, | |
| "loss": 1.1988, | |
| "step": 12350 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "grad_norm": 0.9924561977386475, | |
| "learning_rate": 7.35041430440471e-05, | |
| "loss": 1.2049, | |
| "step": 12360 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "grad_norm": 0.9115813970565796, | |
| "learning_rate": 7.348233754906236e-05, | |
| "loss": 1.185, | |
| "step": 12370 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "grad_norm": 0.9227597713470459, | |
| "learning_rate": 7.346053205407764e-05, | |
| "loss": 1.1964, | |
| "step": 12380 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "grad_norm": 1.1192283630371094, | |
| "learning_rate": 7.34387265590929e-05, | |
| "loss": 1.1927, | |
| "step": 12390 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "grad_norm": 0.9770265817642212, | |
| "learning_rate": 7.341692106410815e-05, | |
| "loss": 1.197, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "grad_norm": 1.0701338052749634, | |
| "learning_rate": 7.339511556912341e-05, | |
| "loss": 1.1834, | |
| "step": 12410 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "grad_norm": 1.0348602533340454, | |
| "learning_rate": 7.337331007413869e-05, | |
| "loss": 1.2115, | |
| "step": 12420 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "grad_norm": 1.0927150249481201, | |
| "learning_rate": 7.335150457915395e-05, | |
| "loss": 1.2032, | |
| "step": 12430 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "grad_norm": 1.0548428297042847, | |
| "learning_rate": 7.332969908416922e-05, | |
| "loss": 1.1962, | |
| "step": 12440 | |
| }, | |
| { | |
| "epoch": 2.7, | |
| "grad_norm": 0.9672625064849854, | |
| "learning_rate": 7.330789358918447e-05, | |
| "loss": 1.1761, | |
| "step": 12450 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "grad_norm": 0.9257100820541382, | |
| "learning_rate": 7.328608809419974e-05, | |
| "loss": 1.2007, | |
| "step": 12460 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "grad_norm": 1.0286579132080078, | |
| "learning_rate": 7.3264282599215e-05, | |
| "loss": 1.1988, | |
| "step": 12470 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "grad_norm": 1.153806447982788, | |
| "learning_rate": 7.324247710423027e-05, | |
| "loss": 1.207, | |
| "step": 12480 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "grad_norm": 0.9337689876556396, | |
| "learning_rate": 7.322067160924553e-05, | |
| "loss": 1.2006, | |
| "step": 12490 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "grad_norm": 0.9721220135688782, | |
| "learning_rate": 7.31988661142608e-05, | |
| "loss": 1.2014, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "grad_norm": 1.158456802368164, | |
| "learning_rate": 7.317706061927606e-05, | |
| "loss": 1.2074, | |
| "step": 12510 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "grad_norm": 1.0969914197921753, | |
| "learning_rate": 7.315525512429132e-05, | |
| "loss": 1.207, | |
| "step": 12520 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "grad_norm": 0.9585858583450317, | |
| "learning_rate": 7.31334496293066e-05, | |
| "loss": 1.1783, | |
| "step": 12530 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "grad_norm": 1.0447596311569214, | |
| "learning_rate": 7.311164413432186e-05, | |
| "loss": 1.1662, | |
| "step": 12540 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "grad_norm": 1.0252220630645752, | |
| "learning_rate": 7.308983863933711e-05, | |
| "loss": 1.1891, | |
| "step": 12550 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "grad_norm": 1.075294017791748, | |
| "learning_rate": 7.306803314435237e-05, | |
| "loss": 1.1917, | |
| "step": 12560 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "grad_norm": 1.0980489253997803, | |
| "learning_rate": 7.304622764936765e-05, | |
| "loss": 1.1829, | |
| "step": 12570 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "grad_norm": 1.0682340860366821, | |
| "learning_rate": 7.302442215438291e-05, | |
| "loss": 1.1859, | |
| "step": 12580 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "grad_norm": 1.0863393545150757, | |
| "learning_rate": 7.300261665939818e-05, | |
| "loss": 1.188, | |
| "step": 12590 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "grad_norm": 1.0569467544555664, | |
| "learning_rate": 7.298081116441343e-05, | |
| "loss": 1.1962, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "grad_norm": 1.0733450651168823, | |
| "learning_rate": 7.29590056694287e-05, | |
| "loss": 1.1934, | |
| "step": 12610 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "grad_norm": 1.0762420892715454, | |
| "learning_rate": 7.293720017444397e-05, | |
| "loss": 1.181, | |
| "step": 12620 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "grad_norm": 1.0010732412338257, | |
| "learning_rate": 7.291539467945923e-05, | |
| "loss": 1.1936, | |
| "step": 12630 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "grad_norm": 1.039819598197937, | |
| "learning_rate": 7.289358918447449e-05, | |
| "loss": 1.2001, | |
| "step": 12640 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "grad_norm": 1.1060088872909546, | |
| "learning_rate": 7.287178368948975e-05, | |
| "loss": 1.2056, | |
| "step": 12650 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "grad_norm": 0.9314666986465454, | |
| "learning_rate": 7.284997819450502e-05, | |
| "loss": 1.1748, | |
| "step": 12660 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "grad_norm": 1.2504175901412964, | |
| "learning_rate": 7.282817269952028e-05, | |
| "loss": 1.1737, | |
| "step": 12670 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "grad_norm": 1.1391412019729614, | |
| "learning_rate": 7.280636720453554e-05, | |
| "loss": 1.1909, | |
| "step": 12680 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "grad_norm": 1.0052971839904785, | |
| "learning_rate": 7.278456170955081e-05, | |
| "loss": 1.1902, | |
| "step": 12690 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "grad_norm": 1.1059855222702026, | |
| "learning_rate": 7.276275621456607e-05, | |
| "loss": 1.2021, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "grad_norm": 1.0115567445755005, | |
| "learning_rate": 7.274095071958133e-05, | |
| "loss": 1.1512, | |
| "step": 12710 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "grad_norm": 1.0905554294586182, | |
| "learning_rate": 7.27191452245966e-05, | |
| "loss": 1.1884, | |
| "step": 12720 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "grad_norm": 1.023762583732605, | |
| "learning_rate": 7.269733972961187e-05, | |
| "loss": 1.1841, | |
| "step": 12730 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "grad_norm": 1.0214531421661377, | |
| "learning_rate": 7.267553423462714e-05, | |
| "loss": 1.185, | |
| "step": 12740 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "grad_norm": 1.043494701385498, | |
| "learning_rate": 7.265372873964239e-05, | |
| "loss": 1.1822, | |
| "step": 12750 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "grad_norm": 1.0787135362625122, | |
| "learning_rate": 7.263192324465766e-05, | |
| "loss": 1.1827, | |
| "step": 12760 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "grad_norm": 1.1063132286071777, | |
| "learning_rate": 7.261011774967292e-05, | |
| "loss": 1.1847, | |
| "step": 12770 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "grad_norm": 1.0400912761688232, | |
| "learning_rate": 7.258831225468819e-05, | |
| "loss": 1.1603, | |
| "step": 12780 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "grad_norm": 1.057569146156311, | |
| "learning_rate": 7.256650675970345e-05, | |
| "loss": 1.1713, | |
| "step": 12790 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "grad_norm": 1.0713859796524048, | |
| "learning_rate": 7.254470126471871e-05, | |
| "loss": 1.2167, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "grad_norm": 1.0643656253814697, | |
| "learning_rate": 7.252289576973398e-05, | |
| "loss": 1.1744, | |
| "step": 12810 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "grad_norm": 1.1218703985214233, | |
| "learning_rate": 7.250109027474924e-05, | |
| "loss": 1.2183, | |
| "step": 12820 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "grad_norm": 0.9932084083557129, | |
| "learning_rate": 7.24792847797645e-05, | |
| "loss": 1.1774, | |
| "step": 12830 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "grad_norm": 1.063856840133667, | |
| "learning_rate": 7.245747928477977e-05, | |
| "loss": 1.1519, | |
| "step": 12840 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "grad_norm": 1.0655205249786377, | |
| "learning_rate": 7.243567378979503e-05, | |
| "loss": 1.1883, | |
| "step": 12850 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "grad_norm": 0.9149487018585205, | |
| "learning_rate": 7.241386829481029e-05, | |
| "loss": 1.1636, | |
| "step": 12860 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "grad_norm": 1.061606764793396, | |
| "learning_rate": 7.239206279982556e-05, | |
| "loss": 1.1933, | |
| "step": 12870 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "grad_norm": 1.026875376701355, | |
| "learning_rate": 7.237025730484083e-05, | |
| "loss": 1.1697, | |
| "step": 12880 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "grad_norm": 0.9857021570205688, | |
| "learning_rate": 7.23484518098561e-05, | |
| "loss": 1.1593, | |
| "step": 12890 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "grad_norm": 1.0682117938995361, | |
| "learning_rate": 7.232664631487134e-05, | |
| "loss": 1.1846, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "grad_norm": 0.9390698671340942, | |
| "learning_rate": 7.230484081988661e-05, | |
| "loss": 1.1625, | |
| "step": 12910 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "grad_norm": 1.0105453729629517, | |
| "learning_rate": 7.228303532490188e-05, | |
| "loss": 1.1929, | |
| "step": 12920 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "grad_norm": 0.986284077167511, | |
| "learning_rate": 7.226122982991715e-05, | |
| "loss": 1.1973, | |
| "step": 12930 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "grad_norm": 1.0369880199432373, | |
| "learning_rate": 7.223942433493241e-05, | |
| "loss": 1.1996, | |
| "step": 12940 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "grad_norm": 1.1171998977661133, | |
| "learning_rate": 7.221761883994766e-05, | |
| "loss": 1.2022, | |
| "step": 12950 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "grad_norm": 1.0862730741500854, | |
| "learning_rate": 7.219581334496294e-05, | |
| "loss": 1.19, | |
| "step": 12960 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "grad_norm": 1.0609533786773682, | |
| "learning_rate": 7.21740078499782e-05, | |
| "loss": 1.1825, | |
| "step": 12970 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "grad_norm": 0.98408043384552, | |
| "learning_rate": 7.215220235499346e-05, | |
| "loss": 1.1766, | |
| "step": 12980 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "grad_norm": 1.0378422737121582, | |
| "learning_rate": 7.213039686000873e-05, | |
| "loss": 1.1843, | |
| "step": 12990 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "grad_norm": 0.9478686451911926, | |
| "learning_rate": 7.210859136502399e-05, | |
| "loss": 1.1728, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "grad_norm": 1.0276613235473633, | |
| "learning_rate": 7.208678587003925e-05, | |
| "loss": 1.1796, | |
| "step": 13010 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "grad_norm": 0.9244964122772217, | |
| "learning_rate": 7.206498037505451e-05, | |
| "loss": 1.1812, | |
| "step": 13020 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "grad_norm": 1.0720821619033813, | |
| "learning_rate": 7.204317488006979e-05, | |
| "loss": 1.1597, | |
| "step": 13030 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "grad_norm": 1.0820330381393433, | |
| "learning_rate": 7.202136938508504e-05, | |
| "loss": 1.1981, | |
| "step": 13040 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "grad_norm": 0.9590197205543518, | |
| "learning_rate": 7.19995638901003e-05, | |
| "loss": 1.1898, | |
| "step": 13050 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "grad_norm": 1.0559465885162354, | |
| "learning_rate": 7.197775839511557e-05, | |
| "loss": 1.1985, | |
| "step": 13060 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "grad_norm": 0.9392025470733643, | |
| "learning_rate": 7.195595290013084e-05, | |
| "loss": 1.1933, | |
| "step": 13070 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "grad_norm": 1.1029566526412964, | |
| "learning_rate": 7.19341474051461e-05, | |
| "loss": 1.1733, | |
| "step": 13080 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "grad_norm": 1.0255013704299927, | |
| "learning_rate": 7.191234191016137e-05, | |
| "loss": 1.1762, | |
| "step": 13090 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "grad_norm": 1.0394928455352783, | |
| "learning_rate": 7.189053641517662e-05, | |
| "loss": 1.151, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "grad_norm": 1.057391881942749, | |
| "learning_rate": 7.18687309201919e-05, | |
| "loss": 1.1761, | |
| "step": 13110 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "grad_norm": 1.0358378887176514, | |
| "learning_rate": 7.184692542520716e-05, | |
| "loss": 1.1911, | |
| "step": 13120 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "grad_norm": 1.0503947734832764, | |
| "learning_rate": 7.182511993022242e-05, | |
| "loss": 1.2198, | |
| "step": 13130 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "grad_norm": 1.0237114429473877, | |
| "learning_rate": 7.180331443523768e-05, | |
| "loss": 1.2043, | |
| "step": 13140 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "grad_norm": 0.9386830925941467, | |
| "learning_rate": 7.178150894025295e-05, | |
| "loss": 1.192, | |
| "step": 13150 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "grad_norm": 0.9386530518531799, | |
| "learning_rate": 7.175970344526821e-05, | |
| "loss": 1.1864, | |
| "step": 13160 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "grad_norm": 0.9574694633483887, | |
| "learning_rate": 7.173789795028347e-05, | |
| "loss": 1.1828, | |
| "step": 13170 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "grad_norm": 1.0528520345687866, | |
| "learning_rate": 7.171609245529874e-05, | |
| "loss": 1.1861, | |
| "step": 13180 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "grad_norm": 1.0283684730529785, | |
| "learning_rate": 7.1694286960314e-05, | |
| "loss": 1.1749, | |
| "step": 13190 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "grad_norm": 0.9847733974456787, | |
| "learning_rate": 7.167248146532926e-05, | |
| "loss": 1.1903, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "grad_norm": 1.0302000045776367, | |
| "learning_rate": 7.165067597034453e-05, | |
| "loss": 1.1852, | |
| "step": 13210 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "grad_norm": 1.0097705125808716, | |
| "learning_rate": 7.16288704753598e-05, | |
| "loss": 1.1874, | |
| "step": 13220 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "grad_norm": 1.1593202352523804, | |
| "learning_rate": 7.160706498037506e-05, | |
| "loss": 1.1827, | |
| "step": 13230 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "grad_norm": 0.9892207384109497, | |
| "learning_rate": 7.158525948539033e-05, | |
| "loss": 1.1694, | |
| "step": 13240 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "grad_norm": 1.0846501588821411, | |
| "learning_rate": 7.156345399040558e-05, | |
| "loss": 1.1892, | |
| "step": 13250 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "grad_norm": 1.014400601387024, | |
| "learning_rate": 7.154164849542085e-05, | |
| "loss": 1.1806, | |
| "step": 13260 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "grad_norm": 1.0073882341384888, | |
| "learning_rate": 7.151984300043612e-05, | |
| "loss": 1.1781, | |
| "step": 13270 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "grad_norm": 1.2205009460449219, | |
| "learning_rate": 7.149803750545138e-05, | |
| "loss": 1.1757, | |
| "step": 13280 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "grad_norm": 1.058864951133728, | |
| "learning_rate": 7.147623201046664e-05, | |
| "loss": 1.1968, | |
| "step": 13290 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "grad_norm": 1.0327656269073486, | |
| "learning_rate": 7.14544265154819e-05, | |
| "loss": 1.2232, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "grad_norm": 1.042557954788208, | |
| "learning_rate": 7.143262102049717e-05, | |
| "loss": 1.2254, | |
| "step": 13310 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "grad_norm": 0.9692584276199341, | |
| "learning_rate": 7.141081552551243e-05, | |
| "loss": 1.1757, | |
| "step": 13320 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "grad_norm": 1.0381295680999756, | |
| "learning_rate": 7.13890100305277e-05, | |
| "loss": 1.1961, | |
| "step": 13330 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "grad_norm": 1.0239328145980835, | |
| "learning_rate": 7.136720453554296e-05, | |
| "loss": 1.155, | |
| "step": 13340 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "grad_norm": 1.0357582569122314, | |
| "learning_rate": 7.134539904055822e-05, | |
| "loss": 1.1719, | |
| "step": 13350 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "grad_norm": 1.0303056240081787, | |
| "learning_rate": 7.132359354557348e-05, | |
| "loss": 1.1934, | |
| "step": 13360 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "grad_norm": 0.9931465983390808, | |
| "learning_rate": 7.130178805058875e-05, | |
| "loss": 1.1791, | |
| "step": 13370 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "grad_norm": 1.0507264137268066, | |
| "learning_rate": 7.127998255560402e-05, | |
| "loss": 1.184, | |
| "step": 13380 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "grad_norm": 1.0703891515731812, | |
| "learning_rate": 7.125817706061927e-05, | |
| "loss": 1.1853, | |
| "step": 13390 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "grad_norm": 0.9957337975502014, | |
| "learning_rate": 7.123637156563454e-05, | |
| "loss": 1.1702, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "grad_norm": 1.1027911901474, | |
| "learning_rate": 7.12145660706498e-05, | |
| "loss": 1.1968, | |
| "step": 13410 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "grad_norm": 0.9877254366874695, | |
| "learning_rate": 7.119276057566508e-05, | |
| "loss": 1.1752, | |
| "step": 13420 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "grad_norm": 1.0115269422531128, | |
| "learning_rate": 7.117095508068034e-05, | |
| "loss": 1.1546, | |
| "step": 13430 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "grad_norm": 0.9738414287567139, | |
| "learning_rate": 7.11491495856956e-05, | |
| "loss": 1.1576, | |
| "step": 13440 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "grad_norm": 1.0419977903366089, | |
| "learning_rate": 7.112734409071087e-05, | |
| "loss": 1.1927, | |
| "step": 13450 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "grad_norm": 1.0933623313903809, | |
| "learning_rate": 7.110553859572613e-05, | |
| "loss": 1.1747, | |
| "step": 13460 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "grad_norm": 1.0882395505905151, | |
| "learning_rate": 7.108373310074139e-05, | |
| "loss": 1.189, | |
| "step": 13470 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "grad_norm": 0.9442359209060669, | |
| "learning_rate": 7.106192760575665e-05, | |
| "loss": 1.1826, | |
| "step": 13480 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "grad_norm": 1.0601658821105957, | |
| "learning_rate": 7.104012211077192e-05, | |
| "loss": 1.1854, | |
| "step": 13490 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "grad_norm": 1.0670174360275269, | |
| "learning_rate": 7.101831661578718e-05, | |
| "loss": 1.1893, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "grad_norm": 1.0757992267608643, | |
| "learning_rate": 7.099651112080244e-05, | |
| "loss": 1.1984, | |
| "step": 13510 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "grad_norm": 1.0340900421142578, | |
| "learning_rate": 7.09747056258177e-05, | |
| "loss": 1.2068, | |
| "step": 13520 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "grad_norm": 1.0402545928955078, | |
| "learning_rate": 7.095290013083298e-05, | |
| "loss": 1.208, | |
| "step": 13530 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "grad_norm": 1.1371444463729858, | |
| "learning_rate": 7.093109463584823e-05, | |
| "loss": 1.1883, | |
| "step": 13540 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "grad_norm": 1.0464153289794922, | |
| "learning_rate": 7.09092891408635e-05, | |
| "loss": 1.1896, | |
| "step": 13550 | |
| }, | |
| { | |
| "epoch": 2.94, | |
| "grad_norm": 0.9860671758651733, | |
| "learning_rate": 7.088748364587876e-05, | |
| "loss": 1.1782, | |
| "step": 13560 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "grad_norm": 0.927305281162262, | |
| "learning_rate": 7.086567815089404e-05, | |
| "loss": 1.1759, | |
| "step": 13570 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "grad_norm": 1.0116522312164307, | |
| "learning_rate": 7.08438726559093e-05, | |
| "loss": 1.1845, | |
| "step": 13580 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "grad_norm": 1.0394808053970337, | |
| "learning_rate": 7.082206716092456e-05, | |
| "loss": 1.1949, | |
| "step": 13590 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "grad_norm": 1.1558239459991455, | |
| "learning_rate": 7.080026166593981e-05, | |
| "loss": 1.1758, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "grad_norm": 0.9348282217979431, | |
| "learning_rate": 7.077845617095509e-05, | |
| "loss": 1.1976, | |
| "step": 13610 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "grad_norm": 0.9124108552932739, | |
| "learning_rate": 7.075665067597035e-05, | |
| "loss": 1.172, | |
| "step": 13620 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "grad_norm": 1.077690839767456, | |
| "learning_rate": 7.073484518098561e-05, | |
| "loss": 1.1835, | |
| "step": 13630 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "grad_norm": 0.9495044350624084, | |
| "learning_rate": 7.071303968600088e-05, | |
| "loss": 1.1682, | |
| "step": 13640 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "grad_norm": 0.9947417378425598, | |
| "learning_rate": 7.069123419101614e-05, | |
| "loss": 1.2216, | |
| "step": 13650 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "grad_norm": 1.072772741317749, | |
| "learning_rate": 7.06694286960314e-05, | |
| "loss": 1.2006, | |
| "step": 13660 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "grad_norm": 1.0669934749603271, | |
| "learning_rate": 7.064762320104667e-05, | |
| "loss": 1.1992, | |
| "step": 13670 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "grad_norm": 1.0894432067871094, | |
| "learning_rate": 7.062581770606194e-05, | |
| "loss": 1.1745, | |
| "step": 13680 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "grad_norm": 0.9627017378807068, | |
| "learning_rate": 7.060401221107719e-05, | |
| "loss": 1.1818, | |
| "step": 13690 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "grad_norm": 0.9909853935241699, | |
| "learning_rate": 7.058220671609245e-05, | |
| "loss": 1.1705, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "grad_norm": 1.0125415325164795, | |
| "learning_rate": 7.056040122110772e-05, | |
| "loss": 1.211, | |
| "step": 13710 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "grad_norm": 0.9729527235031128, | |
| "learning_rate": 7.0538595726123e-05, | |
| "loss": 1.1658, | |
| "step": 13720 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "grad_norm": 1.0256701707839966, | |
| "learning_rate": 7.051679023113826e-05, | |
| "loss": 1.1657, | |
| "step": 13730 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "grad_norm": 1.0687954425811768, | |
| "learning_rate": 7.04949847361535e-05, | |
| "loss": 1.1648, | |
| "step": 13740 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "grad_norm": 0.9713466167449951, | |
| "learning_rate": 7.047317924116877e-05, | |
| "loss": 1.1774, | |
| "step": 13750 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "grad_norm": 1.0809965133666992, | |
| "learning_rate": 7.045137374618405e-05, | |
| "loss": 1.1658, | |
| "step": 13760 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "grad_norm": 1.0827128887176514, | |
| "learning_rate": 7.042956825119931e-05, | |
| "loss": 1.1639, | |
| "step": 13770 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "grad_norm": 1.112669825553894, | |
| "learning_rate": 7.040776275621457e-05, | |
| "loss": 1.1743, | |
| "step": 13780 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "grad_norm": 0.9779360890388489, | |
| "learning_rate": 7.038595726122984e-05, | |
| "loss": 1.1823, | |
| "step": 13790 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 1.0385786294937134, | |
| "learning_rate": 7.03641517662451e-05, | |
| "loss": 1.1804, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 1.05619215965271, | |
| "learning_rate": 7.034234627126036e-05, | |
| "loss": 1.1936, | |
| "step": 13810 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_loss": 1.28429114818573, | |
| "eval_runtime": 1501.2758, | |
| "eval_samples_per_second": 257.68, | |
| "eval_steps_per_second": 4.027, | |
| "step": 13818 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 46060, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 500, | |
| "total_flos": 5.893571450073252e+18, | |
| "train_batch_size": 6, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |