| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.0, | |
| "eval_steps": 500, | |
| "global_step": 534, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0037488284910965324, | |
| "grad_norm": 3.1443588733673096, | |
| "learning_rate": 0.0, | |
| "loss": 0.7334, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.007497656982193065, | |
| "grad_norm": 2.8891870975494385, | |
| "learning_rate": 3.7037037037037036e-07, | |
| "loss": 0.8449, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.011246485473289597, | |
| "grad_norm": 3.069650888442993, | |
| "learning_rate": 7.407407407407407e-07, | |
| "loss": 0.8849, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.01499531396438613, | |
| "grad_norm": 3.2767655849456787, | |
| "learning_rate": 1.111111111111111e-06, | |
| "loss": 0.918, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.01874414245548266, | |
| "grad_norm": 3.732121467590332, | |
| "learning_rate": 1.4814814814814815e-06, | |
| "loss": 0.9034, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.022492970946579195, | |
| "grad_norm": 3.3119568824768066, | |
| "learning_rate": 1.8518518518518519e-06, | |
| "loss": 0.8088, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.026241799437675725, | |
| "grad_norm": 2.831653118133545, | |
| "learning_rate": 2.222222222222222e-06, | |
| "loss": 0.7799, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.02999062792877226, | |
| "grad_norm": 2.624796152114868, | |
| "learning_rate": 2.5925925925925925e-06, | |
| "loss": 0.7546, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.033739456419868794, | |
| "grad_norm": 2.72420597076416, | |
| "learning_rate": 2.962962962962963e-06, | |
| "loss": 0.9903, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.03748828491096532, | |
| "grad_norm": 2.9389147758483887, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 0.8715, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.041237113402061855, | |
| "grad_norm": 3.242018461227417, | |
| "learning_rate": 3.7037037037037037e-06, | |
| "loss": 1.1089, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.04498594189315839, | |
| "grad_norm": 3.1691386699676514, | |
| "learning_rate": 4.074074074074074e-06, | |
| "loss": 0.9839, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.04873477038425492, | |
| "grad_norm": 3.211369037628174, | |
| "learning_rate": 4.444444444444444e-06, | |
| "loss": 0.6889, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.05248359887535145, | |
| "grad_norm": 2.753861665725708, | |
| "learning_rate": 4.814814814814815e-06, | |
| "loss": 0.9228, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.056232427366447985, | |
| "grad_norm": 2.5858447551727295, | |
| "learning_rate": 5.185185185185185e-06, | |
| "loss": 0.6551, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.05998125585754452, | |
| "grad_norm": 2.531930446624756, | |
| "learning_rate": 5.555555555555557e-06, | |
| "loss": 0.7092, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.06373008434864105, | |
| "grad_norm": 3.0797035694122314, | |
| "learning_rate": 5.925925925925926e-06, | |
| "loss": 0.6336, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.06747891283973759, | |
| "grad_norm": 2.6957359313964844, | |
| "learning_rate": 6.296296296296297e-06, | |
| "loss": 0.7413, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.07122774133083412, | |
| "grad_norm": 2.9657933712005615, | |
| "learning_rate": 6.666666666666667e-06, | |
| "loss": 0.9917, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.07497656982193064, | |
| "grad_norm": 2.8018271923065186, | |
| "learning_rate": 7.0370370370370375e-06, | |
| "loss": 0.7026, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.07872539831302718, | |
| "grad_norm": 3.1438984870910645, | |
| "learning_rate": 7.4074074074074075e-06, | |
| "loss": 1.0012, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.08247422680412371, | |
| "grad_norm": 2.5763065814971924, | |
| "learning_rate": 7.77777777777778e-06, | |
| "loss": 0.7979, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.08622305529522024, | |
| "grad_norm": 2.6274800300598145, | |
| "learning_rate": 8.148148148148148e-06, | |
| "loss": 0.6811, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.08997188378631678, | |
| "grad_norm": 2.6588563919067383, | |
| "learning_rate": 8.518518518518519e-06, | |
| "loss": 0.7003, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.09372071227741331, | |
| "grad_norm": 2.8751745223999023, | |
| "learning_rate": 8.888888888888888e-06, | |
| "loss": 1.1499, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.09746954076850985, | |
| "grad_norm": 2.9690346717834473, | |
| "learning_rate": 9.25925925925926e-06, | |
| "loss": 0.8019, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.10121836925960637, | |
| "grad_norm": 2.8583548069000244, | |
| "learning_rate": 9.62962962962963e-06, | |
| "loss": 0.926, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.1049671977507029, | |
| "grad_norm": 2.719120502471924, | |
| "learning_rate": 1e-05, | |
| "loss": 0.8262, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.10871602624179943, | |
| "grad_norm": 2.5264711380004883, | |
| "learning_rate": 9.999904010783725e-06, | |
| "loss": 0.7492, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.11246485473289597, | |
| "grad_norm": 2.5457921028137207, | |
| "learning_rate": 9.999616046820467e-06, | |
| "loss": 0.8453, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.1162136832239925, | |
| "grad_norm": 2.66349720954895, | |
| "learning_rate": 9.999136119166803e-06, | |
| "loss": 0.6645, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.11996251171508904, | |
| "grad_norm": 2.695261240005493, | |
| "learning_rate": 9.998464246249885e-06, | |
| "loss": 1.0498, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.12371134020618557, | |
| "grad_norm": 2.371567487716675, | |
| "learning_rate": 9.997600453866734e-06, | |
| "loss": 0.9992, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.1274601686972821, | |
| "grad_norm": 2.551347255706787, | |
| "learning_rate": 9.99654477518325e-06, | |
| "loss": 0.786, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.13120899718837864, | |
| "grad_norm": 2.8569281101226807, | |
| "learning_rate": 9.995297250732942e-06, | |
| "loss": 0.7571, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.13495782567947517, | |
| "grad_norm": 2.9101366996765137, | |
| "learning_rate": 9.99385792841537e-06, | |
| "loss": 0.8682, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.1387066541705717, | |
| "grad_norm": 2.8088221549987793, | |
| "learning_rate": 9.9922268634943e-06, | |
| "loss": 0.9882, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.14245548266166824, | |
| "grad_norm": 2.6386826038360596, | |
| "learning_rate": 9.99040411859559e-06, | |
| "loss": 0.8706, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.14620431115276475, | |
| "grad_norm": 2.306828022003174, | |
| "learning_rate": 9.98838976370478e-06, | |
| "loss": 0.7294, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.14995313964386128, | |
| "grad_norm": 3.0453381538391113, | |
| "learning_rate": 9.986183876164412e-06, | |
| "loss": 0.7128, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.15370196813495782, | |
| "grad_norm": 2.72247314453125, | |
| "learning_rate": 9.983786540671052e-06, | |
| "loss": 0.8392, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.15745079662605435, | |
| "grad_norm": 2.6883301734924316, | |
| "learning_rate": 9.981197849272039e-06, | |
| "loss": 0.6254, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.16119962511715089, | |
| "grad_norm": 2.640571355819702, | |
| "learning_rate": 9.978417901361958e-06, | |
| "loss": 0.7416, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.16494845360824742, | |
| "grad_norm": 2.496026039123535, | |
| "learning_rate": 9.975446803678818e-06, | |
| "loss": 0.9206, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.16869728209934395, | |
| "grad_norm": 2.7780568599700928, | |
| "learning_rate": 9.972284670299957e-06, | |
| "loss": 0.8848, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.1724461105904405, | |
| "grad_norm": 2.521110773086548, | |
| "learning_rate": 9.968931622637652e-06, | |
| "loss": 0.6183, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.17619493908153702, | |
| "grad_norm": 2.378988742828369, | |
| "learning_rate": 9.965387789434474e-06, | |
| "loss": 0.7197, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.17994376757263356, | |
| "grad_norm": 2.662712574005127, | |
| "learning_rate": 9.961653306758326e-06, | |
| "loss": 0.8191, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.1836925960637301, | |
| "grad_norm": 2.818058729171753, | |
| "learning_rate": 9.95772831799724e-06, | |
| "loss": 0.945, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.18744142455482662, | |
| "grad_norm": 2.4332478046417236, | |
| "learning_rate": 9.953612973853853e-06, | |
| "loss": 0.743, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.19119025304592316, | |
| "grad_norm": 2.722165584564209, | |
| "learning_rate": 9.949307432339625e-06, | |
| "loss": 0.7276, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.1949390815370197, | |
| "grad_norm": 3.4195361137390137, | |
| "learning_rate": 9.944811858768782e-06, | |
| "loss": 0.9687, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.19868791002811623, | |
| "grad_norm": 3.273954391479492, | |
| "learning_rate": 9.940126425751957e-06, | |
| "loss": 0.8701, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.20243673851921273, | |
| "grad_norm": 2.8895037174224854, | |
| "learning_rate": 9.935251313189564e-06, | |
| "loss": 1.1019, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.20618556701030927, | |
| "grad_norm": 2.51041316986084, | |
| "learning_rate": 9.930186708264902e-06, | |
| "loss": 0.6458, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.2099343955014058, | |
| "grad_norm": 2.807650566101074, | |
| "learning_rate": 9.92493280543695e-06, | |
| "loss": 0.7822, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.21368322399250234, | |
| "grad_norm": 2.7724640369415283, | |
| "learning_rate": 9.919489806432915e-06, | |
| "loss": 0.9093, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.21743205248359887, | |
| "grad_norm": 2.830974578857422, | |
| "learning_rate": 9.913857920240481e-06, | |
| "loss": 1.0478, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.2211808809746954, | |
| "grad_norm": 2.7909905910491943, | |
| "learning_rate": 9.908037363099782e-06, | |
| "loss": 0.6055, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.22492970946579194, | |
| "grad_norm": 2.5817978382110596, | |
| "learning_rate": 9.90202835849511e-06, | |
| "loss": 0.5351, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.22867853795688847, | |
| "grad_norm": 2.8909380435943604, | |
| "learning_rate": 9.895831137146319e-06, | |
| "loss": 0.7823, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.232427366447985, | |
| "grad_norm": 2.5689468383789062, | |
| "learning_rate": 9.889445936999978e-06, | |
| "loss": 0.7244, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.23617619493908154, | |
| "grad_norm": 2.3992927074432373, | |
| "learning_rate": 9.882873003220229e-06, | |
| "loss": 0.6762, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.23992502343017807, | |
| "grad_norm": 2.6709885597229004, | |
| "learning_rate": 9.876112588179378e-06, | |
| "loss": 1.0162, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.2436738519212746, | |
| "grad_norm": 2.898042917251587, | |
| "learning_rate": 9.869164951448201e-06, | |
| "loss": 0.8213, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.24742268041237114, | |
| "grad_norm": 2.4519455432891846, | |
| "learning_rate": 9.86203035978598e-06, | |
| "loss": 0.6992, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.2511715089034677, | |
| "grad_norm": 2.6127779483795166, | |
| "learning_rate": 9.854709087130261e-06, | |
| "loss": 0.7047, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.2549203373945642, | |
| "grad_norm": 2.591939926147461, | |
| "learning_rate": 9.847201414586331e-06, | |
| "loss": 0.8114, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.25866916588566075, | |
| "grad_norm": 2.322962522506714, | |
| "learning_rate": 9.839507630416436e-06, | |
| "loss": 0.567, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.2624179943767573, | |
| "grad_norm": 2.7526068687438965, | |
| "learning_rate": 9.831628030028698e-06, | |
| "loss": 0.8195, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.2661668228678538, | |
| "grad_norm": 3.0277445316314697, | |
| "learning_rate": 9.82356291596578e-06, | |
| "loss": 1.1821, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.26991565135895035, | |
| "grad_norm": 2.836676836013794, | |
| "learning_rate": 9.81531259789328e-06, | |
| "loss": 1.1377, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.2736644798500469, | |
| "grad_norm": 2.7574238777160645, | |
| "learning_rate": 9.80687739258782e-06, | |
| "loss": 0.8165, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.2774133083411434, | |
| "grad_norm": 2.917651414871216, | |
| "learning_rate": 9.7982576239249e-06, | |
| "loss": 0.8276, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.28116213683223995, | |
| "grad_norm": 2.5589005947113037, | |
| "learning_rate": 9.789453622866455e-06, | |
| "loss": 0.7906, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.2849109653233365, | |
| "grad_norm": 2.742344379425049, | |
| "learning_rate": 9.78046572744815e-06, | |
| "loss": 0.9195, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.28865979381443296, | |
| "grad_norm": 2.717301845550537, | |
| "learning_rate": 9.771294282766399e-06, | |
| "loss": 0.9222, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.2924086223055295, | |
| "grad_norm": 2.7094359397888184, | |
| "learning_rate": 9.761939640965117e-06, | |
| "loss": 0.8209, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.29615745079662603, | |
| "grad_norm": 2.3744776248931885, | |
| "learning_rate": 9.7524021612222e-06, | |
| "loss": 0.8834, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.29990627928772257, | |
| "grad_norm": 2.6806893348693848, | |
| "learning_rate": 9.742682209735727e-06, | |
| "loss": 0.6742, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.3036551077788191, | |
| "grad_norm": 2.5327813625335693, | |
| "learning_rate": 9.732780159709912e-06, | |
| "loss": 0.8571, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.30740393626991563, | |
| "grad_norm": 2.423889636993408, | |
| "learning_rate": 9.722696391340762e-06, | |
| "loss": 0.6847, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.31115276476101217, | |
| "grad_norm": 3.3473219871520996, | |
| "learning_rate": 9.712431291801483e-06, | |
| "loss": 0.997, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.3149015932521087, | |
| "grad_norm": 2.3630363941192627, | |
| "learning_rate": 9.701985255227624e-06, | |
| "loss": 0.7263, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.31865042174320524, | |
| "grad_norm": 2.9431068897247314, | |
| "learning_rate": 9.691358682701927e-06, | |
| "loss": 0.9122, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.32239925023430177, | |
| "grad_norm": 2.4724693298339844, | |
| "learning_rate": 9.680551982238941e-06, | |
| "loss": 0.85, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.3261480787253983, | |
| "grad_norm": 2.641633987426758, | |
| "learning_rate": 9.669565568769348e-06, | |
| "loss": 0.7426, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.32989690721649484, | |
| "grad_norm": 2.5549933910369873, | |
| "learning_rate": 9.658399864124037e-06, | |
| "loss": 0.7846, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.3336457357075914, | |
| "grad_norm": 2.469627618789673, | |
| "learning_rate": 9.647055297017901e-06, | |
| "loss": 0.745, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.3373945641986879, | |
| "grad_norm": 2.484022378921509, | |
| "learning_rate": 9.635532303033386e-06, | |
| "loss": 0.7989, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.34114339268978444, | |
| "grad_norm": 2.852081060409546, | |
| "learning_rate": 9.623831324603755e-06, | |
| "loss": 0.6607, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.344892221180881, | |
| "grad_norm": 2.902130126953125, | |
| "learning_rate": 9.611952810996104e-06, | |
| "loss": 0.8182, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.3486410496719775, | |
| "grad_norm": 2.2227494716644287, | |
| "learning_rate": 9.599897218294122e-06, | |
| "loss": 0.6317, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.35238987816307404, | |
| "grad_norm": 2.668668508529663, | |
| "learning_rate": 9.587665009380565e-06, | |
| "loss": 0.8303, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.3561387066541706, | |
| "grad_norm": 2.8259904384613037, | |
| "learning_rate": 9.575256653919494e-06, | |
| "loss": 0.8235, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.3598875351452671, | |
| "grad_norm": 2.3626370429992676, | |
| "learning_rate": 9.562672628338233e-06, | |
| "loss": 0.8566, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.36363636363636365, | |
| "grad_norm": 2.487170934677124, | |
| "learning_rate": 9.549913415809084e-06, | |
| "loss": 0.8119, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.3673851921274602, | |
| "grad_norm": 2.4403631687164307, | |
| "learning_rate": 9.536979506230772e-06, | |
| "loss": 0.6435, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.3711340206185567, | |
| "grad_norm": 2.5403356552124023, | |
| "learning_rate": 9.523871396209633e-06, | |
| "loss": 0.8494, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.37488284910965325, | |
| "grad_norm": 2.5626585483551025, | |
| "learning_rate": 9.510589589040554e-06, | |
| "loss": 0.8538, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.3786316776007498, | |
| "grad_norm": 2.8327720165252686, | |
| "learning_rate": 9.497134594687635e-06, | |
| "loss": 0.8465, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.3823805060918463, | |
| "grad_norm": 2.6077094078063965, | |
| "learning_rate": 9.483506929764623e-06, | |
| "loss": 0.9154, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.38612933458294285, | |
| "grad_norm": 2.6204569339752197, | |
| "learning_rate": 9.469707117515068e-06, | |
| "loss": 0.8607, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.3898781630740394, | |
| "grad_norm": 2.281536340713501, | |
| "learning_rate": 9.455735687792233e-06, | |
| "loss": 0.6667, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.3936269915651359, | |
| "grad_norm": 2.339271068572998, | |
| "learning_rate": 9.44159317703876e-06, | |
| "loss": 0.7999, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.39737582005623245, | |
| "grad_norm": 2.6674795150756836, | |
| "learning_rate": 9.427280128266049e-06, | |
| "loss": 0.8966, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.40112464854732893, | |
| "grad_norm": 2.6626410484313965, | |
| "learning_rate": 9.412797091033444e-06, | |
| "loss": 0.9888, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.40487347703842547, | |
| "grad_norm": 2.451646327972412, | |
| "learning_rate": 9.398144621427095e-06, | |
| "loss": 0.7585, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.408622305529522, | |
| "grad_norm": 2.4723331928253174, | |
| "learning_rate": 9.383323282038632e-06, | |
| "loss": 0.7715, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.41237113402061853, | |
| "grad_norm": 3.4575955867767334, | |
| "learning_rate": 9.368333641943558e-06, | |
| "loss": 0.6064, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.41611996251171507, | |
| "grad_norm": 2.347303867340088, | |
| "learning_rate": 9.353176276679397e-06, | |
| "loss": 0.8154, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.4198687910028116, | |
| "grad_norm": 2.850472927093506, | |
| "learning_rate": 9.337851768223589e-06, | |
| "loss": 0.8903, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.42361761949390814, | |
| "grad_norm": 2.8391191959381104, | |
| "learning_rate": 9.322360704971161e-06, | |
| "loss": 0.8927, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.42736644798500467, | |
| "grad_norm": 2.2651166915893555, | |
| "learning_rate": 9.30670368171212e-06, | |
| "loss": 0.5307, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.4311152764761012, | |
| "grad_norm": 2.3141026496887207, | |
| "learning_rate": 9.29088129960862e-06, | |
| "loss": 0.7067, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.43486410496719774, | |
| "grad_norm": 2.7184128761291504, | |
| "learning_rate": 9.274894166171888e-06, | |
| "loss": 1.0238, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.4386129334582943, | |
| "grad_norm": 2.4955923557281494, | |
| "learning_rate": 9.258742895238886e-06, | |
| "loss": 0.7917, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.4423617619493908, | |
| "grad_norm": 2.485121726989746, | |
| "learning_rate": 9.242428106948748e-06, | |
| "loss": 0.9189, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.44611059044048734, | |
| "grad_norm": 2.644423723220825, | |
| "learning_rate": 9.225950427718974e-06, | |
| "loss": 0.8239, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.4498594189315839, | |
| "grad_norm": 2.267098903656006, | |
| "learning_rate": 9.209310490221368e-06, | |
| "loss": 0.5885, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.4536082474226804, | |
| "grad_norm": 3.184373140335083, | |
| "learning_rate": 9.192508933357753e-06, | |
| "loss": 0.9993, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.45735707591377694, | |
| "grad_norm": 2.420222282409668, | |
| "learning_rate": 9.175546402235443e-06, | |
| "loss": 0.6963, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.4611059044048735, | |
| "grad_norm": 2.1986594200134277, | |
| "learning_rate": 9.158423548142459e-06, | |
| "loss": 0.6484, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.46485473289597, | |
| "grad_norm": 2.687732219696045, | |
| "learning_rate": 9.141141028522544e-06, | |
| "loss": 0.8614, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.46860356138706655, | |
| "grad_norm": 2.5353922843933105, | |
| "learning_rate": 9.123699506949903e-06, | |
| "loss": 0.901, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.4723523898781631, | |
| "grad_norm": 2.416156768798828, | |
| "learning_rate": 9.106099653103729e-06, | |
| "loss": 0.6771, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.4761012183692596, | |
| "grad_norm": 2.7599902153015137, | |
| "learning_rate": 9.088342142742493e-06, | |
| "loss": 0.6258, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.47985004686035615, | |
| "grad_norm": 2.6150290966033936, | |
| "learning_rate": 9.070427657677996e-06, | |
| "loss": 0.923, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.4835988753514527, | |
| "grad_norm": 2.746690034866333, | |
| "learning_rate": 9.052356885749191e-06, | |
| "loss": 0.7873, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.4873477038425492, | |
| "grad_norm": 2.5986545085906982, | |
| "learning_rate": 9.034130520795774e-06, | |
| "loss": 0.6669, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.49109653233364575, | |
| "grad_norm": 2.674226760864258, | |
| "learning_rate": 9.015749262631537e-06, | |
| "loss": 0.8936, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.4948453608247423, | |
| "grad_norm": 2.7541725635528564, | |
| "learning_rate": 8.997213817017508e-06, | |
| "loss": 0.9551, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.4985941893158388, | |
| "grad_norm": 2.3283989429473877, | |
| "learning_rate": 8.978524895634842e-06, | |
| "loss": 0.8066, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.5023430178069354, | |
| "grad_norm": 2.7233939170837402, | |
| "learning_rate": 8.959683216057512e-06, | |
| "loss": 0.8723, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.5060918462980318, | |
| "grad_norm": 3.2135050296783447, | |
| "learning_rate": 8.940689501724737e-06, | |
| "loss": 0.9622, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.5098406747891284, | |
| "grad_norm": 2.69746994972229, | |
| "learning_rate": 8.921544481913218e-06, | |
| "loss": 0.9274, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.5135895032802249, | |
| "grad_norm": 2.718744993209839, | |
| "learning_rate": 8.902248891709133e-06, | |
| "loss": 0.7959, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.5173383317713215, | |
| "grad_norm": 2.5741381645202637, | |
| "learning_rate": 8.882803471979917e-06, | |
| "loss": 0.8901, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.521087160262418, | |
| "grad_norm": 2.678241491317749, | |
| "learning_rate": 8.86320896934581e-06, | |
| "loss": 0.8386, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.5248359887535146, | |
| "grad_norm": 3.3864355087280273, | |
| "learning_rate": 8.843466136151191e-06, | |
| "loss": 0.8401, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.528584817244611, | |
| "grad_norm": 2.4285075664520264, | |
| "learning_rate": 8.823575730435694e-06, | |
| "loss": 0.7686, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.5323336457357076, | |
| "grad_norm": 2.551071882247925, | |
| "learning_rate": 8.803538515905102e-06, | |
| "loss": 0.7005, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.5360824742268041, | |
| "grad_norm": 2.8349006175994873, | |
| "learning_rate": 8.783355261902023e-06, | |
| "loss": 0.7764, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.5398313027179007, | |
| "grad_norm": 2.5966458320617676, | |
| "learning_rate": 8.763026743376349e-06, | |
| "loss": 0.6911, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.5435801312089972, | |
| "grad_norm": 2.6098828315734863, | |
| "learning_rate": 8.742553740855507e-06, | |
| "loss": 0.8884, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.5473289597000938, | |
| "grad_norm": 2.8855576515197754, | |
| "learning_rate": 8.721937040414481e-06, | |
| "loss": 0.7172, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.5510777881911902, | |
| "grad_norm": 3.230717420578003, | |
| "learning_rate": 8.70117743364564e-06, | |
| "loss": 0.7861, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.5548266166822868, | |
| "grad_norm": 2.7732186317443848, | |
| "learning_rate": 8.680275717628336e-06, | |
| "loss": 0.657, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.5585754451733833, | |
| "grad_norm": 2.7117972373962402, | |
| "learning_rate": 8.659232694898307e-06, | |
| "loss": 0.8825, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.5623242736644799, | |
| "grad_norm": 2.5794482231140137, | |
| "learning_rate": 8.638049173416855e-06, | |
| "loss": 0.8581, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.5660731021555764, | |
| "grad_norm": 2.9475417137145996, | |
| "learning_rate": 8.616725966539831e-06, | |
| "loss": 0.908, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.569821930646673, | |
| "grad_norm": 2.9310085773468018, | |
| "learning_rate": 8.595263892986403e-06, | |
| "loss": 0.986, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.5735707591377694, | |
| "grad_norm": 2.635514974594116, | |
| "learning_rate": 8.573663776807615e-06, | |
| "loss": 0.6768, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.5773195876288659, | |
| "grad_norm": 2.826488733291626, | |
| "learning_rate": 8.551926447354759e-06, | |
| "loss": 1.0033, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.5810684161199625, | |
| "grad_norm": 2.487631320953369, | |
| "learning_rate": 8.530052739247522e-06, | |
| "loss": 0.7031, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.584817244611059, | |
| "grad_norm": 2.5516085624694824, | |
| "learning_rate": 8.508043492341944e-06, | |
| "loss": 0.7245, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.5885660731021556, | |
| "grad_norm": 2.3507580757141113, | |
| "learning_rate": 8.485899551698166e-06, | |
| "loss": 0.8073, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.5923149015932521, | |
| "grad_norm": 2.7757110595703125, | |
| "learning_rate": 8.463621767547998e-06, | |
| "loss": 0.8046, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.5960637300843487, | |
| "grad_norm": 2.639394521713257, | |
| "learning_rate": 8.44121099526225e-06, | |
| "loss": 0.7603, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.5998125585754451, | |
| "grad_norm": 2.907921314239502, | |
| "learning_rate": 8.418668095317912e-06, | |
| "loss": 0.9165, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.6035613870665417, | |
| "grad_norm": 2.7137627601623535, | |
| "learning_rate": 8.395993933265102e-06, | |
| "loss": 0.8256, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.6073102155576382, | |
| "grad_norm": 2.4688782691955566, | |
| "learning_rate": 8.373189379693838e-06, | |
| "loss": 0.9909, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.6110590440487348, | |
| "grad_norm": 2.68072772026062, | |
| "learning_rate": 8.350255310200611e-06, | |
| "loss": 0.9874, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.6148078725398313, | |
| "grad_norm": 3.0509040355682373, | |
| "learning_rate": 8.327192605354766e-06, | |
| "loss": 0.9298, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.6185567010309279, | |
| "grad_norm": 2.4764254093170166, | |
| "learning_rate": 8.304002150664684e-06, | |
| "loss": 0.7797, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.6223055295220243, | |
| "grad_norm": 2.992629051208496, | |
| "learning_rate": 8.280684836543794e-06, | |
| "loss": 0.8159, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.6260543580131209, | |
| "grad_norm": 2.652869462966919, | |
| "learning_rate": 8.257241558276381e-06, | |
| "loss": 0.597, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.6298031865042174, | |
| "grad_norm": 2.40967059135437, | |
| "learning_rate": 8.233673215983207e-06, | |
| "loss": 0.7821, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.633552014995314, | |
| "grad_norm": 2.9071333408355713, | |
| "learning_rate": 8.209980714586955e-06, | |
| "loss": 0.9107, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.6373008434864105, | |
| "grad_norm": 2.4595487117767334, | |
| "learning_rate": 8.18616496377748e-06, | |
| "loss": 0.8695, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.6410496719775071, | |
| "grad_norm": 2.700758934020996, | |
| "learning_rate": 8.162226877976886e-06, | |
| "loss": 0.8954, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.6447985004686035, | |
| "grad_norm": 3.0076358318328857, | |
| "learning_rate": 8.138167376304411e-06, | |
| "loss": 0.7885, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.6485473289597001, | |
| "grad_norm": 2.321608543395996, | |
| "learning_rate": 8.113987382541138e-06, | |
| "loss": 0.6624, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.6522961574507966, | |
| "grad_norm": 2.736940383911133, | |
| "learning_rate": 8.089687825094524e-06, | |
| "loss": 0.9155, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.6560449859418932, | |
| "grad_norm": 2.9796676635742188, | |
| "learning_rate": 8.065269636962765e-06, | |
| "loss": 0.8578, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.6597938144329897, | |
| "grad_norm": 2.3743815422058105, | |
| "learning_rate": 8.040733755698954e-06, | |
| "loss": 0.831, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.6635426429240863, | |
| "grad_norm": 2.536311388015747, | |
| "learning_rate": 8.016081123375098e-06, | |
| "loss": 0.6518, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.6672914714151827, | |
| "grad_norm": 2.936393976211548, | |
| "learning_rate": 7.991312686545939e-06, | |
| "loss": 0.8743, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.6710402999062793, | |
| "grad_norm": 2.9552223682403564, | |
| "learning_rate": 7.96642939621261e-06, | |
| "loss": 1.0273, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.6747891283973758, | |
| "grad_norm": 2.538328170776367, | |
| "learning_rate": 7.94143220778613e-06, | |
| "loss": 0.786, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.6785379568884724, | |
| "grad_norm": 2.406303882598877, | |
| "learning_rate": 7.916322081050708e-06, | |
| "loss": 0.8003, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.6822867853795689, | |
| "grad_norm": 2.377192735671997, | |
| "learning_rate": 7.8910999801269e-06, | |
| "loss": 0.7355, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.6860356138706654, | |
| "grad_norm": 3.0302469730377197, | |
| "learning_rate": 7.865766873434582e-06, | |
| "loss": 1.0152, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.689784442361762, | |
| "grad_norm": 2.5823426246643066, | |
| "learning_rate": 7.84032373365578e-06, | |
| "loss": 0.8919, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.6935332708528584, | |
| "grad_norm": 2.891167640686035, | |
| "learning_rate": 7.814771537697312e-06, | |
| "loss": 0.8386, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.697282099343955, | |
| "grad_norm": 2.61259126663208, | |
| "learning_rate": 7.789111266653285e-06, | |
| "loss": 0.8342, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.7010309278350515, | |
| "grad_norm": 3.1016290187835693, | |
| "learning_rate": 7.76334390576742e-06, | |
| "loss": 0.8578, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.7047797563261481, | |
| "grad_norm": 2.2901554107666016, | |
| "learning_rate": 7.737470444395227e-06, | |
| "loss": 0.6153, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.7085285848172446, | |
| "grad_norm": 3.0746381282806396, | |
| "learning_rate": 7.71149187596602e-06, | |
| "loss": 0.7487, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.7122774133083412, | |
| "grad_norm": 2.534573554992676, | |
| "learning_rate": 7.685409197944768e-06, | |
| "loss": 0.9354, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.7160262417994376, | |
| "grad_norm": 2.7725582122802734, | |
| "learning_rate": 7.6592234117938e-06, | |
| "loss": 0.8641, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.7197750702905342, | |
| "grad_norm": 2.284388780593872, | |
| "learning_rate": 7.63293552293435e-06, | |
| "loss": 0.8296, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.7235238987816307, | |
| "grad_norm": 2.665536880493164, | |
| "learning_rate": 7.60654654070796e-06, | |
| "loss": 0.7976, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.7272727272727273, | |
| "grad_norm": 2.496126651763916, | |
| "learning_rate": 7.580057478337717e-06, | |
| "loss": 0.8097, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.7310215557638238, | |
| "grad_norm": 2.898500919342041, | |
| "learning_rate": 7.553469352889356e-06, | |
| "loss": 0.832, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.7347703842549204, | |
| "grad_norm": 2.545947551727295, | |
| "learning_rate": 7.526783185232208e-06, | |
| "loss": 0.8465, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.7385192127460168, | |
| "grad_norm": 2.320594549179077, | |
| "learning_rate": 7.500000000000001e-06, | |
| "loss": 0.6391, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.7422680412371134, | |
| "grad_norm": 2.390089511871338, | |
| "learning_rate": 7.473120825551517e-06, | |
| "loss": 0.8494, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.7460168697282099, | |
| "grad_norm": 3.2954750061035156, | |
| "learning_rate": 7.446146693931111e-06, | |
| "loss": 0.9194, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.7497656982193065, | |
| "grad_norm": 3.3448970317840576, | |
| "learning_rate": 7.419078640829088e-06, | |
| "loss": 0.8001, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.753514526710403, | |
| "grad_norm": 2.490509033203125, | |
| "learning_rate": 7.391917705541927e-06, | |
| "loss": 0.8049, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.7572633552014996, | |
| "grad_norm": 3.141767740249634, | |
| "learning_rate": 7.364664930932385e-06, | |
| "loss": 0.6791, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.761012183692596, | |
| "grad_norm": 2.646108388900757, | |
| "learning_rate": 7.337321363389453e-06, | |
| "loss": 0.6114, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.7647610121836926, | |
| "grad_norm": 2.71254563331604, | |
| "learning_rate": 7.3098880527881755e-06, | |
| "loss": 0.8919, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.7685098406747891, | |
| "grad_norm": 2.701137065887451, | |
| "learning_rate": 7.282366052449351e-06, | |
| "loss": 0.9196, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.7722586691658857, | |
| "grad_norm": 2.5790905952453613, | |
| "learning_rate": 7.254756419099074e-06, | |
| "loss": 0.8328, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.7760074976569822, | |
| "grad_norm": 2.885322332382202, | |
| "learning_rate": 7.227060212828171e-06, | |
| "loss": 0.7947, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.7797563261480788, | |
| "grad_norm": 2.563446044921875, | |
| "learning_rate": 7.199278497051498e-06, | |
| "loss": 0.8801, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.7835051546391752, | |
| "grad_norm": 2.7891945838928223, | |
| "learning_rate": 7.171412338467101e-06, | |
| "loss": 0.7966, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.7872539831302718, | |
| "grad_norm": 2.888983726501465, | |
| "learning_rate": 7.143462807015271e-06, | |
| "loss": 0.7454, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.7910028116213683, | |
| "grad_norm": 2.7022621631622314, | |
| "learning_rate": 7.115430975837457e-06, | |
| "loss": 0.9606, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.7947516401124649, | |
| "grad_norm": 2.689760208129883, | |
| "learning_rate": 7.087317921235059e-06, | |
| "loss": 0.819, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.7985004686035614, | |
| "grad_norm": 2.960847854614258, | |
| "learning_rate": 7.059124722628113e-06, | |
| "loss": 0.9319, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.8022492970946579, | |
| "grad_norm": 3.116780996322632, | |
| "learning_rate": 7.030852462513827e-06, | |
| "loss": 1.0187, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.8059981255857545, | |
| "grad_norm": 2.5313186645507812, | |
| "learning_rate": 7.002502226425042e-06, | |
| "loss": 0.9004, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.8097469540768509, | |
| "grad_norm": 2.4676010608673096, | |
| "learning_rate": 6.974075102888535e-06, | |
| "loss": 0.805, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.8134957825679475, | |
| "grad_norm": 2.8545048236846924, | |
| "learning_rate": 6.945572183383229e-06, | |
| "loss": 0.8092, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.817244611059044, | |
| "grad_norm": 2.710291624069214, | |
| "learning_rate": 6.916994562298286e-06, | |
| "loss": 0.8639, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.8209934395501406, | |
| "grad_norm": 2.850886344909668, | |
| "learning_rate": 6.888343336891088e-06, | |
| "loss": 0.8063, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.8247422680412371, | |
| "grad_norm": 2.6347622871398926, | |
| "learning_rate": 6.859619607245102e-06, | |
| "loss": 0.9418, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.8284910965323337, | |
| "grad_norm": 2.4911224842071533, | |
| "learning_rate": 6.830824476227646e-06, | |
| "loss": 0.633, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.8322399250234301, | |
| "grad_norm": 2.8799281120300293, | |
| "learning_rate": 6.801959049447546e-06, | |
| "loss": 0.7747, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.8359887535145267, | |
| "grad_norm": 2.7227516174316406, | |
| "learning_rate": 6.773024435212678e-06, | |
| "loss": 1.0301, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.8397375820056232, | |
| "grad_norm": 2.352261781692505, | |
| "learning_rate": 6.744021744487422e-06, | |
| "loss": 0.7293, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.8434864104967198, | |
| "grad_norm": 2.8626327514648438, | |
| "learning_rate": 6.714952090849996e-06, | |
| "loss": 1.022, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.8472352389878163, | |
| "grad_norm": 2.3415591716766357, | |
| "learning_rate": 6.685816590449708e-06, | |
| "loss": 0.7533, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.8509840674789129, | |
| "grad_norm": 2.774419069290161, | |
| "learning_rate": 6.6566163619641e-06, | |
| "loss": 0.7825, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.8547328959700093, | |
| "grad_norm": 2.38519287109375, | |
| "learning_rate": 6.62735252655599e-06, | |
| "loss": 0.9026, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.8584817244611059, | |
| "grad_norm": 2.562180280685425, | |
| "learning_rate": 6.598026207830428e-06, | |
| "loss": 0.8274, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.8622305529522024, | |
| "grad_norm": 2.506265640258789, | |
| "learning_rate": 6.568638531791555e-06, | |
| "loss": 0.8533, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.865979381443299, | |
| "grad_norm": 2.4109628200531006, | |
| "learning_rate": 6.539190626799366e-06, | |
| "loss": 0.718, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.8697282099343955, | |
| "grad_norm": 2.856254816055298, | |
| "learning_rate": 6.5096836235263904e-06, | |
| "loss": 0.8667, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.8734770384254921, | |
| "grad_norm": 2.8561127185821533, | |
| "learning_rate": 6.480118654914276e-06, | |
| "loss": 0.5914, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.8772258669165885, | |
| "grad_norm": 2.726389169692993, | |
| "learning_rate": 6.4504968561302905e-06, | |
| "loss": 0.9917, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.8809746954076851, | |
| "grad_norm": 2.7317206859588623, | |
| "learning_rate": 6.4208193645237314e-06, | |
| "loss": 0.9438, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.8847235238987816, | |
| "grad_norm": 2.6856586933135986, | |
| "learning_rate": 6.391087319582264e-06, | |
| "loss": 0.8312, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.8884723523898782, | |
| "grad_norm": 2.7664246559143066, | |
| "learning_rate": 6.3613018628881655e-06, | |
| "loss": 0.8756, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.8922211808809747, | |
| "grad_norm": 2.266479730606079, | |
| "learning_rate": 6.331464138074493e-06, | |
| "loss": 0.7957, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.8959700093720713, | |
| "grad_norm": 2.8850481510162354, | |
| "learning_rate": 6.301575290781174e-06, | |
| "loss": 0.9892, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.8997188378631678, | |
| "grad_norm": 3.300827980041504, | |
| "learning_rate": 6.271636468611022e-06, | |
| "loss": 0.8914, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.9034676663542643, | |
| "grad_norm": 2.5576953887939453, | |
| "learning_rate": 6.241648821085666e-06, | |
| "loss": 0.9061, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.9072164948453608, | |
| "grad_norm": 2.373811721801758, | |
| "learning_rate": 6.211613499601419e-06, | |
| "loss": 0.8346, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.9109653233364574, | |
| "grad_norm": 2.5030999183654785, | |
| "learning_rate": 6.181531657385068e-06, | |
| "loss": 0.8327, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.9147141518275539, | |
| "grad_norm": 3.0258920192718506, | |
| "learning_rate": 6.1514044494496e-06, | |
| "loss": 0.7557, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.9184629803186504, | |
| "grad_norm": 2.5062878131866455, | |
| "learning_rate": 6.1212330325498425e-06, | |
| "loss": 0.8887, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.922211808809747, | |
| "grad_norm": 2.974062442779541, | |
| "learning_rate": 6.091018565138062e-06, | |
| "loss": 0.8752, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.9259606373008434, | |
| "grad_norm": 2.3620338439941406, | |
| "learning_rate": 6.060762207319479e-06, | |
| "loss": 0.5664, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.92970946579194, | |
| "grad_norm": 2.6449522972106934, | |
| "learning_rate": 6.03046512080772e-06, | |
| "loss": 0.7479, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.9334582942830365, | |
| "grad_norm": 2.6556012630462646, | |
| "learning_rate": 6.000128468880223e-06, | |
| "loss": 0.9128, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.9372071227741331, | |
| "grad_norm": 2.5893819332122803, | |
| "learning_rate": 5.9697534163335645e-06, | |
| "loss": 0.7899, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.9409559512652296, | |
| "grad_norm": 2.8196489810943604, | |
| "learning_rate": 5.939341129438739e-06, | |
| "loss": 0.9707, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.9447047797563262, | |
| "grad_norm": 2.6079866886138916, | |
| "learning_rate": 5.908892775896383e-06, | |
| "loss": 0.6921, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.9484536082474226, | |
| "grad_norm": 2.749600648880005, | |
| "learning_rate": 5.878409524791931e-06, | |
| "loss": 0.8562, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.9522024367385192, | |
| "grad_norm": 2.7583858966827393, | |
| "learning_rate": 5.847892546550738e-06, | |
| "loss": 0.7389, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.9559512652296157, | |
| "grad_norm": 2.819322347640991, | |
| "learning_rate": 5.817343012893132e-06, | |
| "loss": 0.9122, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.9597000937207123, | |
| "grad_norm": 2.7966537475585938, | |
| "learning_rate": 5.786762096789431e-06, | |
| "loss": 1.0095, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.9634489222118088, | |
| "grad_norm": 2.982713460922241, | |
| "learning_rate": 5.756150972414904e-06, | |
| "loss": 1.0745, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.9671977507029054, | |
| "grad_norm": 2.7742886543273926, | |
| "learning_rate": 5.725510815104685e-06, | |
| "loss": 0.7517, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.9709465791940018, | |
| "grad_norm": 2.983306646347046, | |
| "learning_rate": 5.694842801308651e-06, | |
| "loss": 0.8668, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.9746954076850984, | |
| "grad_norm": 2.550466299057007, | |
| "learning_rate": 5.664148108546242e-06, | |
| "loss": 0.7585, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.9784442361761949, | |
| "grad_norm": 2.5021555423736572, | |
| "learning_rate": 5.633427915361261e-06, | |
| "loss": 0.7487, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.9821930646672915, | |
| "grad_norm": 2.9954419136047363, | |
| "learning_rate": 5.6026834012766155e-06, | |
| "loss": 0.7231, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.985941893158388, | |
| "grad_norm": 2.553372859954834, | |
| "learning_rate": 5.5719157467490305e-06, | |
| "loss": 0.7305, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.9896907216494846, | |
| "grad_norm": 2.377293825149536, | |
| "learning_rate": 5.541126133123721e-06, | |
| "loss": 0.9153, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.993439550140581, | |
| "grad_norm": 2.6041624546051025, | |
| "learning_rate": 5.510315742589042e-06, | |
| "loss": 0.7345, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.9971883786316776, | |
| "grad_norm": 2.1862921714782715, | |
| "learning_rate": 5.479485758131089e-06, | |
| "loss": 0.8135, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 3.3813092708587646, | |
| "learning_rate": 5.4486373634882805e-06, | |
| "loss": 0.7056, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 1.0037488284910965, | |
| "grad_norm": 2.4991369247436523, | |
| "learning_rate": 5.417771743105908e-06, | |
| "loss": 0.5993, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 1.007497656982193, | |
| "grad_norm": 2.8168036937713623, | |
| "learning_rate": 5.386890082090652e-06, | |
| "loss": 0.6456, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 1.0112464854732897, | |
| "grad_norm": 2.7016103267669678, | |
| "learning_rate": 5.355993566165091e-06, | |
| "loss": 0.7714, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.0149953139643861, | |
| "grad_norm": 2.449699878692627, | |
| "learning_rate": 5.325083381622165e-06, | |
| "loss": 0.7332, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 1.0187441424554826, | |
| "grad_norm": 2.5395665168762207, | |
| "learning_rate": 5.294160715279626e-06, | |
| "loss": 0.7395, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 1.022492970946579, | |
| "grad_norm": 2.3110768795013428, | |
| "learning_rate": 5.263226754434481e-06, | |
| "loss": 0.6565, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 1.0262417994376758, | |
| "grad_norm": 2.3739070892333984, | |
| "learning_rate": 5.232282686817392e-06, | |
| "loss": 0.7151, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 1.0299906279287723, | |
| "grad_norm": 2.500891923904419, | |
| "learning_rate": 5.201329700547077e-06, | |
| "loss": 0.6505, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 1.0337394564198688, | |
| "grad_norm": 2.575612783432007, | |
| "learning_rate": 5.170368984084695e-06, | |
| "loss": 0.737, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 1.0374882849109652, | |
| "grad_norm": 2.4420552253723145, | |
| "learning_rate": 5.139401726188208e-06, | |
| "loss": 0.5698, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 1.041237113402062, | |
| "grad_norm": 2.4866912364959717, | |
| "learning_rate": 5.108429115866744e-06, | |
| "loss": 0.5371, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 1.0449859418931584, | |
| "grad_norm": 2.7172107696533203, | |
| "learning_rate": 5.077452342334939e-06, | |
| "loss": 0.7068, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 1.0487347703842549, | |
| "grad_norm": 2.302205801010132, | |
| "learning_rate": 5.046472594967279e-06, | |
| "loss": 0.6946, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.0524835988753514, | |
| "grad_norm": 2.5750224590301514, | |
| "learning_rate": 5.01549106325243e-06, | |
| "loss": 0.7505, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 1.056232427366448, | |
| "grad_norm": 2.5373618602752686, | |
| "learning_rate": 4.9845089367475715e-06, | |
| "loss": 0.7652, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 1.0599812558575445, | |
| "grad_norm": 2.721522092819214, | |
| "learning_rate": 4.953527405032723e-06, | |
| "loss": 0.7254, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 1.063730084348641, | |
| "grad_norm": 2.75787091255188, | |
| "learning_rate": 4.922547657665062e-06, | |
| "loss": 0.8159, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 1.0674789128397375, | |
| "grad_norm": 2.2756996154785156, | |
| "learning_rate": 4.891570884133256e-06, | |
| "loss": 0.5339, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 1.0712277413308342, | |
| "grad_norm": 2.541193962097168, | |
| "learning_rate": 4.860598273811793e-06, | |
| "loss": 0.6664, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 1.0749765698219307, | |
| "grad_norm": 2.9506993293762207, | |
| "learning_rate": 4.829631015915306e-06, | |
| "loss": 0.6293, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 1.0787253983130272, | |
| "grad_norm": 2.649301290512085, | |
| "learning_rate": 4.798670299452926e-06, | |
| "loss": 0.5073, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 1.0824742268041236, | |
| "grad_norm": 2.1914241313934326, | |
| "learning_rate": 4.767717313182611e-06, | |
| "loss": 0.6079, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 1.0862230552952203, | |
| "grad_norm": 2.44549298286438, | |
| "learning_rate": 4.736773245565521e-06, | |
| "loss": 0.5781, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.0899718837863168, | |
| "grad_norm": 2.684919595718384, | |
| "learning_rate": 4.705839284720376e-06, | |
| "loss": 0.5453, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 1.0937207122774133, | |
| "grad_norm": 2.83229660987854, | |
| "learning_rate": 4.6749166183778375e-06, | |
| "loss": 0.831, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 1.0974695407685098, | |
| "grad_norm": 2.6568431854248047, | |
| "learning_rate": 4.64400643383491e-06, | |
| "loss": 0.587, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 1.1012183692596063, | |
| "grad_norm": 2.890794277191162, | |
| "learning_rate": 4.613109917909349e-06, | |
| "loss": 0.7931, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 1.104967197750703, | |
| "grad_norm": 2.916185140609741, | |
| "learning_rate": 4.582228256894093e-06, | |
| "loss": 0.7106, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 1.1087160262417994, | |
| "grad_norm": 2.470686912536621, | |
| "learning_rate": 4.55136263651172e-06, | |
| "loss": 0.7789, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 1.112464854732896, | |
| "grad_norm": 2.4361371994018555, | |
| "learning_rate": 4.520514241868912e-06, | |
| "loss": 0.6496, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 1.1162136832239926, | |
| "grad_norm": 2.7543270587921143, | |
| "learning_rate": 4.489684257410959e-06, | |
| "loss": 0.6137, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 1.119962511715089, | |
| "grad_norm": 2.6973302364349365, | |
| "learning_rate": 4.458873866876282e-06, | |
| "loss": 0.5914, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 1.1237113402061856, | |
| "grad_norm": 2.2894020080566406, | |
| "learning_rate": 4.428084253250972e-06, | |
| "loss": 0.66, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.127460168697282, | |
| "grad_norm": 2.703667402267456, | |
| "learning_rate": 4.397316598723385e-06, | |
| "loss": 0.6326, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 1.1312089971883785, | |
| "grad_norm": 2.5640034675598145, | |
| "learning_rate": 4.3665720846387406e-06, | |
| "loss": 0.6076, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 1.1349578256794752, | |
| "grad_norm": 2.6277530193328857, | |
| "learning_rate": 4.335851891453759e-06, | |
| "loss": 0.6344, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 1.1387066541705717, | |
| "grad_norm": 2.1854584217071533, | |
| "learning_rate": 4.305157198691351e-06, | |
| "loss": 0.537, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 1.1424554826616682, | |
| "grad_norm": 2.584956407546997, | |
| "learning_rate": 4.2744891848953156e-06, | |
| "loss": 0.6874, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 1.1462043111527647, | |
| "grad_norm": 3.1633033752441406, | |
| "learning_rate": 4.2438490275850965e-06, | |
| "loss": 0.776, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 1.1499531396438614, | |
| "grad_norm": 2.372377634048462, | |
| "learning_rate": 4.2132379032105695e-06, | |
| "loss": 0.6193, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 1.1537019681349578, | |
| "grad_norm": 2.6179869174957275, | |
| "learning_rate": 4.182656987106869e-06, | |
| "loss": 0.8479, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 1.1574507966260543, | |
| "grad_norm": 2.4828739166259766, | |
| "learning_rate": 4.152107453449263e-06, | |
| "loss": 0.5892, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 1.1611996251171508, | |
| "grad_norm": 2.4343421459198, | |
| "learning_rate": 4.121590475208071e-06, | |
| "loss": 0.5757, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.1649484536082475, | |
| "grad_norm": 2.660548210144043, | |
| "learning_rate": 4.091107224103619e-06, | |
| "loss": 0.7259, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 1.168697282099344, | |
| "grad_norm": 2.5796167850494385, | |
| "learning_rate": 4.060658870561263e-06, | |
| "loss": 0.5966, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 1.1724461105904405, | |
| "grad_norm": 2.7497189044952393, | |
| "learning_rate": 4.030246583666437e-06, | |
| "loss": 0.6979, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 1.176194939081537, | |
| "grad_norm": 2.582590103149414, | |
| "learning_rate": 3.999871531119779e-06, | |
| "loss": 0.722, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 1.1799437675726336, | |
| "grad_norm": 2.6821389198303223, | |
| "learning_rate": 3.969534879192281e-06, | |
| "loss": 0.7015, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 1.1836925960637301, | |
| "grad_norm": 2.4163970947265625, | |
| "learning_rate": 3.9392377926805226e-06, | |
| "loss": 0.4959, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 1.1874414245548266, | |
| "grad_norm": 2.1838912963867188, | |
| "learning_rate": 3.9089814348619386e-06, | |
| "loss": 0.6188, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 1.191190253045923, | |
| "grad_norm": 2.767521619796753, | |
| "learning_rate": 3.878766967450158e-06, | |
| "loss": 0.8107, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 1.1949390815370198, | |
| "grad_norm": 2.630614995956421, | |
| "learning_rate": 3.848595550550401e-06, | |
| "loss": 0.7719, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 1.1986879100281163, | |
| "grad_norm": 2.7656266689300537, | |
| "learning_rate": 3.818468342614932e-06, | |
| "loss": 0.6614, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.2024367385192127, | |
| "grad_norm": 2.6769886016845703, | |
| "learning_rate": 3.788386500398583e-06, | |
| "loss": 0.6055, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 1.2061855670103092, | |
| "grad_norm": 2.4502344131469727, | |
| "learning_rate": 3.758351178914336e-06, | |
| "loss": 0.5417, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 1.209934395501406, | |
| "grad_norm": 2.5828676223754883, | |
| "learning_rate": 3.728363531388979e-06, | |
| "loss": 0.6551, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 1.2136832239925024, | |
| "grad_norm": 2.5377285480499268, | |
| "learning_rate": 3.6984247092188265e-06, | |
| "loss": 0.5738, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 1.2174320524835989, | |
| "grad_norm": 3.0588667392730713, | |
| "learning_rate": 3.668535861925509e-06, | |
| "loss": 0.9216, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 1.2211808809746953, | |
| "grad_norm": 2.466240167617798, | |
| "learning_rate": 3.6386981371118358e-06, | |
| "loss": 0.4853, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 1.2249297094657918, | |
| "grad_norm": 2.7123985290527344, | |
| "learning_rate": 3.6089126804177373e-06, | |
| "loss": 0.5507, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 1.2286785379568885, | |
| "grad_norm": 2.5103797912597656, | |
| "learning_rate": 3.5791806354762702e-06, | |
| "loss": 0.723, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 1.232427366447985, | |
| "grad_norm": 2.6206247806549072, | |
| "learning_rate": 3.5495031438697103e-06, | |
| "loss": 0.6191, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 1.2361761949390815, | |
| "grad_norm": 2.5603182315826416, | |
| "learning_rate": 3.519881345085724e-06, | |
| "loss": 0.6315, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.2399250234301782, | |
| "grad_norm": 2.5217936038970947, | |
| "learning_rate": 3.4903163764736104e-06, | |
| "loss": 0.5989, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 1.2436738519212747, | |
| "grad_norm": 2.4896228313446045, | |
| "learning_rate": 3.4608093732006367e-06, | |
| "loss": 0.6737, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 1.2474226804123711, | |
| "grad_norm": 2.5744566917419434, | |
| "learning_rate": 3.4313614682084483e-06, | |
| "loss": 0.5537, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 1.2511715089034676, | |
| "grad_norm": 2.5293214321136475, | |
| "learning_rate": 3.401973792169574e-06, | |
| "loss": 0.6486, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 1.254920337394564, | |
| "grad_norm": 2.5666043758392334, | |
| "learning_rate": 3.372647473444012e-06, | |
| "loss": 0.5285, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 1.2586691658856608, | |
| "grad_norm": 2.9129676818847656, | |
| "learning_rate": 3.343383638035902e-06, | |
| "loss": 0.8922, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 1.2624179943767573, | |
| "grad_norm": 2.596280097961426, | |
| "learning_rate": 3.314183409550293e-06, | |
| "loss": 0.6352, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 1.2661668228678538, | |
| "grad_norm": 2.8564982414245605, | |
| "learning_rate": 3.285047909150006e-06, | |
| "loss": 0.5762, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 1.2699156513589505, | |
| "grad_norm": 2.6609466075897217, | |
| "learning_rate": 3.2559782555125793e-06, | |
| "loss": 0.6421, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 1.273664479850047, | |
| "grad_norm": 2.636430501937866, | |
| "learning_rate": 3.226975564787322e-06, | |
| "loss": 0.8343, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.2774133083411434, | |
| "grad_norm": 2.764880657196045, | |
| "learning_rate": 3.1980409505524546e-06, | |
| "loss": 0.8492, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 1.28116213683224, | |
| "grad_norm": 2.4313178062438965, | |
| "learning_rate": 3.1691755237723538e-06, | |
| "loss": 0.5935, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 1.2849109653233364, | |
| "grad_norm": 3.312020778656006, | |
| "learning_rate": 3.140380392754901e-06, | |
| "loss": 0.7306, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 1.2886597938144329, | |
| "grad_norm": 2.7336678504943848, | |
| "learning_rate": 3.111656663108914e-06, | |
| "loss": 0.6683, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 1.2924086223055296, | |
| "grad_norm": 2.6858274936676025, | |
| "learning_rate": 3.083005437701715e-06, | |
| "loss": 0.6287, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 1.296157450796626, | |
| "grad_norm": 2.664910078048706, | |
| "learning_rate": 3.054427816616773e-06, | |
| "loss": 0.6392, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 1.2999062792877225, | |
| "grad_norm": 2.3806943893432617, | |
| "learning_rate": 3.0259248971114663e-06, | |
| "loss": 0.6144, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 1.3036551077788192, | |
| "grad_norm": 2.755476951599121, | |
| "learning_rate": 2.9974977735749596e-06, | |
| "loss": 0.6259, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 1.3074039362699157, | |
| "grad_norm": 2.4375879764556885, | |
| "learning_rate": 2.969147537486175e-06, | |
| "loss": 0.6063, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 1.3111527647610122, | |
| "grad_norm": 2.6043853759765625, | |
| "learning_rate": 2.9408752773718895e-06, | |
| "loss": 0.9196, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.3149015932521086, | |
| "grad_norm": 2.462339401245117, | |
| "learning_rate": 2.9126820787649403e-06, | |
| "loss": 0.5322, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 1.3186504217432051, | |
| "grad_norm": 2.3056864738464355, | |
| "learning_rate": 2.8845690241625437e-06, | |
| "loss": 0.6108, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.3223992502343018, | |
| "grad_norm": 2.394547462463379, | |
| "learning_rate": 2.8565371929847286e-06, | |
| "loss": 0.6488, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 1.3261480787253983, | |
| "grad_norm": 3.0996365547180176, | |
| "learning_rate": 2.828587661532901e-06, | |
| "loss": 0.6514, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 1.3298969072164948, | |
| "grad_norm": 2.57309889793396, | |
| "learning_rate": 2.800721502948506e-06, | |
| "loss": 0.614, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 1.3336457357075915, | |
| "grad_norm": 2.417073965072632, | |
| "learning_rate": 2.7729397871718306e-06, | |
| "loss": 0.5476, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 1.337394564198688, | |
| "grad_norm": 2.636715888977051, | |
| "learning_rate": 2.7452435809009272e-06, | |
| "loss": 0.7593, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 1.3411433926897844, | |
| "grad_norm": 3.096482992172241, | |
| "learning_rate": 2.7176339475506515e-06, | |
| "loss": 0.8159, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 1.344892221180881, | |
| "grad_norm": 2.1638081073760986, | |
| "learning_rate": 2.6901119472118253e-06, | |
| "loss": 0.5549, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 1.3486410496719774, | |
| "grad_norm": 2.6135005950927734, | |
| "learning_rate": 2.6626786366105493e-06, | |
| "loss": 0.6626, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.352389878163074, | |
| "grad_norm": 2.5569889545440674, | |
| "learning_rate": 2.635335069067617e-06, | |
| "loss": 0.5365, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 1.3561387066541706, | |
| "grad_norm": 2.9490528106689453, | |
| "learning_rate": 2.608082294458074e-06, | |
| "loss": 0.9714, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 1.359887535145267, | |
| "grad_norm": 3.6054556369781494, | |
| "learning_rate": 2.5809213591709124e-06, | |
| "loss": 0.8305, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 1.3636363636363638, | |
| "grad_norm": 2.648580312728882, | |
| "learning_rate": 2.553853306068888e-06, | |
| "loss": 0.7582, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 1.3673851921274602, | |
| "grad_norm": 2.500655174255371, | |
| "learning_rate": 2.5268791744484865e-06, | |
| "loss": 0.5625, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 1.3711340206185567, | |
| "grad_norm": 2.625460147857666, | |
| "learning_rate": 2.5000000000000015e-06, | |
| "loss": 0.8212, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.3748828491096532, | |
| "grad_norm": 2.444305896759033, | |
| "learning_rate": 2.4732168147677927e-06, | |
| "loss": 0.582, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 1.3786316776007497, | |
| "grad_norm": 2.8055059909820557, | |
| "learning_rate": 2.446530647110646e-06, | |
| "loss": 0.5747, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.3823805060918464, | |
| "grad_norm": 2.7823076248168945, | |
| "learning_rate": 2.419942521662285e-06, | |
| "loss": 0.5465, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 1.3861293345829429, | |
| "grad_norm": 3.1207704544067383, | |
| "learning_rate": 2.3934534592920416e-06, | |
| "loss": 0.8112, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.3898781630740393, | |
| "grad_norm": 2.4543776512145996, | |
| "learning_rate": 2.367064477065652e-06, | |
| "loss": 0.7124, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 1.393626991565136, | |
| "grad_norm": 2.6401026248931885, | |
| "learning_rate": 2.3407765882062024e-06, | |
| "loss": 0.6856, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 1.3973758200562325, | |
| "grad_norm": 2.729567527770996, | |
| "learning_rate": 2.314590802055232e-06, | |
| "loss": 0.6516, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 1.401124648547329, | |
| "grad_norm": 2.283984899520874, | |
| "learning_rate": 2.2885081240339813e-06, | |
| "loss": 0.4082, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 1.4048734770384255, | |
| "grad_norm": 2.7015268802642822, | |
| "learning_rate": 2.262529555604774e-06, | |
| "loss": 0.7567, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 1.408622305529522, | |
| "grad_norm": 2.3578450679779053, | |
| "learning_rate": 2.2366560942325833e-06, | |
| "loss": 0.6722, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 1.4123711340206184, | |
| "grad_norm": 2.6497104167938232, | |
| "learning_rate": 2.2108887333467172e-06, | |
| "loss": 0.6284, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 1.4161199625117151, | |
| "grad_norm": 2.5546791553497314, | |
| "learning_rate": 2.1852284623026906e-06, | |
| "loss": 0.5851, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 1.4198687910028116, | |
| "grad_norm": 2.486978530883789, | |
| "learning_rate": 2.159676266344222e-06, | |
| "loss": 0.6988, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 1.423617619493908, | |
| "grad_norm": 2.8536384105682373, | |
| "learning_rate": 2.1342331265654194e-06, | |
| "loss": 0.7193, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.4273664479850048, | |
| "grad_norm": 2.9641923904418945, | |
| "learning_rate": 2.108900019873103e-06, | |
| "loss": 0.6805, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 1.4311152764761013, | |
| "grad_norm": 2.463860034942627, | |
| "learning_rate": 2.0836779189492925e-06, | |
| "loss": 0.7775, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 1.4348641049671977, | |
| "grad_norm": 2.7579569816589355, | |
| "learning_rate": 2.0585677922138696e-06, | |
| "loss": 0.6252, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 1.4386129334582942, | |
| "grad_norm": 2.5609865188598633, | |
| "learning_rate": 2.033570603787391e-06, | |
| "loss": 0.6355, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.4423617619493907, | |
| "grad_norm": 2.9254584312438965, | |
| "learning_rate": 2.0086873134540626e-06, | |
| "loss": 0.6698, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 1.4461105904404874, | |
| "grad_norm": 3.046320915222168, | |
| "learning_rate": 1.9839188766249024e-06, | |
| "loss": 0.6857, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 1.4498594189315839, | |
| "grad_norm": 2.7485857009887695, | |
| "learning_rate": 1.959266244301047e-06, | |
| "loss": 0.8151, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 1.4536082474226804, | |
| "grad_norm": 2.6440517902374268, | |
| "learning_rate": 1.9347303630372373e-06, | |
| "loss": 0.63, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 1.457357075913777, | |
| "grad_norm": 2.842188596725464, | |
| "learning_rate": 1.910312174905477e-06, | |
| "loss": 0.5113, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 1.4611059044048735, | |
| "grad_norm": 2.662004232406616, | |
| "learning_rate": 1.886012617458864e-06, | |
| "loss": 0.7212, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.46485473289597, | |
| "grad_norm": 3.0616824626922607, | |
| "learning_rate": 1.8618326236955908e-06, | |
| "loss": 0.4773, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 1.4686035613870665, | |
| "grad_norm": 2.796537399291992, | |
| "learning_rate": 1.8377731220231144e-06, | |
| "loss": 0.7543, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 1.472352389878163, | |
| "grad_norm": 2.2921605110168457, | |
| "learning_rate": 1.8138350362225193e-06, | |
| "loss": 0.5149, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 1.4761012183692597, | |
| "grad_norm": 3.190627098083496, | |
| "learning_rate": 1.7900192854130465e-06, | |
| "loss": 0.7386, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 1.4798500468603561, | |
| "grad_norm": 2.5983688831329346, | |
| "learning_rate": 1.7663267840167936e-06, | |
| "loss": 0.6061, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 1.4835988753514526, | |
| "grad_norm": 2.47511887550354, | |
| "learning_rate": 1.7427584417236194e-06, | |
| "loss": 0.4628, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 1.4873477038425493, | |
| "grad_norm": 2.6179940700531006, | |
| "learning_rate": 1.7193151634562071e-06, | |
| "loss": 0.7502, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 1.4910965323336458, | |
| "grad_norm": 2.2079148292541504, | |
| "learning_rate": 1.695997849335319e-06, | |
| "loss": 0.7195, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 1.4948453608247423, | |
| "grad_norm": 2.3705711364746094, | |
| "learning_rate": 1.672807394645236e-06, | |
| "loss": 0.679, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 1.4985941893158388, | |
| "grad_norm": 2.5409562587738037, | |
| "learning_rate": 1.6497446897993885e-06, | |
| "loss": 0.6188, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.5023430178069352, | |
| "grad_norm": 2.718851089477539, | |
| "learning_rate": 1.6268106203061628e-06, | |
| "loss": 0.4311, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 1.5060918462980317, | |
| "grad_norm": 2.4857470989227295, | |
| "learning_rate": 1.6040060667348995e-06, | |
| "loss": 0.5452, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 1.5098406747891284, | |
| "grad_norm": 2.272148609161377, | |
| "learning_rate": 1.581331904682089e-06, | |
| "loss": 0.6673, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 1.513589503280225, | |
| "grad_norm": 2.118818759918213, | |
| "learning_rate": 1.5587890047377512e-06, | |
| "loss": 0.474, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 1.5173383317713216, | |
| "grad_norm": 2.6302151679992676, | |
| "learning_rate": 1.5363782324520033e-06, | |
| "loss": 0.7036, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 1.521087160262418, | |
| "grad_norm": 2.78167724609375, | |
| "learning_rate": 1.5141004483018323e-06, | |
| "loss": 0.5538, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 1.5248359887535146, | |
| "grad_norm": 2.7051169872283936, | |
| "learning_rate": 1.4919565076580577e-06, | |
| "loss": 0.6842, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 1.528584817244611, | |
| "grad_norm": 2.7234108448028564, | |
| "learning_rate": 1.4699472607524785e-06, | |
| "loss": 0.6473, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 1.5323336457357075, | |
| "grad_norm": 2.808751106262207, | |
| "learning_rate": 1.4480735526452427e-06, | |
| "loss": 0.7719, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 1.536082474226804, | |
| "grad_norm": 2.5814692974090576, | |
| "learning_rate": 1.426336223192386e-06, | |
| "loss": 0.6786, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.5398313027179007, | |
| "grad_norm": 2.6848583221435547, | |
| "learning_rate": 1.4047361070135996e-06, | |
| "loss": 0.7245, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 1.5435801312089972, | |
| "grad_norm": 2.525697708129883, | |
| "learning_rate": 1.3832740334601692e-06, | |
| "loss": 0.5269, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 1.5473289597000939, | |
| "grad_norm": 2.4603147506713867, | |
| "learning_rate": 1.3619508265831445e-06, | |
| "loss": 0.7593, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 1.5510777881911904, | |
| "grad_norm": 2.712268114089966, | |
| "learning_rate": 1.340767305101694e-06, | |
| "loss": 0.8538, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 1.5548266166822868, | |
| "grad_norm": 2.391826629638672, | |
| "learning_rate": 1.319724282371664e-06, | |
| "loss": 0.7118, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 1.5585754451733833, | |
| "grad_norm": 2.6861860752105713, | |
| "learning_rate": 1.2988225663543601e-06, | |
| "loss": 0.6899, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 1.5623242736644798, | |
| "grad_norm": 3.0465526580810547, | |
| "learning_rate": 1.2780629595855203e-06, | |
| "loss": 0.6717, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 1.5660731021555763, | |
| "grad_norm": 2.586912155151367, | |
| "learning_rate": 1.257446259144494e-06, | |
| "loss": 0.6787, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 1.569821930646673, | |
| "grad_norm": 2.4131391048431396, | |
| "learning_rate": 1.2369732566236508e-06, | |
| "loss": 0.7233, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 1.5735707591377694, | |
| "grad_norm": 2.3727686405181885, | |
| "learning_rate": 1.2166447380979801e-06, | |
| "loss": 0.7571, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.577319587628866, | |
| "grad_norm": 2.27282452583313, | |
| "learning_rate": 1.1964614840949002e-06, | |
| "loss": 0.5189, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 1.5810684161199626, | |
| "grad_norm": 2.7784337997436523, | |
| "learning_rate": 1.1764242695643075e-06, | |
| "loss": 0.4099, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 1.584817244611059, | |
| "grad_norm": 2.5282742977142334, | |
| "learning_rate": 1.1565338638488117e-06, | |
| "loss": 0.5868, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 1.5885660731021556, | |
| "grad_norm": 2.49729061126709, | |
| "learning_rate": 1.1367910306541918e-06, | |
| "loss": 0.6738, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 1.592314901593252, | |
| "grad_norm": 2.5107173919677734, | |
| "learning_rate": 1.1171965280200831e-06, | |
| "loss": 0.5341, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 1.5960637300843485, | |
| "grad_norm": 2.601574659347534, | |
| "learning_rate": 1.097751108290867e-06, | |
| "loss": 0.692, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 1.599812558575445, | |
| "grad_norm": 2.5476865768432617, | |
| "learning_rate": 1.078455518086784e-06, | |
| "loss": 0.7559, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 1.6035613870665417, | |
| "grad_norm": 2.2659873962402344, | |
| "learning_rate": 1.0593104982752645e-06, | |
| "loss": 0.5147, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 1.6073102155576382, | |
| "grad_norm": 2.6162917613983154, | |
| "learning_rate": 1.0403167839424883e-06, | |
| "loss": 0.7862, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 1.611059044048735, | |
| "grad_norm": 3.0038373470306396, | |
| "learning_rate": 1.0214751043651582e-06, | |
| "loss": 0.7098, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.6148078725398314, | |
| "grad_norm": 2.810269832611084, | |
| "learning_rate": 1.0027861829824953e-06, | |
| "loss": 0.4501, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 1.6185567010309279, | |
| "grad_norm": 2.2872512340545654, | |
| "learning_rate": 9.842507373684646e-07, | |
| "loss": 0.6829, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 1.6223055295220243, | |
| "grad_norm": 2.7606618404388428, | |
| "learning_rate": 9.658694792042284e-07, | |
| "loss": 0.8641, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 1.6260543580131208, | |
| "grad_norm": 2.647319793701172, | |
| "learning_rate": 9.476431142508097e-07, | |
| "loss": 0.684, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 1.6298031865042173, | |
| "grad_norm": 2.3936805725097656, | |
| "learning_rate": 9.295723423220049e-07, | |
| "loss": 0.4689, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 1.633552014995314, | |
| "grad_norm": 2.8151843547821045, | |
| "learning_rate": 9.116578572575091e-07, | |
| "loss": 0.7088, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 1.6373008434864105, | |
| "grad_norm": 2.6258625984191895, | |
| "learning_rate": 8.939003468962726e-07, | |
| "loss": 0.7019, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 1.6410496719775072, | |
| "grad_norm": 2.601217031478882, | |
| "learning_rate": 8.763004930500979e-07, | |
| "loss": 0.5881, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 1.6447985004686037, | |
| "grad_norm": 2.325805187225342, | |
| "learning_rate": 8.58858971477457e-07, | |
| "loss": 0.6775, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 1.6485473289597001, | |
| "grad_norm": 2.8295209407806396, | |
| "learning_rate": 8.415764518575415e-07, | |
| "loss": 0.7012, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.6522961574507966, | |
| "grad_norm": 2.421982765197754, | |
| "learning_rate": 8.244535977645584e-07, | |
| "loss": 0.6516, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 1.656044985941893, | |
| "grad_norm": 2.5072197914123535, | |
| "learning_rate": 8.074910666422475e-07, | |
| "loss": 0.7382, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 1.6597938144329896, | |
| "grad_norm": 2.841768264770508, | |
| "learning_rate": 7.906895097786338e-07, | |
| "loss": 0.5838, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 1.6635426429240863, | |
| "grad_norm": 2.738243818283081, | |
| "learning_rate": 7.740495722810271e-07, | |
| "loss": 0.7085, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 1.6672914714151827, | |
| "grad_norm": 2.763880491256714, | |
| "learning_rate": 7.575718930512516e-07, | |
| "loss": 0.6332, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 1.6710402999062794, | |
| "grad_norm": 2.3609251976013184, | |
| "learning_rate": 7.412571047611156e-07, | |
| "loss": 0.5682, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 1.674789128397376, | |
| "grad_norm": 3.1101462841033936, | |
| "learning_rate": 7.25105833828113e-07, | |
| "loss": 0.8626, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 1.6785379568884724, | |
| "grad_norm": 2.56254506111145, | |
| "learning_rate": 7.091187003913802e-07, | |
| "loss": 0.733, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 1.6822867853795689, | |
| "grad_norm": 2.901610851287842, | |
| "learning_rate": 6.932963182878821e-07, | |
| "loss": 0.7838, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 1.6860356138706654, | |
| "grad_norm": 2.393528699874878, | |
| "learning_rate": 6.776392950288397e-07, | |
| "loss": 0.6754, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.6897844423617618, | |
| "grad_norm": 2.7382218837738037, | |
| "learning_rate": 6.621482317764105e-07, | |
| "loss": 0.7144, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 1.6935332708528583, | |
| "grad_norm": 2.618114471435547, | |
| "learning_rate": 6.468237233206043e-07, | |
| "loss": 0.7003, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 1.697282099343955, | |
| "grad_norm": 2.418599843978882, | |
| "learning_rate": 6.316663580564425e-07, | |
| "loss": 0.6957, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 1.7010309278350515, | |
| "grad_norm": 2.5395448207855225, | |
| "learning_rate": 6.166767179613691e-07, | |
| "loss": 0.5987, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 1.7047797563261482, | |
| "grad_norm": 2.5443437099456787, | |
| "learning_rate": 6.018553785729075e-07, | |
| "loss": 0.4977, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 1.7085285848172447, | |
| "grad_norm": 2.4432942867279053, | |
| "learning_rate": 5.872029089665588e-07, | |
| "loss": 0.675, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 1.7122774133083412, | |
| "grad_norm": 2.7112233638763428, | |
| "learning_rate": 5.727198717339511e-07, | |
| "loss": 0.7934, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 1.7160262417994376, | |
| "grad_norm": 2.9048352241516113, | |
| "learning_rate": 5.584068229612422e-07, | |
| "loss": 0.7687, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.7197750702905341, | |
| "grad_norm": 2.5611398220062256, | |
| "learning_rate": 5.442643122077673e-07, | |
| "loss": 0.516, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 1.7235238987816306, | |
| "grad_norm": 2.6895134449005127, | |
| "learning_rate": 5.302928824849335e-07, | |
| "loss": 0.662, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.7272727272727273, | |
| "grad_norm": 2.621039390563965, | |
| "learning_rate": 5.164930702353782e-07, | |
| "loss": 0.6618, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 1.7310215557638238, | |
| "grad_norm": 2.5842714309692383, | |
| "learning_rate": 5.028654053123666e-07, | |
| "loss": 0.5826, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 1.7347703842549205, | |
| "grad_norm": 2.4873173236846924, | |
| "learning_rate": 4.894104109594466e-07, | |
| "loss": 0.6776, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 1.738519212746017, | |
| "grad_norm": 2.2931952476501465, | |
| "learning_rate": 4.7612860379036674e-07, | |
| "loss": 0.6094, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.7422680412371134, | |
| "grad_norm": 2.699720859527588, | |
| "learning_rate": 4.6302049376922843e-07, | |
| "loss": 0.5347, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 1.74601686972821, | |
| "grad_norm": 3.027738571166992, | |
| "learning_rate": 4.500865841909169e-07, | |
| "loss": 0.6739, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.7497656982193064, | |
| "grad_norm": 2.582227945327759, | |
| "learning_rate": 4.373273716617682e-07, | |
| "loss": 0.547, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 1.7535145267104029, | |
| "grad_norm": 3.154305934906006, | |
| "learning_rate": 4.247433460805067e-07, | |
| "loss": 0.7603, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.7572633552014996, | |
| "grad_norm": 2.7173681259155273, | |
| "learning_rate": 4.123349906194357e-07, | |
| "loss": 0.6546, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 1.761012183692596, | |
| "grad_norm": 2.6075384616851807, | |
| "learning_rate": 4.001027817058789e-07, | |
| "loss": 0.6589, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.7647610121836927, | |
| "grad_norm": 2.7403595447540283, | |
| "learning_rate": 3.8804718900389673e-07, | |
| "loss": 0.7657, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 1.7685098406747892, | |
| "grad_norm": 2.6404995918273926, | |
| "learning_rate": 3.7616867539624733e-07, | |
| "loss": 0.4362, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.7722586691658857, | |
| "grad_norm": 2.6141321659088135, | |
| "learning_rate": 3.6446769696661445e-07, | |
| "loss": 0.5542, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 1.7760074976569822, | |
| "grad_norm": 2.7053098678588867, | |
| "learning_rate": 3.5294470298209817e-07, | |
| "loss": 0.7215, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.7797563261480787, | |
| "grad_norm": 2.3503482341766357, | |
| "learning_rate": 3.416001358759635e-07, | |
| "loss": 0.5368, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 1.7835051546391751, | |
| "grad_norm": 2.7156739234924316, | |
| "learning_rate": 3.304344312306529e-07, | |
| "loss": 0.6804, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.7872539831302718, | |
| "grad_norm": 2.5638582706451416, | |
| "learning_rate": 3.194480177610604e-07, | |
| "loss": 0.8309, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 1.7910028116213683, | |
| "grad_norm": 2.8452188968658447, | |
| "learning_rate": 3.08641317298074e-07, | |
| "loss": 0.5907, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.794751640112465, | |
| "grad_norm": 2.5471601486206055, | |
| "learning_rate": 2.980147447723775e-07, | |
| "loss": 0.6524, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 1.7985004686035615, | |
| "grad_norm": 2.443708658218384, | |
| "learning_rate": 2.8756870819851736e-07, | |
| "loss": 0.5538, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.802249297094658, | |
| "grad_norm": 2.7727556228637695, | |
| "learning_rate": 2.7730360865923954e-07, | |
| "loss": 0.6459, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 1.8059981255857545, | |
| "grad_norm": 3.0080816745758057, | |
| "learning_rate": 2.672198402900883e-07, | |
| "loss": 0.7243, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.809746954076851, | |
| "grad_norm": 2.2730140686035156, | |
| "learning_rate": 2.573177902642726e-07, | |
| "loss": 0.7811, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 1.8134957825679474, | |
| "grad_norm": 2.5842466354370117, | |
| "learning_rate": 2.475978387778e-07, | |
| "loss": 0.595, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.817244611059044, | |
| "grad_norm": 2.4688827991485596, | |
| "learning_rate": 2.380603590348829e-07, | |
| "loss": 0.6941, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 1.8209934395501406, | |
| "grad_norm": 2.346224546432495, | |
| "learning_rate": 2.2870571723360212e-07, | |
| "loss": 0.7242, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.824742268041237, | |
| "grad_norm": 2.721781015396118, | |
| "learning_rate": 2.1953427255185122e-07, | |
| "loss": 0.7004, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 1.8284910965323338, | |
| "grad_norm": 2.539914846420288, | |
| "learning_rate": 2.1054637713354586e-07, | |
| "loss": 0.6182, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.8322399250234302, | |
| "grad_norm": 2.6623215675354004, | |
| "learning_rate": 2.0174237607510138e-07, | |
| "loss": 0.4989, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 1.8359887535145267, | |
| "grad_norm": 2.411862373352051, | |
| "learning_rate": 1.9312260741218114e-07, | |
| "loss": 0.4213, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.8397375820056232, | |
| "grad_norm": 2.1537928581237793, | |
| "learning_rate": 1.8468740210672077e-07, | |
| "loss": 0.5353, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 1.8434864104967197, | |
| "grad_norm": 3.083552598953247, | |
| "learning_rate": 1.7643708403422055e-07, | |
| "loss": 1.1005, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.8472352389878162, | |
| "grad_norm": 2.445988178253174, | |
| "learning_rate": 1.6837196997130434e-07, | |
| "loss": 0.6158, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 1.8509840674789129, | |
| "grad_norm": 2.336162567138672, | |
| "learning_rate": 1.6049236958356475e-07, | |
| "loss": 0.649, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.8547328959700093, | |
| "grad_norm": 2.544160842895508, | |
| "learning_rate": 1.5279858541366876e-07, | |
| "loss": 0.7222, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 1.858481724461106, | |
| "grad_norm": 2.5376548767089844, | |
| "learning_rate": 1.4529091286973994e-07, | |
| "loss": 0.7403, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.8622305529522025, | |
| "grad_norm": 3.008063554763794, | |
| "learning_rate": 1.3796964021402072e-07, | |
| "loss": 0.7811, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 1.865979381443299, | |
| "grad_norm": 2.640939712524414, | |
| "learning_rate": 1.3083504855180007e-07, | |
| "loss": 0.6864, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.8697282099343955, | |
| "grad_norm": 2.6732051372528076, | |
| "learning_rate": 1.2388741182062348e-07, | |
| "loss": 0.6969, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 1.873477038425492, | |
| "grad_norm": 2.688314199447632, | |
| "learning_rate": 1.1712699677977224e-07, | |
| "loss": 0.659, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.8772258669165884, | |
| "grad_norm": 2.5495784282684326, | |
| "learning_rate": 1.1055406300002347e-07, | |
| "loss": 0.5468, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 1.8809746954076851, | |
| "grad_norm": 2.6933047771453857, | |
| "learning_rate": 1.0416886285368188e-07, | |
| "loss": 0.5204, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 1.8847235238987816, | |
| "grad_norm": 2.7378575801849365, | |
| "learning_rate": 9.797164150489035e-08, | |
| "loss": 0.5859, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 1.8884723523898783, | |
| "grad_norm": 2.0356202125549316, | |
| "learning_rate": 9.1962636900218e-08, | |
| "loss": 0.3406, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 1.8922211808809748, | |
| "grad_norm": 3.0371510982513428, | |
| "learning_rate": 8.614207975952083e-08, | |
| "loss": 0.7767, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 1.8959700093720713, | |
| "grad_norm": 2.813620090484619, | |
| "learning_rate": 8.0510193567086e-08, | |
| "loss": 0.5616, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 1.8997188378631678, | |
| "grad_norm": 2.557776689529419, | |
| "learning_rate": 7.5067194563051e-08, | |
| "loss": 0.5433, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 1.9034676663542642, | |
| "grad_norm": 2.6855971813201904, | |
| "learning_rate": 6.981329173509909e-08, | |
| "loss": 0.6334, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 1.9072164948453607, | |
| "grad_norm": 2.539196729660034, | |
| "learning_rate": 6.474868681043578e-08, | |
| "loss": 0.6286, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 1.9109653233364574, | |
| "grad_norm": 2.1722402572631836, | |
| "learning_rate": 5.987357424804441e-08, | |
| "loss": 0.5581, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.914714151827554, | |
| "grad_norm": 2.4900224208831787, | |
| "learning_rate": 5.518814123121885e-08, | |
| "loss": 0.6132, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 1.9184629803186504, | |
| "grad_norm": 2.708784818649292, | |
| "learning_rate": 5.0692567660375334e-08, | |
| "loss": 0.6154, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.922211808809747, | |
| "grad_norm": 2.573993444442749, | |
| "learning_rate": 4.638702614614854e-08, | |
| "loss": 0.6727, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 1.9259606373008435, | |
| "grad_norm": 2.826658248901367, | |
| "learning_rate": 4.227168200276077e-08, | |
| "loss": 0.7038, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 1.92970946579194, | |
| "grad_norm": 2.657594680786133, | |
| "learning_rate": 3.834669324167428e-08, | |
| "loss": 0.7022, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 1.9334582942830365, | |
| "grad_norm": 2.5230019092559814, | |
| "learning_rate": 3.4612210565528323e-08, | |
| "loss": 0.5854, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.937207122774133, | |
| "grad_norm": 2.966693162918091, | |
| "learning_rate": 3.10683773623488e-08, | |
| "loss": 0.6926, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 1.9409559512652295, | |
| "grad_norm": 2.689201593399048, | |
| "learning_rate": 2.7715329700044315e-08, | |
| "loss": 0.7031, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 1.9447047797563262, | |
| "grad_norm": 2.46077036857605, | |
| "learning_rate": 2.455319632118147e-08, | |
| "loss": 0.5426, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 1.9484536082474226, | |
| "grad_norm": 2.4496285915374756, | |
| "learning_rate": 2.158209863804217e-08, | |
| "loss": 0.5547, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.9522024367385193, | |
| "grad_norm": 2.5419907569885254, | |
| "learning_rate": 1.8802150727962876e-08, | |
| "loss": 0.5622, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 1.9559512652296158, | |
| "grad_norm": 2.629879951477051, | |
| "learning_rate": 1.6213459328950355e-08, | |
| "loss": 0.6756, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.9597000937207123, | |
| "grad_norm": 2.748396396636963, | |
| "learning_rate": 1.3816123835588835e-08, | |
| "loss": 0.5403, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 1.9634489222118088, | |
| "grad_norm": 2.49360728263855, | |
| "learning_rate": 1.161023629522029e-08, | |
| "loss": 0.735, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 1.9671977507029053, | |
| "grad_norm": 2.472134828567505, | |
| "learning_rate": 9.595881404411145e-09, | |
| "loss": 0.5665, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 1.9709465791940017, | |
| "grad_norm": 2.7294187545776367, | |
| "learning_rate": 7.773136505700995e-09, | |
| "loss": 0.8548, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 1.9746954076850984, | |
| "grad_norm": 2.79746675491333, | |
| "learning_rate": 6.142071584630538e-09, | |
| "loss": 0.7157, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 1.978444236176195, | |
| "grad_norm": 2.4330995082855225, | |
| "learning_rate": 4.702749267057604e-09, | |
| "loss": 0.5992, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.9821930646672916, | |
| "grad_norm": 2.6152493953704834, | |
| "learning_rate": 3.4552248167507576e-09, | |
| "loss": 0.6798, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 1.985941893158388, | |
| "grad_norm": 2.38468861579895, | |
| "learning_rate": 2.3995461332676496e-09, | |
| "loss": 0.5985, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.9896907216494846, | |
| "grad_norm": 2.6697652339935303, | |
| "learning_rate": 1.5357537501159425e-09, | |
| "loss": 0.5737, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 1.993439550140581, | |
| "grad_norm": 2.0764174461364746, | |
| "learning_rate": 8.638808331973281e-10, | |
| "loss": 0.5335, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.9971883786316775, | |
| "grad_norm": 2.559661626815796, | |
| "learning_rate": 3.8395317953354717e-10, | |
| "loss": 0.6069, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 3.018411636352539, | |
| "learning_rate": 9.598921627607116e-11, | |
| "loss": 0.5042, | |
| "step": 534 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 534, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3.243263714603827e+16, | |
| "train_batch_size": 6, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |