| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.9972020145495244, | |
| "eval_steps": 500, | |
| "global_step": 891, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.003357582540570789, | |
| "grad_norm": 15.05952844044008, | |
| "learning_rate": 0.0, | |
| "loss": 1.1094, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.006715165081141578, | |
| "grad_norm": 15.55795952285201, | |
| "learning_rate": 1.1111111111111112e-07, | |
| "loss": 1.2127, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.010072747621712367, | |
| "grad_norm": 16.539001809523764, | |
| "learning_rate": 2.2222222222222224e-07, | |
| "loss": 1.1671, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.013430330162283156, | |
| "grad_norm": 16.368302779349722, | |
| "learning_rate": 3.3333333333333335e-07, | |
| "loss": 1.1785, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.016787912702853944, | |
| "grad_norm": 15.588296216474147, | |
| "learning_rate": 4.444444444444445e-07, | |
| "loss": 1.1263, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.020145495243424735, | |
| "grad_norm": 16.499164600464685, | |
| "learning_rate": 5.555555555555555e-07, | |
| "loss": 1.1748, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.023503077783995522, | |
| "grad_norm": 14.083316686941275, | |
| "learning_rate": 6.666666666666667e-07, | |
| "loss": 1.1657, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.026860660324566313, | |
| "grad_norm": 16.604129854168686, | |
| "learning_rate": 7.777777777777779e-07, | |
| "loss": 1.2313, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.0302182428651371, | |
| "grad_norm": 14.603261078745698, | |
| "learning_rate": 8.88888888888889e-07, | |
| "loss": 1.1465, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.03357582540570789, | |
| "grad_norm": 12.7832318620063, | |
| "learning_rate": 1.0000000000000002e-06, | |
| "loss": 1.1103, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.03693340794627868, | |
| "grad_norm": 11.676821024645601, | |
| "learning_rate": 1.111111111111111e-06, | |
| "loss": 1.0456, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.04029099048684947, | |
| "grad_norm": 10.276715553394455, | |
| "learning_rate": 1.2222222222222223e-06, | |
| "loss": 1.0994, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.04364857302742026, | |
| "grad_norm": 7.580128095019628, | |
| "learning_rate": 1.3333333333333334e-06, | |
| "loss": 0.9192, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.047006155567991044, | |
| "grad_norm": 4.579086500989691, | |
| "learning_rate": 1.4444444444444445e-06, | |
| "loss": 0.9019, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.05036373810856184, | |
| "grad_norm": 4.462238466517284, | |
| "learning_rate": 1.5555555555555558e-06, | |
| "loss": 0.932, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.053721320649132626, | |
| "grad_norm": 4.49504437288462, | |
| "learning_rate": 1.6666666666666667e-06, | |
| "loss": 0.9504, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.05707890318970341, | |
| "grad_norm": 4.856137118720444, | |
| "learning_rate": 1.777777777777778e-06, | |
| "loss": 0.8997, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.0604364857302742, | |
| "grad_norm": 5.458807952193987, | |
| "learning_rate": 1.888888888888889e-06, | |
| "loss": 0.9135, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.063794068270845, | |
| "grad_norm": 4.914187499366455, | |
| "learning_rate": 2.0000000000000003e-06, | |
| "loss": 0.895, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.06715165081141578, | |
| "grad_norm": 5.8484928291138685, | |
| "learning_rate": 2.1111111111111114e-06, | |
| "loss": 0.9178, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.07050923335198657, | |
| "grad_norm": 6.405365379448204, | |
| "learning_rate": 2.222222222222222e-06, | |
| "loss": 0.9436, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.07386681589255736, | |
| "grad_norm": 5.140279358058298, | |
| "learning_rate": 2.3333333333333336e-06, | |
| "loss": 0.8577, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.07722439843312814, | |
| "grad_norm": 4.573855490751355, | |
| "learning_rate": 2.4444444444444447e-06, | |
| "loss": 0.849, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.08058198097369894, | |
| "grad_norm": 3.4903819904679914, | |
| "learning_rate": 2.5555555555555557e-06, | |
| "loss": 0.8684, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.08393956351426973, | |
| "grad_norm": 3.2866185409697413, | |
| "learning_rate": 2.666666666666667e-06, | |
| "loss": 0.9139, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.08729714605484051, | |
| "grad_norm": 2.9343312861580553, | |
| "learning_rate": 2.7777777777777783e-06, | |
| "loss": 0.825, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.09065472859541131, | |
| "grad_norm": 2.7441074948207635, | |
| "learning_rate": 2.888888888888889e-06, | |
| "loss": 0.7743, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.09401231113598209, | |
| "grad_norm": 3.0852450829413036, | |
| "learning_rate": 3e-06, | |
| "loss": 0.8325, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.09736989367655288, | |
| "grad_norm": 2.5969750970962107, | |
| "learning_rate": 3.1111111111111116e-06, | |
| "loss": 0.8181, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.10072747621712368, | |
| "grad_norm": 3.0607284743078367, | |
| "learning_rate": 3.2222222222222227e-06, | |
| "loss": 0.8839, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.10408505875769446, | |
| "grad_norm": 2.6971303460280813, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 0.803, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.10744264129826525, | |
| "grad_norm": 2.7795448200981054, | |
| "learning_rate": 3.444444444444445e-06, | |
| "loss": 0.7857, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.11080022383883603, | |
| "grad_norm": 2.672977099956262, | |
| "learning_rate": 3.555555555555556e-06, | |
| "loss": 0.8166, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.11415780637940683, | |
| "grad_norm": 2.433972508138389, | |
| "learning_rate": 3.6666666666666666e-06, | |
| "loss": 0.7488, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.11751538891997762, | |
| "grad_norm": 2.4637559196857812, | |
| "learning_rate": 3.777777777777778e-06, | |
| "loss": 0.7965, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.1208729714605484, | |
| "grad_norm": 2.456980144441013, | |
| "learning_rate": 3.88888888888889e-06, | |
| "loss": 0.8154, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.1242305540011192, | |
| "grad_norm": 2.4869279368019273, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 0.8126, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.12758813654169, | |
| "grad_norm": 2.467906619619702, | |
| "learning_rate": 4.111111111111111e-06, | |
| "loss": 0.8001, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.13094571908226077, | |
| "grad_norm": 2.581215311622158, | |
| "learning_rate": 4.222222222222223e-06, | |
| "loss": 0.8286, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.13430330162283155, | |
| "grad_norm": 2.501542706637013, | |
| "learning_rate": 4.333333333333334e-06, | |
| "loss": 0.8294, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.13766088416340236, | |
| "grad_norm": 2.334359062766939, | |
| "learning_rate": 4.444444444444444e-06, | |
| "loss": 0.8057, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.14101846670397314, | |
| "grad_norm": 2.512161617115715, | |
| "learning_rate": 4.555555555555556e-06, | |
| "loss": 0.7816, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.14437604924454392, | |
| "grad_norm": 2.3512830431401186, | |
| "learning_rate": 4.666666666666667e-06, | |
| "loss": 0.7406, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.14773363178511473, | |
| "grad_norm": 2.353319121545363, | |
| "learning_rate": 4.777777777777778e-06, | |
| "loss": 0.7836, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.1510912143256855, | |
| "grad_norm": 2.3785414037245065, | |
| "learning_rate": 4.888888888888889e-06, | |
| "loss": 0.7905, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.1544487968662563, | |
| "grad_norm": 2.489874176687805, | |
| "learning_rate": 5e-06, | |
| "loss": 0.77, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.1578063794068271, | |
| "grad_norm": 2.640302288366859, | |
| "learning_rate": 5.1111111111111115e-06, | |
| "loss": 0.8096, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.16116396194739788, | |
| "grad_norm": 2.6168372492389875, | |
| "learning_rate": 5.2222222222222226e-06, | |
| "loss": 0.8085, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.16452154448796866, | |
| "grad_norm": 2.355298598185473, | |
| "learning_rate": 5.333333333333334e-06, | |
| "loss": 0.7634, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.16787912702853947, | |
| "grad_norm": 2.4159132229794564, | |
| "learning_rate": 5.444444444444445e-06, | |
| "loss": 0.7764, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.17123670956911025, | |
| "grad_norm": 2.4612658382916344, | |
| "learning_rate": 5.555555555555557e-06, | |
| "loss": 0.7637, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.17459429210968103, | |
| "grad_norm": 2.454872028492967, | |
| "learning_rate": 5.666666666666667e-06, | |
| "loss": 0.8304, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.1779518746502518, | |
| "grad_norm": 2.231797525957087, | |
| "learning_rate": 5.777777777777778e-06, | |
| "loss": 0.8054, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.18130945719082261, | |
| "grad_norm": 2.5145359657110573, | |
| "learning_rate": 5.88888888888889e-06, | |
| "loss": 0.7767, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.1846670397313934, | |
| "grad_norm": 2.2680418205268817, | |
| "learning_rate": 6e-06, | |
| "loss": 0.7446, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.18802462227196418, | |
| "grad_norm": 2.5397886944135095, | |
| "learning_rate": 6.111111111111112e-06, | |
| "loss": 0.8216, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.19138220481253498, | |
| "grad_norm": 2.5677000160418615, | |
| "learning_rate": 6.222222222222223e-06, | |
| "loss": 0.7516, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.19473978735310576, | |
| "grad_norm": 2.464277933743688, | |
| "learning_rate": 6.333333333333333e-06, | |
| "loss": 0.7809, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.19809736989367654, | |
| "grad_norm": 2.4742771459078763, | |
| "learning_rate": 6.444444444444445e-06, | |
| "loss": 0.7734, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.20145495243424735, | |
| "grad_norm": 2.4284525343661794, | |
| "learning_rate": 6.555555555555556e-06, | |
| "loss": 0.7763, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.20481253497481813, | |
| "grad_norm": 2.4953358610535985, | |
| "learning_rate": 6.666666666666667e-06, | |
| "loss": 0.7742, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.2081701175153889, | |
| "grad_norm": 2.221862854493057, | |
| "learning_rate": 6.777777777777779e-06, | |
| "loss": 0.7388, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.21152770005595972, | |
| "grad_norm": 2.4115958840359135, | |
| "learning_rate": 6.88888888888889e-06, | |
| "loss": 0.8117, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.2148852825965305, | |
| "grad_norm": 2.247325502758182, | |
| "learning_rate": 7e-06, | |
| "loss": 0.874, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.21824286513710128, | |
| "grad_norm": 2.459302207580601, | |
| "learning_rate": 7.111111111111112e-06, | |
| "loss": 0.7753, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.22160044767767206, | |
| "grad_norm": 2.3615999287857194, | |
| "learning_rate": 7.222222222222223e-06, | |
| "loss": 0.7364, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.22495803021824287, | |
| "grad_norm": 2.345803430121652, | |
| "learning_rate": 7.333333333333333e-06, | |
| "loss": 0.7828, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.22831561275881365, | |
| "grad_norm": 2.3409970367083095, | |
| "learning_rate": 7.444444444444445e-06, | |
| "loss": 0.7061, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.23167319529938443, | |
| "grad_norm": 2.4774854380802624, | |
| "learning_rate": 7.555555555555556e-06, | |
| "loss": 0.7717, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.23503077783995524, | |
| "grad_norm": 2.5208849967610782, | |
| "learning_rate": 7.666666666666667e-06, | |
| "loss": 0.7771, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.23838836038052602, | |
| "grad_norm": 2.6549693177981055, | |
| "learning_rate": 7.77777777777778e-06, | |
| "loss": 0.8112, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.2417459429210968, | |
| "grad_norm": 2.832698323256436, | |
| "learning_rate": 7.88888888888889e-06, | |
| "loss": 0.7739, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.2451035254616676, | |
| "grad_norm": 2.5304942003986453, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": 0.8008, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.2484611080022384, | |
| "grad_norm": 2.431052677971174, | |
| "learning_rate": 8.111111111111112e-06, | |
| "loss": 0.8011, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.2518186905428092, | |
| "grad_norm": 2.2473360155050286, | |
| "learning_rate": 8.222222222222222e-06, | |
| "loss": 0.7438, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.25517627308338, | |
| "grad_norm": 2.5408424868361017, | |
| "learning_rate": 8.333333333333334e-06, | |
| "loss": 0.8015, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.25853385562395076, | |
| "grad_norm": 2.3659875362077996, | |
| "learning_rate": 8.444444444444446e-06, | |
| "loss": 0.7927, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.26189143816452154, | |
| "grad_norm": 2.3184771169883636, | |
| "learning_rate": 8.555555555555556e-06, | |
| "loss": 0.7832, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.2652490207050923, | |
| "grad_norm": 2.426200561656744, | |
| "learning_rate": 8.666666666666668e-06, | |
| "loss": 0.7475, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.2686066032456631, | |
| "grad_norm": 2.39729048396846, | |
| "learning_rate": 8.777777777777778e-06, | |
| "loss": 0.8269, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.27196418578623394, | |
| "grad_norm": 2.375526510228167, | |
| "learning_rate": 8.888888888888888e-06, | |
| "loss": 0.7924, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.2753217683268047, | |
| "grad_norm": 2.2496572603077833, | |
| "learning_rate": 9e-06, | |
| "loss": 0.8096, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.2786793508673755, | |
| "grad_norm": 2.3057931599473913, | |
| "learning_rate": 9.111111111111112e-06, | |
| "loss": 0.7586, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.2820369334079463, | |
| "grad_norm": 2.3828592253623784, | |
| "learning_rate": 9.222222222222224e-06, | |
| "loss": 0.7741, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.28539451594851706, | |
| "grad_norm": 2.479750143175691, | |
| "learning_rate": 9.333333333333334e-06, | |
| "loss": 0.7718, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.28875209848908784, | |
| "grad_norm": 2.2810057071437466, | |
| "learning_rate": 9.444444444444445e-06, | |
| "loss": 0.7687, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.2921096810296586, | |
| "grad_norm": 2.3537687144315655, | |
| "learning_rate": 9.555555555555556e-06, | |
| "loss": 0.7694, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.29546726357022945, | |
| "grad_norm": 2.480085659080849, | |
| "learning_rate": 9.666666666666667e-06, | |
| "loss": 0.8077, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.29882484611080024, | |
| "grad_norm": 2.3071631718134733, | |
| "learning_rate": 9.777777777777779e-06, | |
| "loss": 0.8218, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.302182428651371, | |
| "grad_norm": 2.42645063403485, | |
| "learning_rate": 9.88888888888889e-06, | |
| "loss": 0.7613, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.3055400111919418, | |
| "grad_norm": 2.2871396355674958, | |
| "learning_rate": 1e-05, | |
| "loss": 0.7597, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.3088975937325126, | |
| "grad_norm": 2.550927280073565, | |
| "learning_rate": 9.999961543109546e-06, | |
| "loss": 0.7757, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.31225517627308336, | |
| "grad_norm": 2.27888182793667, | |
| "learning_rate": 9.999846173029752e-06, | |
| "loss": 0.7865, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.3156127588136542, | |
| "grad_norm": 2.1772873128455807, | |
| "learning_rate": 9.99965389153533e-06, | |
| "loss": 0.7405, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.318970341354225, | |
| "grad_norm": 2.313731791988704, | |
| "learning_rate": 9.999384701584098e-06, | |
| "loss": 0.7605, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.32232792389479575, | |
| "grad_norm": 2.66647271515439, | |
| "learning_rate": 9.999038607316942e-06, | |
| "loss": 0.8139, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.32568550643536653, | |
| "grad_norm": 2.345938824538182, | |
| "learning_rate": 9.998615614057743e-06, | |
| "loss": 0.7782, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.3290430889759373, | |
| "grad_norm": 2.445303562264931, | |
| "learning_rate": 9.998115728313305e-06, | |
| "loss": 0.7628, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.3324006715165081, | |
| "grad_norm": 2.3792304151925974, | |
| "learning_rate": 9.997538957773248e-06, | |
| "loss": 0.773, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.33575825405707893, | |
| "grad_norm": 2.3052323825950816, | |
| "learning_rate": 9.996885311309892e-06, | |
| "loss": 0.8015, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.3391158365976497, | |
| "grad_norm": 2.2653536708074338, | |
| "learning_rate": 9.996154798978122e-06, | |
| "loss": 0.759, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.3424734191382205, | |
| "grad_norm": 2.445188297437759, | |
| "learning_rate": 9.99534743201523e-06, | |
| "loss": 0.7828, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.34583100167879127, | |
| "grad_norm": 2.1926261320850555, | |
| "learning_rate": 9.994463222840748e-06, | |
| "loss": 0.7518, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.34918858421936205, | |
| "grad_norm": 2.3228023822506154, | |
| "learning_rate": 9.993502185056244e-06, | |
| "loss": 0.7541, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.35254616675993283, | |
| "grad_norm": 2.361098770321157, | |
| "learning_rate": 9.992464333445134e-06, | |
| "loss": 0.6949, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.3559037493005036, | |
| "grad_norm": 2.2629507834451146, | |
| "learning_rate": 9.991349683972435e-06, | |
| "loss": 0.7731, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.35926133184107445, | |
| "grad_norm": 2.264400946281939, | |
| "learning_rate": 9.990158253784525e-06, | |
| "loss": 0.7746, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.36261891438164523, | |
| "grad_norm": 2.536408740504836, | |
| "learning_rate": 9.988890061208889e-06, | |
| "loss": 0.757, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.365976496922216, | |
| "grad_norm": 2.1481392669710657, | |
| "learning_rate": 9.987545125753818e-06, | |
| "loss": 0.8101, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.3693340794627868, | |
| "grad_norm": 2.271583484189747, | |
| "learning_rate": 9.986123468108134e-06, | |
| "loss": 0.7716, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.37269166200335757, | |
| "grad_norm": 2.301407093107629, | |
| "learning_rate": 9.984625110140844e-06, | |
| "loss": 0.7842, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.37604924454392835, | |
| "grad_norm": 2.3089880928671045, | |
| "learning_rate": 9.983050074900824e-06, | |
| "loss": 0.7452, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.3794068270844992, | |
| "grad_norm": 2.413625325654982, | |
| "learning_rate": 9.98139838661646e-06, | |
| "loss": 0.7502, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.38276440962506997, | |
| "grad_norm": 2.289871304585377, | |
| "learning_rate": 9.979670070695265e-06, | |
| "loss": 0.7708, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.38612199216564075, | |
| "grad_norm": 2.290612460056919, | |
| "learning_rate": 9.977865153723508e-06, | |
| "loss": 0.784, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.38947957470621153, | |
| "grad_norm": 2.3027870660016725, | |
| "learning_rate": 9.97598366346578e-06, | |
| "loss": 0.7534, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.3928371572467823, | |
| "grad_norm": 2.2479898856154015, | |
| "learning_rate": 9.974025628864592e-06, | |
| "loss": 0.7388, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.3961947397873531, | |
| "grad_norm": 2.20503493895051, | |
| "learning_rate": 9.971991080039912e-06, | |
| "loss": 0.763, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.39955232232792387, | |
| "grad_norm": 2.2879831426439807, | |
| "learning_rate": 9.969880048288704e-06, | |
| "loss": 0.8042, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.4029099048684947, | |
| "grad_norm": 2.0895550497991815, | |
| "learning_rate": 9.96769256608446e-06, | |
| "loss": 0.7267, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.4062674874090655, | |
| "grad_norm": 2.2118550621037047, | |
| "learning_rate": 9.965428667076687e-06, | |
| "loss": 0.7642, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.40962506994963627, | |
| "grad_norm": 2.3589353890246416, | |
| "learning_rate": 9.963088386090386e-06, | |
| "loss": 0.7688, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.41298265249020705, | |
| "grad_norm": 2.3245448693859805, | |
| "learning_rate": 9.960671759125529e-06, | |
| "loss": 0.7909, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.4163402350307778, | |
| "grad_norm": 2.2267480190116893, | |
| "learning_rate": 9.958178823356503e-06, | |
| "loss": 0.7525, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.4196978175713486, | |
| "grad_norm": 2.4295908603398364, | |
| "learning_rate": 9.95560961713153e-06, | |
| "loss": 0.8311, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.42305540011191944, | |
| "grad_norm": 2.2939978579098987, | |
| "learning_rate": 9.95296417997208e-06, | |
| "loss": 0.7679, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.4264129826524902, | |
| "grad_norm": 2.438507683294902, | |
| "learning_rate": 9.950242552572272e-06, | |
| "loss": 0.783, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.429770565193061, | |
| "grad_norm": 2.121254822380545, | |
| "learning_rate": 9.947444776798235e-06, | |
| "loss": 0.7213, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.4331281477336318, | |
| "grad_norm": 2.393441549390287, | |
| "learning_rate": 9.944570895687471e-06, | |
| "loss": 0.7832, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.43648573027420257, | |
| "grad_norm": 2.354252110767644, | |
| "learning_rate": 9.941620953448195e-06, | |
| "loss": 0.7984, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.43984331281477335, | |
| "grad_norm": 2.3944424356758143, | |
| "learning_rate": 9.938594995458644e-06, | |
| "loss": 0.7794, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.4432008953553441, | |
| "grad_norm": 2.3095894964107093, | |
| "learning_rate": 9.935493068266396e-06, | |
| "loss": 0.7876, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.44655847789591496, | |
| "grad_norm": 2.3120517699348273, | |
| "learning_rate": 9.932315219587641e-06, | |
| "loss": 0.7665, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.44991606043648574, | |
| "grad_norm": 2.4246945894908554, | |
| "learning_rate": 9.929061498306448e-06, | |
| "loss": 0.7985, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.4532736429770565, | |
| "grad_norm": 2.351327159028273, | |
| "learning_rate": 9.92573195447402e-06, | |
| "loss": 0.8119, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.4566312255176273, | |
| "grad_norm": 2.1900583413629042, | |
| "learning_rate": 9.922326639307918e-06, | |
| "loss": 0.753, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.4599888080581981, | |
| "grad_norm": 2.39557007686719, | |
| "learning_rate": 9.918845605191274e-06, | |
| "loss": 0.792, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.46334639059876886, | |
| "grad_norm": 2.245343140864623, | |
| "learning_rate": 9.915288905671986e-06, | |
| "loss": 0.7924, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.4667039731393397, | |
| "grad_norm": 2.095454810542127, | |
| "learning_rate": 9.911656595461899e-06, | |
| "loss": 0.7287, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.4700615556799105, | |
| "grad_norm": 2.3661531662229445, | |
| "learning_rate": 9.90794873043595e-06, | |
| "loss": 0.783, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.47341913822048126, | |
| "grad_norm": 2.218576799584266, | |
| "learning_rate": 9.904165367631329e-06, | |
| "loss": 0.7682, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.47677672076105204, | |
| "grad_norm": 2.2752695851092777, | |
| "learning_rate": 9.900306565246579e-06, | |
| "loss": 0.757, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.4801343033016228, | |
| "grad_norm": 2.201550744224962, | |
| "learning_rate": 9.896372382640718e-06, | |
| "loss": 0.7691, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.4834918858421936, | |
| "grad_norm": 2.1043983734924185, | |
| "learning_rate": 9.892362880332316e-06, | |
| "loss": 0.7383, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.4868494683827644, | |
| "grad_norm": 2.3833966355515153, | |
| "learning_rate": 9.888278119998573e-06, | |
| "loss": 0.807, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.4902070509233352, | |
| "grad_norm": 2.0943822596303674, | |
| "learning_rate": 9.884118164474359e-06, | |
| "loss": 0.7899, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.493564633463906, | |
| "grad_norm": 2.0793762196537524, | |
| "learning_rate": 9.879883077751255e-06, | |
| "loss": 0.7425, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.4969222160044768, | |
| "grad_norm": 2.2384449240578377, | |
| "learning_rate": 9.875572924976568e-06, | |
| "loss": 0.7857, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.5002797985450476, | |
| "grad_norm": 2.0645202722027642, | |
| "learning_rate": 9.871187772452327e-06, | |
| "loss": 0.7932, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.5036373810856184, | |
| "grad_norm": 2.2655225270221377, | |
| "learning_rate": 9.866727687634266e-06, | |
| "loss": 0.7613, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.5069949636261891, | |
| "grad_norm": 2.139175314801808, | |
| "learning_rate": 9.86219273913078e-06, | |
| "loss": 0.7728, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.51035254616676, | |
| "grad_norm": 2.2148970746168932, | |
| "learning_rate": 9.857582996701878e-06, | |
| "loss": 0.7613, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.5137101287073307, | |
| "grad_norm": 2.3638044057711065, | |
| "learning_rate": 9.852898531258102e-06, | |
| "loss": 0.7538, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.5170677112479015, | |
| "grad_norm": 2.16926286809674, | |
| "learning_rate": 9.848139414859441e-06, | |
| "loss": 0.7518, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.5204252937884724, | |
| "grad_norm": 2.264174308627129, | |
| "learning_rate": 9.843305720714227e-06, | |
| "loss": 0.758, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.5237828763290431, | |
| "grad_norm": 2.1658356317455403, | |
| "learning_rate": 9.838397523177993e-06, | |
| "loss": 0.7508, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.5271404588696139, | |
| "grad_norm": 2.2969058345093116, | |
| "learning_rate": 9.833414897752346e-06, | |
| "loss": 0.7595, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.5304980414101846, | |
| "grad_norm": 2.180966154918318, | |
| "learning_rate": 9.828357921083803e-06, | |
| "loss": 0.7734, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.5338556239507555, | |
| "grad_norm": 2.236205762743028, | |
| "learning_rate": 9.823226670962598e-06, | |
| "loss": 0.821, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.5372132064913262, | |
| "grad_norm": 2.4457753187441837, | |
| "learning_rate": 9.818021226321502e-06, | |
| "loss": 0.8161, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.540570789031897, | |
| "grad_norm": 2.1706687105611313, | |
| "learning_rate": 9.812741667234599e-06, | |
| "loss": 0.7693, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.5439283715724679, | |
| "grad_norm": 2.1712893591002045, | |
| "learning_rate": 9.807388074916064e-06, | |
| "loss": 0.759, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.5472859541130386, | |
| "grad_norm": 2.1454942490466675, | |
| "learning_rate": 9.801960531718898e-06, | |
| "loss": 0.7605, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.5506435366536094, | |
| "grad_norm": 2.22853836765068, | |
| "learning_rate": 9.796459121133675e-06, | |
| "loss": 0.8167, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.5540011191941802, | |
| "grad_norm": 2.147190276259434, | |
| "learning_rate": 9.790883927787254e-06, | |
| "loss": 0.7771, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.557358701734751, | |
| "grad_norm": 2.177712396336002, | |
| "learning_rate": 9.785235037441473e-06, | |
| "loss": 0.7749, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.5607162842753217, | |
| "grad_norm": 2.2764867484419353, | |
| "learning_rate": 9.779512536991839e-06, | |
| "loss": 0.7186, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.5640738668158926, | |
| "grad_norm": 2.0316602958146177, | |
| "learning_rate": 9.773716514466179e-06, | |
| "loss": 0.7092, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.5674314493564634, | |
| "grad_norm": 2.335399900268128, | |
| "learning_rate": 9.767847059023292e-06, | |
| "loss": 0.7561, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.5707890318970341, | |
| "grad_norm": 2.1709764537143945, | |
| "learning_rate": 9.761904260951583e-06, | |
| "loss": 0.7802, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.574146614437605, | |
| "grad_norm": 2.0593129146431512, | |
| "learning_rate": 9.755888211667663e-06, | |
| "loss": 0.7301, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.5775041969781757, | |
| "grad_norm": 2.441441500782324, | |
| "learning_rate": 9.749799003714954e-06, | |
| "loss": 0.7799, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.5808617795187465, | |
| "grad_norm": 2.2660299178551773, | |
| "learning_rate": 9.743636730762259e-06, | |
| "loss": 0.7827, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.5842193620593172, | |
| "grad_norm": 2.1765013932906396, | |
| "learning_rate": 9.737401487602314e-06, | |
| "loss": 0.7267, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.5875769445998881, | |
| "grad_norm": 2.3327142301922956, | |
| "learning_rate": 9.731093370150349e-06, | |
| "loss": 0.7456, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.5909345271404589, | |
| "grad_norm": 2.1506465746934973, | |
| "learning_rate": 9.724712475442597e-06, | |
| "loss": 0.7703, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.5942921096810296, | |
| "grad_norm": 2.356569475164915, | |
| "learning_rate": 9.718258901634802e-06, | |
| "loss": 0.7102, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.5976496922216005, | |
| "grad_norm": 2.232872599999044, | |
| "learning_rate": 9.71173274800072e-06, | |
| "loss": 0.7432, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.6010072747621712, | |
| "grad_norm": 2.2094954984314996, | |
| "learning_rate": 9.70513411493058e-06, | |
| "loss": 0.7298, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.604364857302742, | |
| "grad_norm": 2.247936712152706, | |
| "learning_rate": 9.698463103929542e-06, | |
| "loss": 0.7627, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.6077224398433129, | |
| "grad_norm": 2.178442031708872, | |
| "learning_rate": 9.691719817616148e-06, | |
| "loss": 0.747, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.6110800223838836, | |
| "grad_norm": 1.963354472091314, | |
| "learning_rate": 9.684904359720724e-06, | |
| "loss": 0.7338, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.6144376049244544, | |
| "grad_norm": 2.353759222193367, | |
| "learning_rate": 9.678016835083798e-06, | |
| "loss": 0.7535, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.6177951874650252, | |
| "grad_norm": 2.1917086328236373, | |
| "learning_rate": 9.671057349654481e-06, | |
| "loss": 0.8249, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.621152770005596, | |
| "grad_norm": 2.068590516399619, | |
| "learning_rate": 9.66402601048884e-06, | |
| "loss": 0.7565, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.6245103525461667, | |
| "grad_norm": 2.233193447193882, | |
| "learning_rate": 9.656922925748254e-06, | |
| "loss": 0.779, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.6278679350867375, | |
| "grad_norm": 1.9913300969947432, | |
| "learning_rate": 9.649748204697741e-06, | |
| "loss": 0.7111, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.6312255176273084, | |
| "grad_norm": 2.0770537048721205, | |
| "learning_rate": 9.642501957704287e-06, | |
| "loss": 0.7737, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.6345831001678791, | |
| "grad_norm": 2.026052626547883, | |
| "learning_rate": 9.63518429623514e-06, | |
| "loss": 0.7678, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.63794068270845, | |
| "grad_norm": 2.004420216986479, | |
| "learning_rate": 9.627795332856107e-06, | |
| "loss": 0.7765, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.6412982652490207, | |
| "grad_norm": 2.0936509457969374, | |
| "learning_rate": 9.620335181229805e-06, | |
| "loss": 0.7583, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.6446558477895915, | |
| "grad_norm": 2.038743312113399, | |
| "learning_rate": 9.612803956113932e-06, | |
| "loss": 0.7755, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.6480134303301622, | |
| "grad_norm": 2.1648346294989294, | |
| "learning_rate": 9.605201773359485e-06, | |
| "loss": 0.7125, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.6513710128707331, | |
| "grad_norm": 2.023535905337207, | |
| "learning_rate": 9.59752874990899e-06, | |
| "loss": 0.72, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.6547285954113039, | |
| "grad_norm": 2.0647928726363776, | |
| "learning_rate": 9.589785003794692e-06, | |
| "loss": 0.741, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.6580861779518746, | |
| "grad_norm": 2.657611602927363, | |
| "learning_rate": 9.581970654136752e-06, | |
| "loss": 0.7723, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.6614437604924455, | |
| "grad_norm": 2.211743855012671, | |
| "learning_rate": 9.574085821141406e-06, | |
| "loss": 0.754, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.6648013430330162, | |
| "grad_norm": 2.131488016325981, | |
| "learning_rate": 9.566130626099118e-06, | |
| "loss": 0.7738, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.668158925573587, | |
| "grad_norm": 2.1420712267681195, | |
| "learning_rate": 9.55810519138271e-06, | |
| "loss": 0.781, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.6715165081141579, | |
| "grad_norm": 2.245825883364256, | |
| "learning_rate": 9.550009640445492e-06, | |
| "loss": 0.7606, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.6748740906547286, | |
| "grad_norm": 2.027082764830745, | |
| "learning_rate": 9.541844097819347e-06, | |
| "loss": 0.7535, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.6782316731952994, | |
| "grad_norm": 1.9877374251586413, | |
| "learning_rate": 9.533608689112827e-06, | |
| "loss": 0.7559, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.6815892557358701, | |
| "grad_norm": 1.9927267340906514, | |
| "learning_rate": 9.525303541009218e-06, | |
| "loss": 0.6754, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.684946838276441, | |
| "grad_norm": 2.092882170995953, | |
| "learning_rate": 9.516928781264588e-06, | |
| "loss": 0.7431, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.6883044208170117, | |
| "grad_norm": 2.0525414950537972, | |
| "learning_rate": 9.508484538705823e-06, | |
| "loss": 0.7649, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.6916620033575825, | |
| "grad_norm": 1.9668368596759938, | |
| "learning_rate": 9.499970943228646e-06, | |
| "loss": 0.7218, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.6950195858981534, | |
| "grad_norm": 1.9745189748654561, | |
| "learning_rate": 9.491388125795623e-06, | |
| "loss": 0.7104, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.6983771684387241, | |
| "grad_norm": 2.201042855482161, | |
| "learning_rate": 9.482736218434144e-06, | |
| "loss": 0.7477, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.7017347509792949, | |
| "grad_norm": 2.0154595111164895, | |
| "learning_rate": 9.474015354234385e-06, | |
| "loss": 0.7587, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.7050923335198657, | |
| "grad_norm": 2.313239702363474, | |
| "learning_rate": 9.465225667347275e-06, | |
| "loss": 0.7292, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.7084499160604365, | |
| "grad_norm": 2.025272477121896, | |
| "learning_rate": 9.45636729298243e-06, | |
| "loss": 0.7241, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.7118074986010072, | |
| "grad_norm": 2.0795424496465573, | |
| "learning_rate": 9.447440367406053e-06, | |
| "loss": 0.7458, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.7151650811415781, | |
| "grad_norm": 2.320432869755114, | |
| "learning_rate": 9.438445027938873e-06, | |
| "loss": 0.76, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.7185226636821489, | |
| "grad_norm": 2.1418660928160875, | |
| "learning_rate": 9.429381412954e-06, | |
| "loss": 0.7463, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.7218802462227196, | |
| "grad_norm": 2.037263699160382, | |
| "learning_rate": 9.420249661874812e-06, | |
| "loss": 0.7562, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.7252378287632905, | |
| "grad_norm": 2.0664873609895844, | |
| "learning_rate": 9.41104991517281e-06, | |
| "loss": 0.719, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.7285954113038612, | |
| "grad_norm": 2.13157533259705, | |
| "learning_rate": 9.401782314365458e-06, | |
| "loss": 0.7611, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.731952993844432, | |
| "grad_norm": 1.9677915863746518, | |
| "learning_rate": 9.392447002013996e-06, | |
| "loss": 0.7241, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.7353105763850027, | |
| "grad_norm": 2.0599615713607466, | |
| "learning_rate": 9.383044121721257e-06, | |
| "loss": 0.7413, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.7386681589255736, | |
| "grad_norm": 2.0298587251271107, | |
| "learning_rate": 9.37357381812946e-06, | |
| "loss": 0.7479, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.7420257414661444, | |
| "grad_norm": 1.9944542234012115, | |
| "learning_rate": 9.364036236917972e-06, | |
| "loss": 0.6834, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.7453833240067151, | |
| "grad_norm": 2.0288217614081265, | |
| "learning_rate": 9.354431524801082e-06, | |
| "loss": 0.7512, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.748740906547286, | |
| "grad_norm": 2.0374680319858514, | |
| "learning_rate": 9.344759829525734e-06, | |
| "loss": 0.7138, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.7520984890878567, | |
| "grad_norm": 1.911969221157047, | |
| "learning_rate": 9.335021299869256e-06, | |
| "loss": 0.7382, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.7554560716284275, | |
| "grad_norm": 1.9835399597591075, | |
| "learning_rate": 9.32521608563708e-06, | |
| "loss": 0.7436, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.7588136541689984, | |
| "grad_norm": 2.0847726685364045, | |
| "learning_rate": 9.315344337660422e-06, | |
| "loss": 0.7364, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.7621712367095691, | |
| "grad_norm": 2.0415670551670426, | |
| "learning_rate": 9.305406207793974e-06, | |
| "loss": 0.7225, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.7655288192501399, | |
| "grad_norm": 1.987105167453389, | |
| "learning_rate": 9.295401848913569e-06, | |
| "loss": 0.7458, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.7688864017907107, | |
| "grad_norm": 1.9316452047409314, | |
| "learning_rate": 9.285331414913816e-06, | |
| "loss": 0.6967, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.7722439843312815, | |
| "grad_norm": 1.9874288288055189, | |
| "learning_rate": 9.275195060705749e-06, | |
| "loss": 0.7501, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.7756015668718522, | |
| "grad_norm": 2.0963531264104533, | |
| "learning_rate": 9.264992942214427e-06, | |
| "loss": 0.7236, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.7789591494124231, | |
| "grad_norm": 2.0961978727374397, | |
| "learning_rate": 9.254725216376562e-06, | |
| "loss": 0.7666, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.7823167319529939, | |
| "grad_norm": 1.981574993630262, | |
| "learning_rate": 9.244392041138068e-06, | |
| "loss": 0.7449, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.7856743144935646, | |
| "grad_norm": 1.907742433400848, | |
| "learning_rate": 9.233993575451663e-06, | |
| "loss": 0.7052, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.7890318970341355, | |
| "grad_norm": 2.0188171238038426, | |
| "learning_rate": 9.223529979274411e-06, | |
| "loss": 0.7166, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.7923894795747062, | |
| "grad_norm": 2.047444273717583, | |
| "learning_rate": 9.213001413565259e-06, | |
| "loss": 0.7614, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.795747062115277, | |
| "grad_norm": 2.0237204195979603, | |
| "learning_rate": 9.202408040282567e-06, | |
| "loss": 0.7407, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.7991046446558477, | |
| "grad_norm": 2.132189489227427, | |
| "learning_rate": 9.191750022381613e-06, | |
| "loss": 0.76, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.8024622271964186, | |
| "grad_norm": 1.9452576363735405, | |
| "learning_rate": 9.181027523812088e-06, | |
| "loss": 0.6906, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.8058198097369894, | |
| "grad_norm": 2.0526086557604204, | |
| "learning_rate": 9.170240709515573e-06, | |
| "loss": 0.7492, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.8091773922775601, | |
| "grad_norm": 2.127542984513071, | |
| "learning_rate": 9.159389745423003e-06, | |
| "loss": 0.753, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.812534974818131, | |
| "grad_norm": 2.190600637823083, | |
| "learning_rate": 9.14847479845211e-06, | |
| "loss": 0.7687, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.8158925573587017, | |
| "grad_norm": 1.976664413924339, | |
| "learning_rate": 9.137496036504868e-06, | |
| "loss": 0.7236, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.8192501398992725, | |
| "grad_norm": 2.0073069621610813, | |
| "learning_rate": 9.126453628464889e-06, | |
| "loss": 0.7513, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.8226077224398433, | |
| "grad_norm": 2.0275714232847366, | |
| "learning_rate": 9.115347744194844e-06, | |
| "loss": 0.7117, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.8259653049804141, | |
| "grad_norm": 1.950051870826525, | |
| "learning_rate": 9.10417855453385e-06, | |
| "loss": 0.7421, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.8293228875209849, | |
| "grad_norm": 1.9776365232195794, | |
| "learning_rate": 9.09294623129482e-06, | |
| "loss": 0.7683, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.8326804700615557, | |
| "grad_norm": 1.9425183877927914, | |
| "learning_rate": 9.081650947261847e-06, | |
| "loss": 0.7454, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.8360380526021265, | |
| "grad_norm": 2.0588781735343926, | |
| "learning_rate": 9.070292876187532e-06, | |
| "loss": 0.7511, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.8393956351426972, | |
| "grad_norm": 2.1208079935750734, | |
| "learning_rate": 9.058872192790314e-06, | |
| "loss": 0.7594, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.842753217683268, | |
| "grad_norm": 1.960725561854021, | |
| "learning_rate": 9.047389072751777e-06, | |
| "loss": 0.7164, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.8461108002238389, | |
| "grad_norm": 2.073474220676331, | |
| "learning_rate": 9.035843692713961e-06, | |
| "loss": 0.7256, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.8494683827644096, | |
| "grad_norm": 2.0957908157579137, | |
| "learning_rate": 9.02423623027663e-06, | |
| "loss": 0.7307, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.8528259653049804, | |
| "grad_norm": 2.152126139365395, | |
| "learning_rate": 9.012566863994548e-06, | |
| "loss": 0.7434, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.8561835478455512, | |
| "grad_norm": 2.024800247472397, | |
| "learning_rate": 9.000835773374733e-06, | |
| "loss": 0.7454, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.859541130386122, | |
| "grad_norm": 1.9021347801188042, | |
| "learning_rate": 8.98904313887369e-06, | |
| "loss": 0.7057, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.8628987129266927, | |
| "grad_norm": 2.1498786642675607, | |
| "learning_rate": 8.977189141894645e-06, | |
| "loss": 0.7711, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.8662562954672636, | |
| "grad_norm": 1.92161399371383, | |
| "learning_rate": 8.965273964784735e-06, | |
| "loss": 0.6948, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.8696138780078344, | |
| "grad_norm": 2.064988715065064, | |
| "learning_rate": 8.953297790832231e-06, | |
| "loss": 0.7545, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.8729714605484051, | |
| "grad_norm": 2.209448102356102, | |
| "learning_rate": 8.941260804263697e-06, | |
| "loss": 0.7427, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.876329043088976, | |
| "grad_norm": 2.0663786056657165, | |
| "learning_rate": 8.929163190241157e-06, | |
| "loss": 0.7129, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.8796866256295467, | |
| "grad_norm": 1.9398344638285674, | |
| "learning_rate": 8.917005134859263e-06, | |
| "loss": 0.6766, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.8830442081701175, | |
| "grad_norm": 2.1841210939318914, | |
| "learning_rate": 8.904786825142416e-06, | |
| "loss": 0.7312, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.8864017907106883, | |
| "grad_norm": 2.077339438871376, | |
| "learning_rate": 8.892508449041893e-06, | |
| "loss": 0.752, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.8897593732512591, | |
| "grad_norm": 2.0959324603771123, | |
| "learning_rate": 8.88017019543296e-06, | |
| "loss": 0.741, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.8931169557918299, | |
| "grad_norm": 2.0026594953656427, | |
| "learning_rate": 8.867772254111966e-06, | |
| "loss": 0.7121, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.8964745383324007, | |
| "grad_norm": 1.9765771238302214, | |
| "learning_rate": 8.85531481579342e-06, | |
| "loss": 0.7259, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.8998321208729715, | |
| "grad_norm": 2.081056505352585, | |
| "learning_rate": 8.842798072107055e-06, | |
| "loss": 0.8211, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.9031897034135422, | |
| "grad_norm": 2.0635888708323313, | |
| "learning_rate": 8.83022221559489e-06, | |
| "loss": 0.7545, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.906547285954113, | |
| "grad_norm": 1.9645226085630179, | |
| "learning_rate": 8.81758743970826e-06, | |
| "loss": 0.7097, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.9099048684946839, | |
| "grad_norm": 2.090055548313761, | |
| "learning_rate": 8.804893938804839e-06, | |
| "loss": 0.7085, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.9132624510352546, | |
| "grad_norm": 1.9631224971590835, | |
| "learning_rate": 8.79214190814566e-06, | |
| "loss": 0.749, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.9166200335758254, | |
| "grad_norm": 2.151696624831223, | |
| "learning_rate": 8.779331543892097e-06, | |
| "loss": 0.7437, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.9199776161163962, | |
| "grad_norm": 2.0673655977509533, | |
| "learning_rate": 8.766463043102864e-06, | |
| "loss": 0.7405, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.923335198656967, | |
| "grad_norm": 1.915638224161175, | |
| "learning_rate": 8.75353660373097e-06, | |
| "loss": 0.7099, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.9266927811975377, | |
| "grad_norm": 2.143796983074377, | |
| "learning_rate": 8.740552424620679e-06, | |
| "loss": 0.6971, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.9300503637381086, | |
| "grad_norm": 2.057152351652913, | |
| "learning_rate": 8.727510705504453e-06, | |
| "loss": 0.7293, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.9334079462786794, | |
| "grad_norm": 2.1240051292960116, | |
| "learning_rate": 8.714411646999878e-06, | |
| "loss": 0.7741, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.9367655288192501, | |
| "grad_norm": 2.0326991553451945, | |
| "learning_rate": 8.701255450606579e-06, | |
| "loss": 0.7643, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.940123111359821, | |
| "grad_norm": 2.088415602611133, | |
| "learning_rate": 8.688042318703111e-06, | |
| "loss": 0.7464, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.9434806939003917, | |
| "grad_norm": 2.1711465158973393, | |
| "learning_rate": 8.674772454543869e-06, | |
| "loss": 0.7103, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.9468382764409625, | |
| "grad_norm": 1.9500315142474602, | |
| "learning_rate": 8.661446062255931e-06, | |
| "loss": 0.6947, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.9501958589815332, | |
| "grad_norm": 1.846901032667399, | |
| "learning_rate": 8.648063346835943e-06, | |
| "loss": 0.726, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.9535534415221041, | |
| "grad_norm": 2.078490317740966, | |
| "learning_rate": 8.634624514146954e-06, | |
| "loss": 0.7353, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.9569110240626749, | |
| "grad_norm": 2.02753516538753, | |
| "learning_rate": 8.621129770915248e-06, | |
| "loss": 0.7712, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.9602686066032456, | |
| "grad_norm": 1.9463320169189116, | |
| "learning_rate": 8.607579324727175e-06, | |
| "loss": 0.7472, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.9636261891438165, | |
| "grad_norm": 1.9444246710199546, | |
| "learning_rate": 8.59397338402594e-06, | |
| "loss": 0.7502, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.9669837716843872, | |
| "grad_norm": 1.9590062622910647, | |
| "learning_rate": 8.580312158108413e-06, | |
| "loss": 0.7464, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.970341354224958, | |
| "grad_norm": 1.956148304515578, | |
| "learning_rate": 8.566595857121902e-06, | |
| "loss": 0.7099, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.9736989367655288, | |
| "grad_norm": 1.8926398800628126, | |
| "learning_rate": 8.55282469206092e-06, | |
| "loss": 0.7299, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.9770565193060996, | |
| "grad_norm": 2.0917641075340123, | |
| "learning_rate": 8.538998874763942e-06, | |
| "loss": 0.7639, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.9804141018466704, | |
| "grad_norm": 1.9718732131481036, | |
| "learning_rate": 8.525118617910144e-06, | |
| "loss": 0.7547, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.9837716843872412, | |
| "grad_norm": 1.9069388930319855, | |
| "learning_rate": 8.511184135016134e-06, | |
| "loss": 0.7309, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.987129266927812, | |
| "grad_norm": 2.014949861976609, | |
| "learning_rate": 8.497195640432664e-06, | |
| "loss": 0.7261, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.9904868494683827, | |
| "grad_norm": 1.9120513950060911, | |
| "learning_rate": 8.483153349341336e-06, | |
| "loss": 0.7166, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.9938444320089536, | |
| "grad_norm": 1.8053701743199972, | |
| "learning_rate": 8.46905747775129e-06, | |
| "loss": 0.6816, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.9972020145495244, | |
| "grad_norm": 2.003732103494112, | |
| "learning_rate": 8.45490824249588e-06, | |
| "loss": 0.7147, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 1.0033575825405707, | |
| "grad_norm": 8.303350987198142, | |
| "learning_rate": 8.440705861229344e-06, | |
| "loss": 1.2836, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 1.0067151650811417, | |
| "grad_norm": 2.2471820576545714, | |
| "learning_rate": 8.426450552423451e-06, | |
| "loss": 0.5247, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 1.0100727476217124, | |
| "grad_norm": 2.151639519325533, | |
| "learning_rate": 8.412142535364139e-06, | |
| "loss": 0.5023, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.0134303301622831, | |
| "grad_norm": 2.045570754833388, | |
| "learning_rate": 8.397782030148147e-06, | |
| "loss": 0.5212, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 1.0167879127028538, | |
| "grad_norm": 2.157217329180836, | |
| "learning_rate": 8.383369257679625e-06, | |
| "loss": 0.5258, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 1.0201454952434248, | |
| "grad_norm": 2.1393611084312547, | |
| "learning_rate": 8.368904439666739e-06, | |
| "loss": 0.4882, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 1.0235030777839955, | |
| "grad_norm": 2.4819644900313738, | |
| "learning_rate": 8.354387798618254e-06, | |
| "loss": 0.5222, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 1.0268606603245662, | |
| "grad_norm": 2.5039988079708904, | |
| "learning_rate": 8.339819557840124e-06, | |
| "loss": 0.4725, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 1.0302182428651372, | |
| "grad_norm": 2.4034784239124063, | |
| "learning_rate": 8.32519994143204e-06, | |
| "loss": 0.5223, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 1.033575825405708, | |
| "grad_norm": 2.388530410162058, | |
| "learning_rate": 8.310529174284004e-06, | |
| "loss": 0.5291, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 1.0369334079462786, | |
| "grad_norm": 2.3271329489823613, | |
| "learning_rate": 8.295807482072842e-06, | |
| "loss": 0.5197, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 1.0402909904868494, | |
| "grad_norm": 2.1361835792169432, | |
| "learning_rate": 8.281035091258762e-06, | |
| "loss": 0.4758, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 1.0436485730274203, | |
| "grad_norm": 2.2920827998361784, | |
| "learning_rate": 8.266212229081846e-06, | |
| "loss": 0.4927, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.047006155567991, | |
| "grad_norm": 2.296117198784697, | |
| "learning_rate": 8.251339123558573e-06, | |
| "loss": 0.4897, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 1.0503637381085618, | |
| "grad_norm": 2.4100622360712793, | |
| "learning_rate": 8.236416003478295e-06, | |
| "loss": 0.4794, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 1.0537213206491327, | |
| "grad_norm": 2.3131605280324865, | |
| "learning_rate": 8.221443098399733e-06, | |
| "loss": 0.4872, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 1.0570789031897034, | |
| "grad_norm": 2.236522744276815, | |
| "learning_rate": 8.206420638647433e-06, | |
| "loss": 0.4945, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 1.0604364857302742, | |
| "grad_norm": 2.2019268672438286, | |
| "learning_rate": 8.191348855308229e-06, | |
| "loss": 0.4766, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 1.063794068270845, | |
| "grad_norm": 2.1588616911267002, | |
| "learning_rate": 8.176227980227693e-06, | |
| "loss": 0.4723, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 1.0671516508114158, | |
| "grad_norm": 2.3900841792146577, | |
| "learning_rate": 8.161058246006558e-06, | |
| "loss": 0.5207, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 1.0705092333519866, | |
| "grad_norm": 2.3583948513012394, | |
| "learning_rate": 8.145839885997146e-06, | |
| "loss": 0.4906, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 1.0738668158925573, | |
| "grad_norm": 2.3746826909440064, | |
| "learning_rate": 8.130573134299782e-06, | |
| "loss": 0.4918, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 1.0772243984331282, | |
| "grad_norm": 2.002312382387202, | |
| "learning_rate": 8.11525822575918e-06, | |
| "loss": 0.4515, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.080581980973699, | |
| "grad_norm": 2.2773099709125497, | |
| "learning_rate": 8.099895395960847e-06, | |
| "loss": 0.5124, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 1.0839395635142697, | |
| "grad_norm": 1.9626004596776638, | |
| "learning_rate": 8.084484881227449e-06, | |
| "loss": 0.4867, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 1.0872971460548406, | |
| "grad_norm": 2.2075102999748317, | |
| "learning_rate": 8.069026918615173e-06, | |
| "loss": 0.4901, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 1.0906547285954113, | |
| "grad_norm": 2.1111018515847793, | |
| "learning_rate": 8.05352174591009e-06, | |
| "loss": 0.5072, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 1.094012311135982, | |
| "grad_norm": 2.343955652192936, | |
| "learning_rate": 8.037969601624495e-06, | |
| "loss": 0.5104, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 1.0973698936765528, | |
| "grad_norm": 2.0222492524048734, | |
| "learning_rate": 8.022370724993229e-06, | |
| "loss": 0.4585, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 1.1007274762171237, | |
| "grad_norm": 2.3170486820891316, | |
| "learning_rate": 8.006725355970008e-06, | |
| "loss": 0.4979, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 1.1040850587576945, | |
| "grad_norm": 2.2109613576414646, | |
| "learning_rate": 7.99103373522373e-06, | |
| "loss": 0.4929, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 1.1074426412982652, | |
| "grad_norm": 2.052769278396322, | |
| "learning_rate": 7.975296104134768e-06, | |
| "loss": 0.4891, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 1.1108002238388361, | |
| "grad_norm": 2.197495831185824, | |
| "learning_rate": 7.959512704791269e-06, | |
| "loss": 0.4957, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.1141578063794069, | |
| "grad_norm": 2.113288752249262, | |
| "learning_rate": 7.943683779985412e-06, | |
| "loss": 0.4891, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 1.1175153889199776, | |
| "grad_norm": 2.3150223677191604, | |
| "learning_rate": 7.927809573209691e-06, | |
| "loss": 0.4667, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 1.1208729714605483, | |
| "grad_norm": 2.066863441298375, | |
| "learning_rate": 7.911890328653156e-06, | |
| "loss": 0.4485, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 1.1242305540011193, | |
| "grad_norm": 2.1565376459504977, | |
| "learning_rate": 7.895926291197667e-06, | |
| "loss": 0.4817, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 1.12758813654169, | |
| "grad_norm": 2.3475799553693038, | |
| "learning_rate": 7.87991770641412e-06, | |
| "loss": 0.5305, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 1.1309457190822607, | |
| "grad_norm": 2.1203359960845116, | |
| "learning_rate": 7.863864820558669e-06, | |
| "loss": 0.5083, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 1.1343033016228317, | |
| "grad_norm": 2.081087965565155, | |
| "learning_rate": 7.847767880568944e-06, | |
| "loss": 0.4588, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 1.1376608841634024, | |
| "grad_norm": 2.081318262101045, | |
| "learning_rate": 7.831627134060249e-06, | |
| "loss": 0.4846, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 1.141018466703973, | |
| "grad_norm": 2.209767183684003, | |
| "learning_rate": 7.815442829321754e-06, | |
| "loss": 0.483, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 1.1443760492445438, | |
| "grad_norm": 2.011880925223341, | |
| "learning_rate": 7.799215215312667e-06, | |
| "loss": 0.4579, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.1477336317851148, | |
| "grad_norm": 2.0797725312115203, | |
| "learning_rate": 7.782944541658423e-06, | |
| "loss": 0.5117, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 1.1510912143256855, | |
| "grad_norm": 1.9959817767175392, | |
| "learning_rate": 7.766631058646826e-06, | |
| "loss": 0.4622, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 1.1544487968662562, | |
| "grad_norm": 2.2735006851582202, | |
| "learning_rate": 7.750275017224208e-06, | |
| "loss": 0.4724, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 1.1578063794068272, | |
| "grad_norm": 2.0970076738982044, | |
| "learning_rate": 7.733876668991565e-06, | |
| "loss": 0.4924, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 1.161163961947398, | |
| "grad_norm": 2.2352119607145307, | |
| "learning_rate": 7.71743626620069e-06, | |
| "loss": 0.517, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 1.1645215444879686, | |
| "grad_norm": 2.302631355787852, | |
| "learning_rate": 7.700954061750295e-06, | |
| "loss": 0.487, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 1.1678791270285394, | |
| "grad_norm": 2.1221219186208966, | |
| "learning_rate": 7.684430309182106e-06, | |
| "loss": 0.4709, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 1.1712367095691103, | |
| "grad_norm": 2.101482815450197, | |
| "learning_rate": 7.667865262676981e-06, | |
| "loss": 0.489, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 1.174594292109681, | |
| "grad_norm": 2.105161148484614, | |
| "learning_rate": 7.651259177050996e-06, | |
| "loss": 0.5033, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 1.1779518746502518, | |
| "grad_norm": 1.957600802485906, | |
| "learning_rate": 7.634612307751513e-06, | |
| "loss": 0.45, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.1813094571908227, | |
| "grad_norm": 2.2812310534697775, | |
| "learning_rate": 7.617924910853266e-06, | |
| "loss": 0.5108, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 1.1846670397313934, | |
| "grad_norm": 2.2141528970819246, | |
| "learning_rate": 7.601197243054411e-06, | |
| "loss": 0.4998, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.1880246222719641, | |
| "grad_norm": 2.1186842901832033, | |
| "learning_rate": 7.584429561672586e-06, | |
| "loss": 0.4822, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 1.1913822048125349, | |
| "grad_norm": 2.2241188415508857, | |
| "learning_rate": 7.567622124640942e-06, | |
| "loss": 0.4824, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 1.1947397873531058, | |
| "grad_norm": 2.2126573138667402, | |
| "learning_rate": 7.5507751905041885e-06, | |
| "loss": 0.5051, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 1.1980973698936765, | |
| "grad_norm": 2.175213998538769, | |
| "learning_rate": 7.533889018414602e-06, | |
| "loss": 0.4909, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 1.2014549524342473, | |
| "grad_norm": 2.1682912210779732, | |
| "learning_rate": 7.516963868128054e-06, | |
| "loss": 0.4975, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 1.2048125349748182, | |
| "grad_norm": 2.183195934735365, | |
| "learning_rate": 7.500000000000001e-06, | |
| "loss": 0.5057, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 1.208170117515389, | |
| "grad_norm": 2.1348501168594525, | |
| "learning_rate": 7.4829976749814935e-06, | |
| "loss": 0.4958, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 1.2115277000559597, | |
| "grad_norm": 2.45122587491375, | |
| "learning_rate": 7.46595715461515e-06, | |
| "loss": 0.51, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.2148852825965304, | |
| "grad_norm": 2.273341521941601, | |
| "learning_rate": 7.4488787010311425e-06, | |
| "loss": 0.4986, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 1.2182428651371013, | |
| "grad_norm": 2.1265214648425808, | |
| "learning_rate": 7.431762576943157e-06, | |
| "loss": 0.5224, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 1.221600447677672, | |
| "grad_norm": 2.207039996610266, | |
| "learning_rate": 7.414609045644356e-06, | |
| "loss": 0.5036, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 1.2249580302182428, | |
| "grad_norm": 2.2511356341290045, | |
| "learning_rate": 7.3974183710033334e-06, | |
| "loss": 0.4985, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 1.2283156127588137, | |
| "grad_norm": 2.2414220008243655, | |
| "learning_rate": 7.38019081746004e-06, | |
| "loss": 0.4826, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 1.2316731952993845, | |
| "grad_norm": 2.2345724571038583, | |
| "learning_rate": 7.362926650021736e-06, | |
| "loss": 0.4734, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.2350307778399552, | |
| "grad_norm": 2.020267975703017, | |
| "learning_rate": 7.345626134258897e-06, | |
| "loss": 0.4707, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 1.238388360380526, | |
| "grad_norm": 2.017700711508486, | |
| "learning_rate": 7.3282895363011405e-06, | |
| "loss": 0.4429, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.2417459429210969, | |
| "grad_norm": 2.4075032884901484, | |
| "learning_rate": 7.310917122833127e-06, | |
| "loss": 0.5123, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 1.2451035254616676, | |
| "grad_norm": 2.0744065650842924, | |
| "learning_rate": 7.293509161090453e-06, | |
| "loss": 0.4868, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.2484611080022383, | |
| "grad_norm": 2.1398898292409654, | |
| "learning_rate": 7.276065918855554e-06, | |
| "loss": 0.4917, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 1.2518186905428093, | |
| "grad_norm": 2.1639714949391915, | |
| "learning_rate": 7.2585876644535705e-06, | |
| "loss": 0.4957, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 1.25517627308338, | |
| "grad_norm": 2.3088349879154446, | |
| "learning_rate": 7.241074666748228e-06, | |
| "loss": 0.5311, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 1.2585338556239507, | |
| "grad_norm": 2.20877128750944, | |
| "learning_rate": 7.2235271951377005e-06, | |
| "loss": 0.5217, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 1.2618914381645214, | |
| "grad_norm": 2.079999331915844, | |
| "learning_rate": 7.205945519550467e-06, | |
| "loss": 0.4972, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 1.2652490207050924, | |
| "grad_norm": 2.1653370386720643, | |
| "learning_rate": 7.188329910441154e-06, | |
| "loss": 0.4715, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 1.268606603245663, | |
| "grad_norm": 2.2020260358728083, | |
| "learning_rate": 7.170680638786383e-06, | |
| "loss": 0.4841, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 1.271964185786234, | |
| "grad_norm": 2.313938671583983, | |
| "learning_rate": 7.1529979760805946e-06, | |
| "loss": 0.5132, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 1.2753217683268048, | |
| "grad_norm": 2.3187218424786433, | |
| "learning_rate": 7.135282194331881e-06, | |
| "loss": 0.4916, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 1.2786793508673755, | |
| "grad_norm": 2.381365131626571, | |
| "learning_rate": 7.1175335660577906e-06, | |
| "loss": 0.4985, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.2820369334079462, | |
| "grad_norm": 2.188895335954739, | |
| "learning_rate": 7.099752364281147e-06, | |
| "loss": 0.4985, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 1.285394515948517, | |
| "grad_norm": 2.2491929362704863, | |
| "learning_rate": 7.0819388625258385e-06, | |
| "loss": 0.4648, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 1.288752098489088, | |
| "grad_norm": 2.302063139422305, | |
| "learning_rate": 7.0640933348126235e-06, | |
| "loss": 0.5151, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 1.2921096810296586, | |
| "grad_norm": 2.2033193942118423, | |
| "learning_rate": 7.046216055654902e-06, | |
| "loss": 0.4853, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 1.2954672635702296, | |
| "grad_norm": 2.2781079428025364, | |
| "learning_rate": 7.028307300054499e-06, | |
| "loss": 0.5407, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 1.2988248461108003, | |
| "grad_norm": 2.4113225121524366, | |
| "learning_rate": 7.0103673434974375e-06, | |
| "loss": 0.504, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 1.302182428651371, | |
| "grad_norm": 2.1395395477301933, | |
| "learning_rate": 6.992396461949693e-06, | |
| "loss": 0.4724, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 1.3055400111919417, | |
| "grad_norm": 2.1628168470264364, | |
| "learning_rate": 6.974394931852957e-06, | |
| "loss": 0.4901, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 1.3088975937325125, | |
| "grad_norm": 2.0779952047374244, | |
| "learning_rate": 6.956363030120377e-06, | |
| "loss": 0.4779, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 1.3122551762730834, | |
| "grad_norm": 2.026126139321993, | |
| "learning_rate": 6.9383010341323e-06, | |
| "loss": 0.4661, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.3156127588136541, | |
| "grad_norm": 2.290741048224957, | |
| "learning_rate": 6.920209221732007e-06, | |
| "loss": 0.4814, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 1.318970341354225, | |
| "grad_norm": 2.1365050290585423, | |
| "learning_rate": 6.902087871221439e-06, | |
| "loss": 0.498, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 1.3223279238947958, | |
| "grad_norm": 2.2937163334382817, | |
| "learning_rate": 6.88393726135691e-06, | |
| "loss": 0.5079, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 1.3256855064353665, | |
| "grad_norm": 2.072458351219792, | |
| "learning_rate": 6.865757671344827e-06, | |
| "loss": 0.4769, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 1.3290430889759373, | |
| "grad_norm": 2.0989809200956544, | |
| "learning_rate": 6.8475493808373895e-06, | |
| "loss": 0.4766, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 1.332400671516508, | |
| "grad_norm": 2.064563966968899, | |
| "learning_rate": 6.829312669928293e-06, | |
| "loss": 0.456, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 1.335758254057079, | |
| "grad_norm": 2.1069716399584197, | |
| "learning_rate": 6.811047819148413e-06, | |
| "loss": 0.4984, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 1.3391158365976497, | |
| "grad_norm": 2.213794419117928, | |
| "learning_rate": 6.792755109461498e-06, | |
| "loss": 0.4866, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 1.3424734191382206, | |
| "grad_norm": 2.2174112182270522, | |
| "learning_rate": 6.7744348222598386e-06, | |
| "loss": 0.499, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 1.3458310016787913, | |
| "grad_norm": 2.079145185573435, | |
| "learning_rate": 6.756087239359948e-06, | |
| "loss": 0.493, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.349188584219362, | |
| "grad_norm": 2.1392680497000085, | |
| "learning_rate": 6.737712642998219e-06, | |
| "loss": 0.5378, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 1.3525461667599328, | |
| "grad_norm": 2.0513971936886675, | |
| "learning_rate": 6.719311315826589e-06, | |
| "loss": 0.4714, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 1.3559037493005035, | |
| "grad_norm": 2.2552876351142284, | |
| "learning_rate": 6.700883540908185e-06, | |
| "loss": 0.4872, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 1.3592613318410744, | |
| "grad_norm": 2.0728376046951777, | |
| "learning_rate": 6.682429601712976e-06, | |
| "loss": 0.4799, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 1.3626189143816452, | |
| "grad_norm": 2.0220373122992674, | |
| "learning_rate": 6.663949782113413e-06, | |
| "loss": 0.5166, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 1.3659764969222161, | |
| "grad_norm": 2.220177493037904, | |
| "learning_rate": 6.64544436638005e-06, | |
| "loss": 0.4781, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 1.3693340794627868, | |
| "grad_norm": 1.9793083289019477, | |
| "learning_rate": 6.626913639177189e-06, | |
| "loss": 0.4867, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 1.3726916620033576, | |
| "grad_norm": 2.0274582078000822, | |
| "learning_rate": 6.608357885558485e-06, | |
| "loss": 0.4443, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 1.3760492445439283, | |
| "grad_norm": 2.1847030624625883, | |
| "learning_rate": 6.589777390962575e-06, | |
| "loss": 0.5259, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 1.3794068270844992, | |
| "grad_norm": 2.2214921474975395, | |
| "learning_rate": 6.571172441208678e-06, | |
| "loss": 0.4816, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.38276440962507, | |
| "grad_norm": 2.349546029810803, | |
| "learning_rate": 6.552543322492195e-06, | |
| "loss": 0.5083, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 1.3861219921656407, | |
| "grad_norm": 2.2544220841659315, | |
| "learning_rate": 6.53389032138032e-06, | |
| "loss": 0.5073, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 1.3894795747062116, | |
| "grad_norm": 2.1899206248487135, | |
| "learning_rate": 6.515213724807621e-06, | |
| "loss": 0.473, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 1.3928371572467824, | |
| "grad_norm": 2.144097777561319, | |
| "learning_rate": 6.49651382007163e-06, | |
| "loss": 0.4745, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 1.396194739787353, | |
| "grad_norm": 2.249837918425837, | |
| "learning_rate": 6.477790894828422e-06, | |
| "loss": 0.5074, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 1.3995523223279238, | |
| "grad_norm": 2.203568377218348, | |
| "learning_rate": 6.459045237088189e-06, | |
| "loss": 0.5182, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 1.4029099048684948, | |
| "grad_norm": 2.207808012115493, | |
| "learning_rate": 6.440277135210815e-06, | |
| "loss": 0.4861, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 1.4062674874090655, | |
| "grad_norm": 2.1441864682397886, | |
| "learning_rate": 6.421486877901436e-06, | |
| "loss": 0.4886, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 1.4096250699496362, | |
| "grad_norm": 2.228627269764285, | |
| "learning_rate": 6.402674754205998e-06, | |
| "loss": 0.4773, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 1.4129826524902072, | |
| "grad_norm": 2.1994987489882893, | |
| "learning_rate": 6.383841053506813e-06, | |
| "loss": 0.5075, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.4163402350307779, | |
| "grad_norm": 2.183688476148547, | |
| "learning_rate": 6.364986065518106e-06, | |
| "loss": 0.4917, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 1.4196978175713486, | |
| "grad_norm": 2.269619040413762, | |
| "learning_rate": 6.3461100802815625e-06, | |
| "loss": 0.4967, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 1.4230554001119193, | |
| "grad_norm": 2.103981888475452, | |
| "learning_rate": 6.3272133881618596e-06, | |
| "loss": 0.4431, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 1.4264129826524903, | |
| "grad_norm": 2.1258273496585156, | |
| "learning_rate": 6.308296279842204e-06, | |
| "loss": 0.5031, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 1.429770565193061, | |
| "grad_norm": 2.196489174906513, | |
| "learning_rate": 6.289359046319862e-06, | |
| "loss": 0.4924, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 1.4331281477336317, | |
| "grad_norm": 2.1269856468419963, | |
| "learning_rate": 6.270401978901678e-06, | |
| "loss": 0.4895, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 1.4364857302742027, | |
| "grad_norm": 2.2126446207865587, | |
| "learning_rate": 6.2514253691996e-06, | |
| "loss": 0.5122, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 1.4398433128147734, | |
| "grad_norm": 2.1913325147414726, | |
| "learning_rate": 6.2324295091261885e-06, | |
| "loss": 0.5283, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 1.4432008953553441, | |
| "grad_norm": 2.091169249602068, | |
| "learning_rate": 6.213414690890125e-06, | |
| "loss": 0.4879, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 1.4465584778959149, | |
| "grad_norm": 2.210503348980095, | |
| "learning_rate": 6.194381206991723e-06, | |
| "loss": 0.4887, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.4499160604364858, | |
| "grad_norm": 2.164926411078867, | |
| "learning_rate": 6.175329350218426e-06, | |
| "loss": 0.4711, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 1.4532736429770565, | |
| "grad_norm": 2.1482110012584203, | |
| "learning_rate": 6.156259413640302e-06, | |
| "loss": 0.462, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 1.4566312255176272, | |
| "grad_norm": 2.294567867253737, | |
| "learning_rate": 6.1371716906055336e-06, | |
| "loss": 0.5164, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 1.4599888080581982, | |
| "grad_norm": 2.1428555316811346, | |
| "learning_rate": 6.11806647473591e-06, | |
| "loss": 0.4756, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 1.463346390598769, | |
| "grad_norm": 2.0267786414519313, | |
| "learning_rate": 6.098944059922311e-06, | |
| "loss": 0.477, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 1.4667039731393396, | |
| "grad_norm": 2.14676631545224, | |
| "learning_rate": 6.079804740320181e-06, | |
| "loss": 0.4668, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 1.4700615556799104, | |
| "grad_norm": 2.2538467654250267, | |
| "learning_rate": 6.060648810345006e-06, | |
| "loss": 0.495, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 1.4734191382204813, | |
| "grad_norm": 2.1469963540401067, | |
| "learning_rate": 6.041476564667785e-06, | |
| "loss": 0.4824, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 1.476776720761052, | |
| "grad_norm": 2.222380545278691, | |
| "learning_rate": 6.022288298210502e-06, | |
| "loss": 0.4753, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 1.4801343033016228, | |
| "grad_norm": 2.1083124763573746, | |
| "learning_rate": 6.003084306141579e-06, | |
| "loss": 0.5052, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.4834918858421937, | |
| "grad_norm": 2.1370436926979353, | |
| "learning_rate": 5.983864883871344e-06, | |
| "loss": 0.4789, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 1.4868494683827644, | |
| "grad_norm": 2.3178640332647316, | |
| "learning_rate": 5.964630327047485e-06, | |
| "loss": 0.5193, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 1.4902070509233352, | |
| "grad_norm": 2.1956778031643496, | |
| "learning_rate": 5.945380931550497e-06, | |
| "loss": 0.4849, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 1.4935646334639059, | |
| "grad_norm": 2.1954225526435063, | |
| "learning_rate": 5.926116993489143e-06, | |
| "loss": 0.4852, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 1.4969222160044768, | |
| "grad_norm": 2.0679488397891896, | |
| "learning_rate": 5.906838809195879e-06, | |
| "loss": 0.477, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 1.5002797985450476, | |
| "grad_norm": 2.1914965672856375, | |
| "learning_rate": 5.887546675222319e-06, | |
| "loss": 0.4897, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 1.5036373810856185, | |
| "grad_norm": 2.2169157191153603, | |
| "learning_rate": 5.8682408883346535e-06, | |
| "loss": 0.481, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 1.5069949636261892, | |
| "grad_norm": 2.2332788961047636, | |
| "learning_rate": 5.848921745509094e-06, | |
| "loss": 0.5045, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 1.51035254616676, | |
| "grad_norm": 2.116170949280568, | |
| "learning_rate": 5.829589543927305e-06, | |
| "loss": 0.4674, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 1.5137101287073307, | |
| "grad_norm": 2.043440626400537, | |
| "learning_rate": 5.8102445809718325e-06, | |
| "loss": 0.4964, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.5170677112479014, | |
| "grad_norm": 2.0944801885939204, | |
| "learning_rate": 5.790887154221521e-06, | |
| "loss": 0.4948, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 1.5204252937884724, | |
| "grad_norm": 2.3084091670034623, | |
| "learning_rate": 5.771517561446949e-06, | |
| "loss": 0.5108, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 1.523782876329043, | |
| "grad_norm": 2.3208214523309683, | |
| "learning_rate": 5.75213610060584e-06, | |
| "loss": 0.4914, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 1.527140458869614, | |
| "grad_norm": 2.024408080024364, | |
| "learning_rate": 5.7327430698384775e-06, | |
| "loss": 0.4919, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 1.5304980414101848, | |
| "grad_norm": 2.2191159501747073, | |
| "learning_rate": 5.713338767463129e-06, | |
| "loss": 0.5085, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 1.5338556239507555, | |
| "grad_norm": 2.192013145259984, | |
| "learning_rate": 5.693923491971445e-06, | |
| "loss": 0.4936, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 1.5372132064913262, | |
| "grad_norm": 2.137744041707972, | |
| "learning_rate": 5.674497542023875e-06, | |
| "loss": 0.5004, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 1.540570789031897, | |
| "grad_norm": 2.032793978959609, | |
| "learning_rate": 5.65506121644507e-06, | |
| "loss": 0.491, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 1.5439283715724679, | |
| "grad_norm": 2.2242785868303194, | |
| "learning_rate": 5.635614814219289e-06, | |
| "loss": 0.4974, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 1.5472859541130386, | |
| "grad_norm": 2.2872129720575662, | |
| "learning_rate": 5.616158634485793e-06, | |
| "loss": 0.5045, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.5506435366536095, | |
| "grad_norm": 2.5161657966811197, | |
| "learning_rate": 5.596692976534256e-06, | |
| "loss": 0.4776, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 1.5540011191941803, | |
| "grad_norm": 2.1830947757285566, | |
| "learning_rate": 5.577218139800143e-06, | |
| "loss": 0.4779, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 1.557358701734751, | |
| "grad_norm": 2.2723393111932286, | |
| "learning_rate": 5.557734423860122e-06, | |
| "loss": 0.4559, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 1.5607162842753217, | |
| "grad_norm": 2.039582763815814, | |
| "learning_rate": 5.538242128427444e-06, | |
| "loss": 0.4967, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 1.5640738668158924, | |
| "grad_norm": 2.1867791207126572, | |
| "learning_rate": 5.518741553347341e-06, | |
| "loss": 0.4793, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 1.5674314493564634, | |
| "grad_norm": 2.184162576004028, | |
| "learning_rate": 5.499232998592399e-06, | |
| "loss": 0.4563, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 1.5707890318970341, | |
| "grad_norm": 2.2279834676183077, | |
| "learning_rate": 5.479716764257961e-06, | |
| "loss": 0.4726, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 1.574146614437605, | |
| "grad_norm": 2.146603101538437, | |
| "learning_rate": 5.4601931505575e-06, | |
| "loss": 0.4761, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 1.5775041969781758, | |
| "grad_norm": 2.2103167022630386, | |
| "learning_rate": 5.44066245781801e-06, | |
| "loss": 0.4955, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 1.5808617795187465, | |
| "grad_norm": 2.3722558539750156, | |
| "learning_rate": 5.421124986475371e-06, | |
| "loss": 0.5089, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.5842193620593172, | |
| "grad_norm": 2.3496747654346697, | |
| "learning_rate": 5.4015810370697445e-06, | |
| "loss": 0.4878, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 1.587576944599888, | |
| "grad_norm": 2.185007470426342, | |
| "learning_rate": 5.382030910240936e-06, | |
| "loss": 0.4713, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 1.590934527140459, | |
| "grad_norm": 2.2103514110080593, | |
| "learning_rate": 5.362474906723781e-06, | |
| "loss": 0.5096, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 1.5942921096810296, | |
| "grad_norm": 2.141248032889711, | |
| "learning_rate": 5.342913327343515e-06, | |
| "loss": 0.4891, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 1.5976496922216006, | |
| "grad_norm": 2.172202613793392, | |
| "learning_rate": 5.3233464730111426e-06, | |
| "loss": 0.4929, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 1.6010072747621713, | |
| "grad_norm": 2.238037583616825, | |
| "learning_rate": 5.303774644718813e-06, | |
| "loss": 0.4849, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 1.604364857302742, | |
| "grad_norm": 2.0572504308978954, | |
| "learning_rate": 5.284198143535188e-06, | |
| "loss": 0.4946, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 1.6077224398433128, | |
| "grad_norm": 2.0917913417158016, | |
| "learning_rate": 5.2646172706008154e-06, | |
| "loss": 0.4834, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 1.6110800223838835, | |
| "grad_norm": 2.013696077036751, | |
| "learning_rate": 5.245032327123488e-06, | |
| "loss": 0.4564, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 1.6144376049244544, | |
| "grad_norm": 2.073234984497162, | |
| "learning_rate": 5.225443614373614e-06, | |
| "loss": 0.4479, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.6177951874650252, | |
| "grad_norm": 2.208574587163591, | |
| "learning_rate": 5.20585143367959e-06, | |
| "loss": 0.4761, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 1.621152770005596, | |
| "grad_norm": 2.1530116735583316, | |
| "learning_rate": 5.186256086423148e-06, | |
| "loss": 0.4702, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 1.6245103525461668, | |
| "grad_norm": 2.200115990394491, | |
| "learning_rate": 5.166657874034745e-06, | |
| "loss": 0.5088, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 1.6278679350867375, | |
| "grad_norm": 2.1407757545469424, | |
| "learning_rate": 5.147057097988898e-06, | |
| "loss": 0.5084, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 1.6312255176273083, | |
| "grad_norm": 2.124229197200729, | |
| "learning_rate": 5.127454059799567e-06, | |
| "loss": 0.4623, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 1.634583100167879, | |
| "grad_norm": 2.217735463671646, | |
| "learning_rate": 5.1078490610155105e-06, | |
| "loss": 0.4946, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 1.63794068270845, | |
| "grad_norm": 2.0764480657249758, | |
| "learning_rate": 5.088242403215644e-06, | |
| "loss": 0.5089, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 1.6412982652490207, | |
| "grad_norm": 2.331029267583452, | |
| "learning_rate": 5.0686343880044044e-06, | |
| "loss": 0.473, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 1.6446558477895916, | |
| "grad_norm": 2.1804145039026546, | |
| "learning_rate": 5.049025317007108e-06, | |
| "loss": 0.4934, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 1.6480134303301623, | |
| "grad_norm": 1.9682449318933604, | |
| "learning_rate": 5.029415491865311e-06, | |
| "loss": 0.4616, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.651371012870733, | |
| "grad_norm": 2.3938687293871133, | |
| "learning_rate": 5.009805214232177e-06, | |
| "loss": 0.5293, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 1.6547285954113038, | |
| "grad_norm": 2.1248954294843094, | |
| "learning_rate": 4.990194785767824e-06, | |
| "loss": 0.4815, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 1.6580861779518745, | |
| "grad_norm": 2.066522021485902, | |
| "learning_rate": 4.97058450813469e-06, | |
| "loss": 0.4782, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 1.6614437604924455, | |
| "grad_norm": 2.132367969004758, | |
| "learning_rate": 4.950974682992894e-06, | |
| "loss": 0.4493, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 1.6648013430330162, | |
| "grad_norm": 2.195382323978713, | |
| "learning_rate": 4.931365611995598e-06, | |
| "loss": 0.5095, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 1.6681589255735871, | |
| "grad_norm": 2.0731108125500346, | |
| "learning_rate": 4.911757596784358e-06, | |
| "loss": 0.5098, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 1.6715165081141579, | |
| "grad_norm": 2.2045936873389618, | |
| "learning_rate": 4.892150938984491e-06, | |
| "loss": 0.5034, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 1.6748740906547286, | |
| "grad_norm": 2.119499431019322, | |
| "learning_rate": 4.872545940200435e-06, | |
| "loss": 0.4621, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 1.6782316731952993, | |
| "grad_norm": 2.0571530774922717, | |
| "learning_rate": 4.8529429020111035e-06, | |
| "loss": 0.4324, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 1.68158925573587, | |
| "grad_norm": 2.080333899217976, | |
| "learning_rate": 4.833342125965257e-06, | |
| "loss": 0.4786, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.684946838276441, | |
| "grad_norm": 2.198178104289353, | |
| "learning_rate": 4.813743913576852e-06, | |
| "loss": 0.476, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 1.6883044208170117, | |
| "grad_norm": 2.1587194322010514, | |
| "learning_rate": 4.794148566320412e-06, | |
| "loss": 0.463, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 1.6916620033575827, | |
| "grad_norm": 2.3918082310854296, | |
| "learning_rate": 4.774556385626386e-06, | |
| "loss": 0.502, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 1.6950195858981534, | |
| "grad_norm": 2.267854796695586, | |
| "learning_rate": 4.754967672876513e-06, | |
| "loss": 0.5066, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 1.698377168438724, | |
| "grad_norm": 2.172897001954314, | |
| "learning_rate": 4.7353827293991845e-06, | |
| "loss": 0.4865, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 1.7017347509792948, | |
| "grad_norm": 2.249989307898594, | |
| "learning_rate": 4.715801856464812e-06, | |
| "loss": 0.5135, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 1.7050923335198656, | |
| "grad_norm": 2.182167380702455, | |
| "learning_rate": 4.6962253552811885e-06, | |
| "loss": 0.52, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 1.7084499160604365, | |
| "grad_norm": 2.3182206016361384, | |
| "learning_rate": 4.676653526988858e-06, | |
| "loss": 0.4623, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 1.7118074986010072, | |
| "grad_norm": 2.298712891082637, | |
| "learning_rate": 4.657086672656486e-06, | |
| "loss": 0.4734, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 1.7151650811415782, | |
| "grad_norm": 2.1611212242763194, | |
| "learning_rate": 4.63752509327622e-06, | |
| "loss": 0.4484, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.718522663682149, | |
| "grad_norm": 2.2744157038160497, | |
| "learning_rate": 4.617969089759066e-06, | |
| "loss": 0.5041, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 1.7218802462227196, | |
| "grad_norm": 2.001332944054495, | |
| "learning_rate": 4.598418962930258e-06, | |
| "loss": 0.494, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 1.7252378287632903, | |
| "grad_norm": 2.15178549984152, | |
| "learning_rate": 4.57887501352463e-06, | |
| "loss": 0.4906, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 1.728595411303861, | |
| "grad_norm": 2.0567126022881403, | |
| "learning_rate": 4.559337542181993e-06, | |
| "loss": 0.4654, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 1.731952993844432, | |
| "grad_norm": 2.3440444984090214, | |
| "learning_rate": 4.539806849442501e-06, | |
| "loss": 0.4806, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 1.7353105763850027, | |
| "grad_norm": 2.2415234653831475, | |
| "learning_rate": 4.520283235742042e-06, | |
| "loss": 0.4623, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 1.7386681589255737, | |
| "grad_norm": 2.1332817601414997, | |
| "learning_rate": 4.500767001407604e-06, | |
| "loss": 0.4522, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 1.7420257414661444, | |
| "grad_norm": 2.116507838045294, | |
| "learning_rate": 4.481258446652662e-06, | |
| "loss": 0.4842, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 1.7453833240067151, | |
| "grad_norm": 2.215105939137212, | |
| "learning_rate": 4.4617578715725565e-06, | |
| "loss": 0.4649, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 1.7487409065472859, | |
| "grad_norm": 2.225499325958965, | |
| "learning_rate": 4.4422655761398785e-06, | |
| "loss": 0.4853, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.7520984890878566, | |
| "grad_norm": 2.254229574580799, | |
| "learning_rate": 4.4227818601998575e-06, | |
| "loss": 0.4883, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 1.7554560716284275, | |
| "grad_norm": 2.1468138229729443, | |
| "learning_rate": 4.403307023465746e-06, | |
| "loss": 0.4786, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 1.7588136541689985, | |
| "grad_norm": 2.169367653787952, | |
| "learning_rate": 4.383841365514208e-06, | |
| "loss": 0.4933, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 1.7621712367095692, | |
| "grad_norm": 2.2540586767928916, | |
| "learning_rate": 4.364385185780712e-06, | |
| "loss": 0.4423, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 1.76552881925014, | |
| "grad_norm": 2.046616711801834, | |
| "learning_rate": 4.3449387835549305e-06, | |
| "loss": 0.4517, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 1.7688864017907107, | |
| "grad_norm": 2.054096374050294, | |
| "learning_rate": 4.325502457976126e-06, | |
| "loss": 0.4562, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 1.7722439843312814, | |
| "grad_norm": 2.1897562491663023, | |
| "learning_rate": 4.306076508028557e-06, | |
| "loss": 0.4872, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 1.775601566871852, | |
| "grad_norm": 2.1685603692137985, | |
| "learning_rate": 4.286661232536873e-06, | |
| "loss": 0.4847, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 1.778959149412423, | |
| "grad_norm": 2.1915605427774034, | |
| "learning_rate": 4.267256930161523e-06, | |
| "loss": 0.5192, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 1.782316731952994, | |
| "grad_norm": 2.254879047746067, | |
| "learning_rate": 4.247863899394162e-06, | |
| "loss": 0.4687, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.7856743144935647, | |
| "grad_norm": 2.1134520274196125, | |
| "learning_rate": 4.228482438553052e-06, | |
| "loss": 0.5262, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 1.7890318970341355, | |
| "grad_norm": 2.068253922507518, | |
| "learning_rate": 4.209112845778481e-06, | |
| "loss": 0.4839, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 1.7923894795747062, | |
| "grad_norm": 2.0887709442403346, | |
| "learning_rate": 4.189755419028169e-06, | |
| "loss": 0.4623, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 1.795747062115277, | |
| "grad_norm": 2.300959929057977, | |
| "learning_rate": 4.1704104560726955e-06, | |
| "loss": 0.5047, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 1.7991046446558476, | |
| "grad_norm": 2.156784226239325, | |
| "learning_rate": 4.151078254490908e-06, | |
| "loss": 0.4553, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 1.8024622271964186, | |
| "grad_norm": 2.0472342846000893, | |
| "learning_rate": 4.131759111665349e-06, | |
| "loss": 0.444, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 1.8058198097369895, | |
| "grad_norm": 2.15308812602405, | |
| "learning_rate": 4.112453324777683e-06, | |
| "loss": 0.4504, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 1.8091773922775602, | |
| "grad_norm": 2.305556846294406, | |
| "learning_rate": 4.09316119080412e-06, | |
| "loss": 0.4561, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 1.812534974818131, | |
| "grad_norm": 2.0629496902341304, | |
| "learning_rate": 4.073883006510858e-06, | |
| "loss": 0.4639, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 1.8158925573587017, | |
| "grad_norm": 2.621774490091145, | |
| "learning_rate": 4.054619068449502e-06, | |
| "loss": 0.4988, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.8192501398992724, | |
| "grad_norm": 2.249906503732158, | |
| "learning_rate": 4.035369672952516e-06, | |
| "loss": 0.4665, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 1.8226077224398431, | |
| "grad_norm": 2.124308282019638, | |
| "learning_rate": 4.016135116128656e-06, | |
| "loss": 0.4837, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 1.825965304980414, | |
| "grad_norm": 2.216203736606476, | |
| "learning_rate": 3.996915693858422e-06, | |
| "loss": 0.4599, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 1.829322887520985, | |
| "grad_norm": 2.199055435229539, | |
| "learning_rate": 3.977711701789499e-06, | |
| "loss": 0.4996, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 1.8326804700615558, | |
| "grad_norm": 2.1456836995965123, | |
| "learning_rate": 3.9585234353322155e-06, | |
| "loss": 0.474, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 1.8360380526021265, | |
| "grad_norm": 2.088890168115453, | |
| "learning_rate": 3.939351189654996e-06, | |
| "loss": 0.4551, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 1.8393956351426972, | |
| "grad_norm": 2.059736510246409, | |
| "learning_rate": 3.920195259679822e-06, | |
| "loss": 0.484, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 1.842753217683268, | |
| "grad_norm": 2.4289464926505504, | |
| "learning_rate": 3.901055940077691e-06, | |
| "loss": 0.5043, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 1.8461108002238389, | |
| "grad_norm": 2.0962090325586606, | |
| "learning_rate": 3.881933525264092e-06, | |
| "loss": 0.4398, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 1.8494683827644096, | |
| "grad_norm": 2.1707115890324165, | |
| "learning_rate": 3.862828309394469e-06, | |
| "loss": 0.4925, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.8528259653049806, | |
| "grad_norm": 2.015982277160347, | |
| "learning_rate": 3.843740586359701e-06, | |
| "loss": 0.4757, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 1.8561835478455513, | |
| "grad_norm": 2.2304879540207203, | |
| "learning_rate": 3.824670649781576e-06, | |
| "loss": 0.4614, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 1.859541130386122, | |
| "grad_norm": 2.091915679725523, | |
| "learning_rate": 3.805618793008279e-06, | |
| "loss": 0.4448, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 1.8628987129266927, | |
| "grad_norm": 2.16946840752013, | |
| "learning_rate": 3.786585309109877e-06, | |
| "loss": 0.4649, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 1.8662562954672635, | |
| "grad_norm": 2.128475934593608, | |
| "learning_rate": 3.7675704908738136e-06, | |
| "loss": 0.4802, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 1.8696138780078344, | |
| "grad_norm": 2.165472295510831, | |
| "learning_rate": 3.7485746308004013e-06, | |
| "loss": 0.4977, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 1.8729714605484051, | |
| "grad_norm": 2.212252433928935, | |
| "learning_rate": 3.7295980210983233e-06, | |
| "loss": 0.4935, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 1.876329043088976, | |
| "grad_norm": 2.1928900923013956, | |
| "learning_rate": 3.71064095368014e-06, | |
| "loss": 0.4627, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 1.8796866256295468, | |
| "grad_norm": 2.085149102799712, | |
| "learning_rate": 3.6917037201577977e-06, | |
| "loss": 0.4278, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 1.8830442081701175, | |
| "grad_norm": 2.0816980346294307, | |
| "learning_rate": 3.672786611838142e-06, | |
| "loss": 0.4631, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.8864017907106883, | |
| "grad_norm": 2.103474181111992, | |
| "learning_rate": 3.653889919718439e-06, | |
| "loss": 0.4511, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 1.889759373251259, | |
| "grad_norm": 2.2744496879859275, | |
| "learning_rate": 3.635013934481895e-06, | |
| "loss": 0.4974, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 1.89311695579183, | |
| "grad_norm": 2.2227249595968934, | |
| "learning_rate": 3.616158946493188e-06, | |
| "loss": 0.4769, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 1.8964745383324007, | |
| "grad_norm": 2.152621448218455, | |
| "learning_rate": 3.5973252457940034e-06, | |
| "loss": 0.4994, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 1.8998321208729716, | |
| "grad_norm": 2.070223009955467, | |
| "learning_rate": 3.578513122098566e-06, | |
| "loss": 0.5039, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 1.9031897034135423, | |
| "grad_norm": 2.121366546607162, | |
| "learning_rate": 3.559722864789187e-06, | |
| "loss": 0.4789, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 1.906547285954113, | |
| "grad_norm": 2.1017767501093823, | |
| "learning_rate": 3.5409547629118124e-06, | |
| "loss": 0.4562, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 1.9099048684946838, | |
| "grad_norm": 2.274326927273865, | |
| "learning_rate": 3.5222091051715803e-06, | |
| "loss": 0.4623, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 1.9132624510352545, | |
| "grad_norm": 1.9186891449000945, | |
| "learning_rate": 3.5034861799283713e-06, | |
| "loss": 0.5144, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 1.9166200335758254, | |
| "grad_norm": 2.3556578562236252, | |
| "learning_rate": 3.48478627519238e-06, | |
| "loss": 0.4503, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.9199776161163962, | |
| "grad_norm": 1.9892297433331403, | |
| "learning_rate": 3.466109678619681e-06, | |
| "loss": 0.4934, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 1.9233351986569671, | |
| "grad_norm": 2.2314638850872144, | |
| "learning_rate": 3.4474566775078055e-06, | |
| "loss": 0.4934, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 1.9266927811975378, | |
| "grad_norm": 2.2782528645464173, | |
| "learning_rate": 3.4288275587913235e-06, | |
| "loss": 0.4948, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 1.9300503637381086, | |
| "grad_norm": 2.36739181255683, | |
| "learning_rate": 3.4102226090374246e-06, | |
| "loss": 0.4741, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 1.9334079462786793, | |
| "grad_norm": 2.1683249260482444, | |
| "learning_rate": 3.3916421144415146e-06, | |
| "loss": 0.4732, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 1.93676552881925, | |
| "grad_norm": 2.1221739672342497, | |
| "learning_rate": 3.3730863608228125e-06, | |
| "loss": 0.4274, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 1.940123111359821, | |
| "grad_norm": 2.095387968090082, | |
| "learning_rate": 3.35455563361995e-06, | |
| "loss": 0.4649, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 1.9434806939003917, | |
| "grad_norm": 2.1689072561371154, | |
| "learning_rate": 3.336050217886588e-06, | |
| "loss": 0.4986, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 1.9468382764409626, | |
| "grad_norm": 2.2504194821367345, | |
| "learning_rate": 3.3175703982870232e-06, | |
| "loss": 0.4716, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 1.9501958589815334, | |
| "grad_norm": 2.084871129563975, | |
| "learning_rate": 3.2991164590918162e-06, | |
| "loss": 0.4403, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.953553441522104, | |
| "grad_norm": 2.0143154450944123, | |
| "learning_rate": 3.280688684173412e-06, | |
| "loss": 0.4452, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 1.9569110240626748, | |
| "grad_norm": 2.1804050757472977, | |
| "learning_rate": 3.262287357001781e-06, | |
| "loss": 0.516, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 1.9602686066032455, | |
| "grad_norm": 2.0645062573328232, | |
| "learning_rate": 3.2439127606400546e-06, | |
| "loss": 0.461, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 1.9636261891438165, | |
| "grad_norm": 2.070842126513229, | |
| "learning_rate": 3.225565177740163e-06, | |
| "loss": 0.466, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 1.9669837716843872, | |
| "grad_norm": 2.0600131128468595, | |
| "learning_rate": 3.2072448905385046e-06, | |
| "loss": 0.433, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 1.9703413542249582, | |
| "grad_norm": 2.0764551843814107, | |
| "learning_rate": 3.1889521808515888e-06, | |
| "loss": 0.45, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 1.9736989367655289, | |
| "grad_norm": 2.1493865400194103, | |
| "learning_rate": 3.1706873300717094e-06, | |
| "loss": 0.4903, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 1.9770565193060996, | |
| "grad_norm": 2.1217797993065988, | |
| "learning_rate": 3.152450619162612e-06, | |
| "loss": 0.456, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 1.9804141018466703, | |
| "grad_norm": 2.2674307611908273, | |
| "learning_rate": 3.1342423286551756e-06, | |
| "loss": 0.4758, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 1.983771684387241, | |
| "grad_norm": 2.016774239865244, | |
| "learning_rate": 3.116062738643092e-06, | |
| "loss": 0.4871, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.987129266927812, | |
| "grad_norm": 2.167349361097923, | |
| "learning_rate": 3.097912128778563e-06, | |
| "loss": 0.4621, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 1.9904868494683827, | |
| "grad_norm": 2.1086622374082644, | |
| "learning_rate": 3.0797907782679944e-06, | |
| "loss": 0.462, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 1.9938444320089537, | |
| "grad_norm": 2.142067661122772, | |
| "learning_rate": 3.061698965867701e-06, | |
| "loss": 0.4403, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 1.9972020145495244, | |
| "grad_norm": 2.211038574058772, | |
| "learning_rate": 3.043636969879625e-06, | |
| "loss": 0.4748, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 2.0033575825405707, | |
| "grad_norm": 8.113958823135038, | |
| "learning_rate": 3.0256050681470446e-06, | |
| "loss": 0.7156, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 2.0067151650811414, | |
| "grad_norm": 2.568864765515772, | |
| "learning_rate": 3.007603538050309e-06, | |
| "loss": 0.2897, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 2.010072747621712, | |
| "grad_norm": 2.313285944394538, | |
| "learning_rate": 2.989632656502564e-06, | |
| "loss": 0.2573, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 2.0134303301622833, | |
| "grad_norm": 2.2688120461445687, | |
| "learning_rate": 2.971692699945502e-06, | |
| "loss": 0.2617, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 2.016787912702854, | |
| "grad_norm": 2.2390498633994875, | |
| "learning_rate": 2.9537839443451e-06, | |
| "loss": 0.2628, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 2.020145495243425, | |
| "grad_norm": 2.018211337998392, | |
| "learning_rate": 2.935906665187378e-06, | |
| "loss": 0.2577, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 2.0235030777839955, | |
| "grad_norm": 2.0822307985642268, | |
| "learning_rate": 2.9180611374741623e-06, | |
| "loss": 0.2481, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 2.0268606603245662, | |
| "grad_norm": 2.651846463170353, | |
| "learning_rate": 2.900247635718856e-06, | |
| "loss": 0.2961, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 2.030218242865137, | |
| "grad_norm": 3.6928643005110513, | |
| "learning_rate": 2.8824664339422115e-06, | |
| "loss": 0.281, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 2.0335758254057077, | |
| "grad_norm": 3.075668633649421, | |
| "learning_rate": 2.8647178056681197e-06, | |
| "loss": 0.2588, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 2.036933407946279, | |
| "grad_norm": 2.6467867984028577, | |
| "learning_rate": 2.847002023919406e-06, | |
| "loss": 0.2678, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 2.0402909904868496, | |
| "grad_norm": 2.724532376339797, | |
| "learning_rate": 2.8293193612136183e-06, | |
| "loss": 0.2405, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 2.0436485730274203, | |
| "grad_norm": 2.4483147811459975, | |
| "learning_rate": 2.8116700895588473e-06, | |
| "loss": 0.241, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 2.047006155567991, | |
| "grad_norm": 2.2477666437496566, | |
| "learning_rate": 2.7940544804495345e-06, | |
| "loss": 0.2513, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 2.0503637381085618, | |
| "grad_norm": 2.154565484035418, | |
| "learning_rate": 2.7764728048623003e-06, | |
| "loss": 0.2506, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 2.0537213206491325, | |
| "grad_norm": 2.17650555134875, | |
| "learning_rate": 2.7589253332517736e-06, | |
| "loss": 0.2387, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 2.057078903189703, | |
| "grad_norm": 2.253001429434042, | |
| "learning_rate": 2.741412335546431e-06, | |
| "loss": 0.2491, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 2.0604364857302744, | |
| "grad_norm": 2.0324493606743146, | |
| "learning_rate": 2.7239340811444476e-06, | |
| "loss": 0.2402, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 2.063794068270845, | |
| "grad_norm": 2.2299955712377666, | |
| "learning_rate": 2.706490838909547e-06, | |
| "loss": 0.255, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 2.067151650811416, | |
| "grad_norm": 2.0377107617066965, | |
| "learning_rate": 2.6890828771668742e-06, | |
| "loss": 0.2576, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 2.0705092333519866, | |
| "grad_norm": 2.1876913026406037, | |
| "learning_rate": 2.671710463698859e-06, | |
| "loss": 0.2427, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 2.0738668158925573, | |
| "grad_norm": 2.0004569183512233, | |
| "learning_rate": 2.6543738657411033e-06, | |
| "loss": 0.2305, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 2.077224398433128, | |
| "grad_norm": 2.2750688222972695, | |
| "learning_rate": 2.6370733499782654e-06, | |
| "loss": 0.2398, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 2.0805819809736987, | |
| "grad_norm": 2.196557061021631, | |
| "learning_rate": 2.6198091825399606e-06, | |
| "loss": 0.2659, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 2.08393956351427, | |
| "grad_norm": 2.4966706499173306, | |
| "learning_rate": 2.6025816289966703e-06, | |
| "loss": 0.2528, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 2.0872971460548406, | |
| "grad_norm": 1.9987480291512625, | |
| "learning_rate": 2.5853909543556444e-06, | |
| "loss": 0.2381, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 2.0906547285954113, | |
| "grad_norm": 2.3473080858318793, | |
| "learning_rate": 2.568237423056844e-06, | |
| "loss": 0.2185, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 2.094012311135982, | |
| "grad_norm": 2.2351448577994, | |
| "learning_rate": 2.5511212989688587e-06, | |
| "loss": 0.2492, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 2.097369893676553, | |
| "grad_norm": 2.334819375193785, | |
| "learning_rate": 2.534042845384851e-06, | |
| "loss": 0.2264, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 2.1007274762171235, | |
| "grad_norm": 2.1645622435514578, | |
| "learning_rate": 2.517002325018508e-06, | |
| "loss": 0.2433, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 2.1040850587576942, | |
| "grad_norm": 2.4245838623271645, | |
| "learning_rate": 2.5000000000000015e-06, | |
| "loss": 0.2685, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 2.1074426412982654, | |
| "grad_norm": 2.187347569512869, | |
| "learning_rate": 2.4830361318719493e-06, | |
| "loss": 0.2314, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 2.110800223838836, | |
| "grad_norm": 2.125826756884641, | |
| "learning_rate": 2.4661109815854005e-06, | |
| "loss": 0.2601, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 2.114157806379407, | |
| "grad_norm": 2.175643578704326, | |
| "learning_rate": 2.449224809495815e-06, | |
| "loss": 0.248, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 2.1175153889199776, | |
| "grad_norm": 2.3549155345423842, | |
| "learning_rate": 2.4323778753590582e-06, | |
| "loss": 0.2289, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 2.1208729714605483, | |
| "grad_norm": 2.2111119256937877, | |
| "learning_rate": 2.4155704383274154e-06, | |
| "loss": 0.2437, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 2.124230554001119, | |
| "grad_norm": 2.3478963695246273, | |
| "learning_rate": 2.3988027569455895e-06, | |
| "loss": 0.2517, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 2.12758813654169, | |
| "grad_norm": 2.2461086864800106, | |
| "learning_rate": 2.3820750891467355e-06, | |
| "loss": 0.2333, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 2.130945719082261, | |
| "grad_norm": 2.2329675778706926, | |
| "learning_rate": 2.365387692248488e-06, | |
| "loss": 0.2359, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 2.1343033016228317, | |
| "grad_norm": 2.3145727443697934, | |
| "learning_rate": 2.348740822949006e-06, | |
| "loss": 0.2477, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 2.1376608841634024, | |
| "grad_norm": 2.1666894881929895, | |
| "learning_rate": 2.33213473732302e-06, | |
| "loss": 0.2442, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 2.141018466703973, | |
| "grad_norm": 2.215179935351511, | |
| "learning_rate": 2.3155696908178974e-06, | |
| "loss": 0.2492, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 2.144376049244544, | |
| "grad_norm": 2.0303810141253344, | |
| "learning_rate": 2.2990459382497086e-06, | |
| "loss": 0.2414, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 2.1477336317851146, | |
| "grad_norm": 2.2599318995246636, | |
| "learning_rate": 2.2825637337993094e-06, | |
| "loss": 0.2542, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 2.1510912143256853, | |
| "grad_norm": 2.229319114603247, | |
| "learning_rate": 2.266123331008436e-06, | |
| "loss": 0.2763, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 2.1544487968662565, | |
| "grad_norm": 2.018769817603059, | |
| "learning_rate": 2.2497249827757933e-06, | |
| "loss": 0.2279, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 2.157806379406827, | |
| "grad_norm": 2.1991246705651317, | |
| "learning_rate": 2.233368941353175e-06, | |
| "loss": 0.2415, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 2.161163961947398, | |
| "grad_norm": 2.2319026864631875, | |
| "learning_rate": 2.2170554583415782e-06, | |
| "loss": 0.2207, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 2.1645215444879686, | |
| "grad_norm": 2.1965612835784936, | |
| "learning_rate": 2.2007847846873342e-06, | |
| "loss": 0.2425, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 2.1678791270285394, | |
| "grad_norm": 2.1567182556620774, | |
| "learning_rate": 2.1845571706782486e-06, | |
| "loss": 0.2303, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 2.17123670956911, | |
| "grad_norm": 2.2622288541045683, | |
| "learning_rate": 2.1683728659397517e-06, | |
| "loss": 0.2429, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 2.1745942921096812, | |
| "grad_norm": 2.2558926541854176, | |
| "learning_rate": 2.1522321194310577e-06, | |
| "loss": 0.2541, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 2.177951874650252, | |
| "grad_norm": 2.2770654917423765, | |
| "learning_rate": 2.1361351794413334e-06, | |
| "loss": 0.2446, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 2.1813094571908227, | |
| "grad_norm": 2.173609001184362, | |
| "learning_rate": 2.1200822935858807e-06, | |
| "loss": 0.251, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 2.1846670397313934, | |
| "grad_norm": 2.2522011504738577, | |
| "learning_rate": 2.1040737088023323e-06, | |
| "loss": 0.2481, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 2.188024622271964, | |
| "grad_norm": 2.1688069367996596, | |
| "learning_rate": 2.0881096713468435e-06, | |
| "loss": 0.2486, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 2.191382204812535, | |
| "grad_norm": 2.3838615198983706, | |
| "learning_rate": 2.0721904267903097e-06, | |
| "loss": 0.2457, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 2.1947397873531056, | |
| "grad_norm": 2.1625681906768346, | |
| "learning_rate": 2.056316220014588e-06, | |
| "loss": 0.2271, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 2.1980973698936763, | |
| "grad_norm": 2.2745686739163014, | |
| "learning_rate": 2.040487295208732e-06, | |
| "loss": 0.2238, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 2.2014549524342475, | |
| "grad_norm": 2.0740837502881235, | |
| "learning_rate": 2.024703895865232e-06, | |
| "loss": 0.2633, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 2.204812534974818, | |
| "grad_norm": 2.348484545437271, | |
| "learning_rate": 2.0089662647762716e-06, | |
| "loss": 0.2502, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 2.208170117515389, | |
| "grad_norm": 2.1411611952162346, | |
| "learning_rate": 1.9932746440299926e-06, | |
| "loss": 0.2352, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 2.2115277000559597, | |
| "grad_norm": 2.1602600116386514, | |
| "learning_rate": 1.977629275006772e-06, | |
| "loss": 0.2214, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 2.2148852825965304, | |
| "grad_norm": 2.1825649500433104, | |
| "learning_rate": 1.962030398375506e-06, | |
| "loss": 0.2217, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 2.218242865137101, | |
| "grad_norm": 2.076444790094385, | |
| "learning_rate": 1.946478254089911e-06, | |
| "loss": 0.2327, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 2.2216004476776723, | |
| "grad_norm": 2.242188296768225, | |
| "learning_rate": 1.9309730813848302e-06, | |
| "loss": 0.2341, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 2.224958030218243, | |
| "grad_norm": 2.324463919600608, | |
| "learning_rate": 1.915515118772555e-06, | |
| "loss": 0.2367, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 2.2283156127588137, | |
| "grad_norm": 2.0513327269909487, | |
| "learning_rate": 1.9001046040391558e-06, | |
| "loss": 0.242, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 2.2316731952993845, | |
| "grad_norm": 2.344026468770851, | |
| "learning_rate": 1.884741774240823e-06, | |
| "loss": 0.2665, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 2.235030777839955, | |
| "grad_norm": 2.2216964878287735, | |
| "learning_rate": 1.8694268657002197e-06, | |
| "loss": 0.2433, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 2.238388360380526, | |
| "grad_norm": 2.1555814735411976, | |
| "learning_rate": 1.8541601140028542e-06, | |
| "loss": 0.2397, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 2.2417459429210966, | |
| "grad_norm": 2.1482906635963253, | |
| "learning_rate": 1.8389417539934428e-06, | |
| "loss": 0.2216, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 2.245103525461668, | |
| "grad_norm": 2.096021593139733, | |
| "learning_rate": 1.8237720197723075e-06, | |
| "loss": 0.2262, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 2.2484611080022385, | |
| "grad_norm": 2.266711561199349, | |
| "learning_rate": 1.8086511446917715e-06, | |
| "loss": 0.2343, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 2.2518186905428093, | |
| "grad_norm": 2.232410418505839, | |
| "learning_rate": 1.7935793613525693e-06, | |
| "loss": 0.2593, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 2.25517627308338, | |
| "grad_norm": 2.3428925980088264, | |
| "learning_rate": 1.7785569016002686e-06, | |
| "loss": 0.2743, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 2.2585338556239507, | |
| "grad_norm": 2.236303024740682, | |
| "learning_rate": 1.7635839965217055e-06, | |
| "loss": 0.2301, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 2.2618914381645214, | |
| "grad_norm": 2.0637903925635177, | |
| "learning_rate": 1.748660876441428e-06, | |
| "loss": 0.2643, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 2.265249020705092, | |
| "grad_norm": 2.2271012154627994, | |
| "learning_rate": 1.7337877709181527e-06, | |
| "loss": 0.2309, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 2.2686066032456633, | |
| "grad_norm": 2.1077879509424005, | |
| "learning_rate": 1.7189649087412385e-06, | |
| "loss": 0.261, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 2.271964185786234, | |
| "grad_norm": 2.2008818744280263, | |
| "learning_rate": 1.7041925179271584e-06, | |
| "loss": 0.2453, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 2.2753217683268048, | |
| "grad_norm": 2.2427383093212394, | |
| "learning_rate": 1.689470825715998e-06, | |
| "loss": 0.2349, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 2.2786793508673755, | |
| "grad_norm": 2.325006154923223, | |
| "learning_rate": 1.6748000585679602e-06, | |
| "loss": 0.2529, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 2.282036933407946, | |
| "grad_norm": 2.30699822949776, | |
| "learning_rate": 1.6601804421598787e-06, | |
| "loss": 0.2558, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 2.285394515948517, | |
| "grad_norm": 2.131117963004742, | |
| "learning_rate": 1.6456122013817477e-06, | |
| "loss": 0.2334, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 2.2887520984890877, | |
| "grad_norm": 2.1171412775183582, | |
| "learning_rate": 1.631095560333264e-06, | |
| "loss": 0.2431, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 2.2921096810296584, | |
| "grad_norm": 2.2433228629531774, | |
| "learning_rate": 1.6166307423203765e-06, | |
| "loss": 0.214, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 2.2954672635702296, | |
| "grad_norm": 2.268872598922477, | |
| "learning_rate": 1.6022179698518525e-06, | |
| "loss": 0.2401, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 2.2988248461108003, | |
| "grad_norm": 2.191932766219746, | |
| "learning_rate": 1.5878574646358608e-06, | |
| "loss": 0.2178, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 2.302182428651371, | |
| "grad_norm": 2.2800614694305144, | |
| "learning_rate": 1.573549447576549e-06, | |
| "loss": 0.2335, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 2.3055400111919417, | |
| "grad_norm": 2.3217546136753273, | |
| "learning_rate": 1.5592941387706562e-06, | |
| "loss": 0.2349, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 2.3088975937325125, | |
| "grad_norm": 2.2055299968173, | |
| "learning_rate": 1.5450917575041209e-06, | |
| "loss": 0.2461, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 2.312255176273083, | |
| "grad_norm": 2.1265013617268256, | |
| "learning_rate": 1.5309425222487119e-06, | |
| "loss": 0.2166, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 2.3156127588136544, | |
| "grad_norm": 2.1979179058845695, | |
| "learning_rate": 1.5168466506586654e-06, | |
| "loss": 0.2196, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 2.318970341354225, | |
| "grad_norm": 2.167123534236895, | |
| "learning_rate": 1.502804359567337e-06, | |
| "loss": 0.2427, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 2.322327923894796, | |
| "grad_norm": 2.3539399012866418, | |
| "learning_rate": 1.4888158649838675e-06, | |
| "loss": 0.2386, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 2.3256855064353665, | |
| "grad_norm": 2.1796719146281345, | |
| "learning_rate": 1.4748813820898554e-06, | |
| "loss": 0.236, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 2.3290430889759373, | |
| "grad_norm": 2.2205243451241, | |
| "learning_rate": 1.4610011252360594e-06, | |
| "loss": 0.2229, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 2.332400671516508, | |
| "grad_norm": 2.3094055557029494, | |
| "learning_rate": 1.4471753079390815e-06, | |
| "loss": 0.2396, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 2.3357582540570787, | |
| "grad_norm": 2.1939883736480157, | |
| "learning_rate": 1.4334041428781003e-06, | |
| "loss": 0.231, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 2.33911583659765, | |
| "grad_norm": 2.149486473333343, | |
| "learning_rate": 1.4196878418915894e-06, | |
| "loss": 0.2365, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 2.3424734191382206, | |
| "grad_norm": 2.2453237673213255, | |
| "learning_rate": 1.4060266159740627e-06, | |
| "loss": 0.2388, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 2.3458310016787913, | |
| "grad_norm": 2.423577584509045, | |
| "learning_rate": 1.3924206752728282e-06, | |
| "loss": 0.2401, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 2.349188584219362, | |
| "grad_norm": 2.4806635634108187, | |
| "learning_rate": 1.3788702290847517e-06, | |
| "loss": 0.2429, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 2.3525461667599328, | |
| "grad_norm": 2.380640333144661, | |
| "learning_rate": 1.3653754858530477e-06, | |
| "loss": 0.2258, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 2.3559037493005035, | |
| "grad_norm": 2.2491745778254066, | |
| "learning_rate": 1.3519366531640589e-06, | |
| "loss": 0.2622, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 2.3592613318410747, | |
| "grad_norm": 2.316018513747914, | |
| "learning_rate": 1.3385539377440709e-06, | |
| "loss": 0.248, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 2.3626189143816454, | |
| "grad_norm": 2.314430211194231, | |
| "learning_rate": 1.3252275454561337e-06, | |
| "loss": 0.2536, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 2.365976496922216, | |
| "grad_norm": 2.1763408291528674, | |
| "learning_rate": 1.3119576812968893e-06, | |
| "loss": 0.2403, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 2.369334079462787, | |
| "grad_norm": 2.1416964805672283, | |
| "learning_rate": 1.2987445493934236e-06, | |
| "loss": 0.2273, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 2.3726916620033576, | |
| "grad_norm": 2.1683455218648358, | |
| "learning_rate": 1.2855883530001228e-06, | |
| "loss": 0.2423, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 2.3760492445439283, | |
| "grad_norm": 2.2242131906759597, | |
| "learning_rate": 1.272489294495548e-06, | |
| "loss": 0.2404, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 2.379406827084499, | |
| "grad_norm": 2.1865211099221553, | |
| "learning_rate": 1.2594475753793211e-06, | |
| "loss": 0.2483, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 2.3827644096250697, | |
| "grad_norm": 2.0946660616224815, | |
| "learning_rate": 1.2464633962690304e-06, | |
| "loss": 0.255, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 2.386121992165641, | |
| "grad_norm": 2.2231751389825463, | |
| "learning_rate": 1.2335369568971362e-06, | |
| "loss": 0.2343, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 2.3894795747062116, | |
| "grad_norm": 2.312671362781463, | |
| "learning_rate": 1.2206684561079035e-06, | |
| "loss": 0.2408, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 2.3928371572467824, | |
| "grad_norm": 2.179660173524886, | |
| "learning_rate": 1.207858091854342e-06, | |
| "loss": 0.2383, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 2.396194739787353, | |
| "grad_norm": 2.1293264857856555, | |
| "learning_rate": 1.1951060611951615e-06, | |
| "loss": 0.23, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 2.399552322327924, | |
| "grad_norm": 2.1755461849480446, | |
| "learning_rate": 1.1824125602917414e-06, | |
| "loss": 0.2354, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 2.4029099048684945, | |
| "grad_norm": 1.9840827894505013, | |
| "learning_rate": 1.1697777844051105e-06, | |
| "loss": 0.2284, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 2.4062674874090657, | |
| "grad_norm": 2.0944031352778643, | |
| "learning_rate": 1.1572019278929457e-06, | |
| "loss": 0.2357, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 2.4096250699496364, | |
| "grad_norm": 2.286049493049029, | |
| "learning_rate": 1.1446851842065804e-06, | |
| "loss": 0.2219, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 2.412982652490207, | |
| "grad_norm": 2.195180056991891, | |
| "learning_rate": 1.1322277458880337e-06, | |
| "loss": 0.2443, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 2.416340235030778, | |
| "grad_norm": 2.360744350880101, | |
| "learning_rate": 1.1198298045670402e-06, | |
| "loss": 0.2307, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 2.4196978175713486, | |
| "grad_norm": 2.27588589395575, | |
| "learning_rate": 1.1074915509581086e-06, | |
| "loss": 0.2218, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 2.4230554001119193, | |
| "grad_norm": 2.2915120318076894, | |
| "learning_rate": 1.0952131748575855e-06, | |
| "loss": 0.2348, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 2.42641298265249, | |
| "grad_norm": 2.3740865208478428, | |
| "learning_rate": 1.0829948651407374e-06, | |
| "loss": 0.233, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 2.429770565193061, | |
| "grad_norm": 2.309847767416398, | |
| "learning_rate": 1.0708368097588435e-06, | |
| "loss": 0.2411, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 2.433128147733632, | |
| "grad_norm": 2.1585332492732703, | |
| "learning_rate": 1.0587391957363053e-06, | |
| "loss": 0.2689, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 2.4364857302742027, | |
| "grad_norm": 2.2620198419443645, | |
| "learning_rate": 1.0467022091677692e-06, | |
| "loss": 0.2386, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 2.4398433128147734, | |
| "grad_norm": 2.4789714742421998, | |
| "learning_rate": 1.0347260352152644e-06, | |
| "loss": 0.2542, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 2.443200895355344, | |
| "grad_norm": 2.233580671739803, | |
| "learning_rate": 1.0228108581053565e-06, | |
| "loss": 0.2342, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 2.446558477895915, | |
| "grad_norm": 2.1024662676881314, | |
| "learning_rate": 1.0109568611263094e-06, | |
| "loss": 0.222, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 2.4499160604364856, | |
| "grad_norm": 2.276521915661851, | |
| "learning_rate": 9.991642266252672e-07, | |
| "loss": 0.2099, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 2.4532736429770567, | |
| "grad_norm": 2.260563206399162, | |
| "learning_rate": 9.87433136005454e-07, | |
| "loss": 0.2548, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 2.4566312255176275, | |
| "grad_norm": 2.156550439442849, | |
| "learning_rate": 9.757637697233723e-07, | |
| "loss": 0.2211, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 2.459988808058198, | |
| "grad_norm": 2.2236291372113866, | |
| "learning_rate": 9.641563072860416e-07, | |
| "loss": 0.2258, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 2.463346390598769, | |
| "grad_norm": 2.1205062426646437, | |
| "learning_rate": 9.526109272482237e-07, | |
| "loss": 0.2201, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 2.4667039731393396, | |
| "grad_norm": 2.3294849171122567, | |
| "learning_rate": 9.41127807209688e-07, | |
| "loss": 0.2303, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 2.4700615556799104, | |
| "grad_norm": 2.127623518916985, | |
| "learning_rate": 9.297071238124683e-07, | |
| "loss": 0.2374, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 2.473419138220481, | |
| "grad_norm": 2.358371434685423, | |
| "learning_rate": 9.183490527381539e-07, | |
| "loss": 0.2415, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 2.476776720761052, | |
| "grad_norm": 2.3348118958807014, | |
| "learning_rate": 9.070537687051817e-07, | |
| "loss": 0.2253, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 2.480134303301623, | |
| "grad_norm": 2.254796435173114, | |
| "learning_rate": 8.958214454661529e-07, | |
| "loss": 0.2474, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 2.4834918858421937, | |
| "grad_norm": 2.135376686047964, | |
| "learning_rate": 8.846522558051563e-07, | |
| "loss": 0.2193, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 2.4868494683827644, | |
| "grad_norm": 2.4255834301642745, | |
| "learning_rate": 8.735463715351139e-07, | |
| "loss": 0.2569, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 2.490207050923335, | |
| "grad_norm": 2.1786279330493694, | |
| "learning_rate": 8.625039634951354e-07, | |
| "loss": 0.2388, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 2.493564633463906, | |
| "grad_norm": 2.3684247953727144, | |
| "learning_rate": 8.515252015478915e-07, | |
| "loss": 0.2432, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 2.4969222160044766, | |
| "grad_norm": 2.1452983004533706, | |
| "learning_rate": 8.406102545769989e-07, | |
| "loss": 0.2361, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 2.500279798545048, | |
| "grad_norm": 2.239393813510702, | |
| "learning_rate": 8.297592904844282e-07, | |
| "loss": 0.2169, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 2.5036373810856185, | |
| "grad_norm": 2.1080838250603233, | |
| "learning_rate": 8.189724761879131e-07, | |
| "loss": 0.2402, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 2.5069949636261892, | |
| "grad_norm": 2.205253345024292, | |
| "learning_rate": 8.082499776183883e-07, | |
| "loss": 0.2345, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 2.51035254616676, | |
| "grad_norm": 2.236843913141464, | |
| "learning_rate": 7.975919597174342e-07, | |
| "loss": 0.2272, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 2.5137101287073307, | |
| "grad_norm": 2.1687504666441257, | |
| "learning_rate": 7.869985864347424e-07, | |
| "loss": 0.2304, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 2.5170677112479014, | |
| "grad_norm": 2.32345363923919, | |
| "learning_rate": 7.764700207255904e-07, | |
| "loss": 0.2409, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 2.520425293788472, | |
| "grad_norm": 2.2137378690316605, | |
| "learning_rate": 7.660064245483384e-07, | |
| "loss": 0.2273, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 2.523782876329043, | |
| "grad_norm": 2.261904827600831, | |
| "learning_rate": 7.556079588619341e-07, | |
| "loss": 0.2219, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 2.527140458869614, | |
| "grad_norm": 2.2075096631540245, | |
| "learning_rate": 7.452747836234392e-07, | |
| "loss": 0.2234, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 2.5304980414101848, | |
| "grad_norm": 2.2106003656998707, | |
| "learning_rate": 7.350070577855716e-07, | |
| "loss": 0.2485, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 2.5338556239507555, | |
| "grad_norm": 2.262010781586963, | |
| "learning_rate": 7.24804939294253e-07, | |
| "loss": 0.2405, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 2.537213206491326, | |
| "grad_norm": 2.1267348431476387, | |
| "learning_rate": 7.146685850861851e-07, | |
| "loss": 0.2394, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 2.540570789031897, | |
| "grad_norm": 2.305882442336494, | |
| "learning_rate": 7.045981510864319e-07, | |
| "loss": 0.2528, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 2.543928371572468, | |
| "grad_norm": 2.018265078425514, | |
| "learning_rate": 6.945937922060259e-07, | |
| "loss": 0.233, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 2.547285954113039, | |
| "grad_norm": 2.2614071614786666, | |
| "learning_rate": 6.846556623395795e-07, | |
| "loss": 0.222, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 2.5506435366536095, | |
| "grad_norm": 2.1351635366635158, | |
| "learning_rate": 6.74783914362922e-07, | |
| "loss": 0.2273, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 2.5540011191941803, | |
| "grad_norm": 2.100876831365208, | |
| "learning_rate": 6.649787001307451e-07, | |
| "loss": 0.2072, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 2.557358701734751, | |
| "grad_norm": 2.1077270882398524, | |
| "learning_rate": 6.552401704742678e-07, | |
| "loss": 0.2147, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 2.5607162842753217, | |
| "grad_norm": 2.185148590555281, | |
| "learning_rate": 6.455684751989194e-07, | |
| "loss": 0.2387, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 2.5640738668158924, | |
| "grad_norm": 2.4273332489758714, | |
| "learning_rate": 6.359637630820292e-07, | |
| "loss": 0.2187, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 2.567431449356463, | |
| "grad_norm": 2.2410879666266945, | |
| "learning_rate": 6.26426181870542e-07, | |
| "loss": 0.2356, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 2.570789031897034, | |
| "grad_norm": 2.0546535173663236, | |
| "learning_rate": 6.169558782787438e-07, | |
| "loss": 0.2134, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 2.574146614437605, | |
| "grad_norm": 2.2712662103864667, | |
| "learning_rate": 6.075529979860068e-07, | |
| "loss": 0.2434, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 2.577504196978176, | |
| "grad_norm": 2.495990931424611, | |
| "learning_rate": 5.982176856345445e-07, | |
| "loss": 0.2572, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 2.5808617795187465, | |
| "grad_norm": 2.272797069007875, | |
| "learning_rate": 5.889500848271901e-07, | |
| "loss": 0.2365, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 2.5842193620593172, | |
| "grad_norm": 2.2261054362951573, | |
| "learning_rate": 5.797503381251896e-07, | |
| "loss": 0.2345, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 2.587576944599888, | |
| "grad_norm": 2.2214663136739072, | |
| "learning_rate": 5.706185870460018e-07, | |
| "loss": 0.2582, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 2.590934527140459, | |
| "grad_norm": 2.2509221328745666, | |
| "learning_rate": 5.61554972061128e-07, | |
| "loss": 0.2405, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 2.59429210968103, | |
| "grad_norm": 1.9882931292487553, | |
| "learning_rate": 5.525596325939469e-07, | |
| "loss": 0.2074, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 2.5976496922216006, | |
| "grad_norm": 2.2641730214593667, | |
| "learning_rate": 5.436327070175729e-07, | |
| "loss": 0.2264, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 2.6010072747621713, | |
| "grad_norm": 2.1593141034556607, | |
| "learning_rate": 5.347743326527255e-07, | |
| "loss": 0.2334, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 2.604364857302742, | |
| "grad_norm": 2.160652045504084, | |
| "learning_rate": 5.25984645765617e-07, | |
| "loss": 0.2348, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 2.6077224398433128, | |
| "grad_norm": 2.1624773063108873, | |
| "learning_rate": 5.172637815658583e-07, | |
| "loss": 0.2046, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 2.6110800223838835, | |
| "grad_norm": 2.2467642799151624, | |
| "learning_rate": 5.086118742043761e-07, | |
| "loss": 0.2521, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 2.614437604924454, | |
| "grad_norm": 2.1254445389657435, | |
| "learning_rate": 5.000290567713533e-07, | |
| "loss": 0.2209, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 2.617795187465025, | |
| "grad_norm": 2.331029167817777, | |
| "learning_rate": 4.915154612941781e-07, | |
| "loss": 0.2461, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 2.621152770005596, | |
| "grad_norm": 2.225940066081278, | |
| "learning_rate": 4.830712187354125e-07, | |
| "loss": 0.2521, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 2.624510352546167, | |
| "grad_norm": 2.1960281391785768, | |
| "learning_rate": 4.7469645899078153e-07, | |
| "loss": 0.2081, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 2.6278679350867375, | |
| "grad_norm": 2.202588232752528, | |
| "learning_rate": 4.663913108871726e-07, | |
| "loss": 0.2217, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 2.6312255176273083, | |
| "grad_norm": 2.1413814255877788, | |
| "learning_rate": 4.581559021806542e-07, | |
| "loss": 0.2279, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 2.634583100167879, | |
| "grad_norm": 2.2575040003842877, | |
| "learning_rate": 4.4999035955450964e-07, | |
| "loss": 0.2507, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 2.63794068270845, | |
| "grad_norm": 2.29100443191833, | |
| "learning_rate": 4.4189480861729137e-07, | |
| "loss": 0.247, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 2.641298265249021, | |
| "grad_norm": 2.0949207855222625, | |
| "learning_rate": 4.3386937390088366e-07, | |
| "loss": 0.205, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 2.6446558477895916, | |
| "grad_norm": 2.3016984109745064, | |
| "learning_rate": 4.259141788585947e-07, | |
| "loss": 0.2436, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 2.6480134303301623, | |
| "grad_norm": 2.2773955253769276, | |
| "learning_rate": 4.1802934586324897e-07, | |
| "loss": 0.2329, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 2.651371012870733, | |
| "grad_norm": 2.2679994509516543, | |
| "learning_rate": 4.102149962053098e-07, | |
| "loss": 0.2416, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 2.654728595411304, | |
| "grad_norm": 2.134297523257456, | |
| "learning_rate": 4.0247125009101275e-07, | |
| "loss": 0.2384, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 2.6580861779518745, | |
| "grad_norm": 2.3727877129494774, | |
| "learning_rate": 3.947982266405159e-07, | |
| "loss": 0.2313, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 2.6614437604924452, | |
| "grad_norm": 2.1765597243225288, | |
| "learning_rate": 3.871960438860689e-07, | |
| "loss": 0.2257, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 2.664801343033016, | |
| "grad_norm": 2.313798954233314, | |
| "learning_rate": 3.796648187701957e-07, | |
| "loss": 0.2436, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 2.668158925573587, | |
| "grad_norm": 2.3444816164677595, | |
| "learning_rate": 3.72204667143895e-07, | |
| "loss": 0.2534, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 2.671516508114158, | |
| "grad_norm": 2.29388898580906, | |
| "learning_rate": 3.648157037648598e-07, | |
| "loss": 0.2159, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 2.6748740906547286, | |
| "grad_norm": 2.184396762194593, | |
| "learning_rate": 3.574980422957147e-07, | |
| "loss": 0.2151, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 2.6782316731952993, | |
| "grad_norm": 2.3518739979849683, | |
| "learning_rate": 3.5025179530225995e-07, | |
| "loss": 0.2236, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 2.68158925573587, | |
| "grad_norm": 2.2064933728915213, | |
| "learning_rate": 3.43077074251747e-07, | |
| "loss": 0.2305, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 2.684946838276441, | |
| "grad_norm": 2.340839131571328, | |
| "learning_rate": 3.359739895111602e-07, | |
| "loss": 0.2451, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 2.688304420817012, | |
| "grad_norm": 2.2538444477416055, | |
| "learning_rate": 3.289426503455201e-07, | |
| "loss": 0.2234, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 2.6916620033575827, | |
| "grad_norm": 2.244758740164714, | |
| "learning_rate": 3.2198316491620305e-07, | |
| "loss": 0.2294, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 2.6950195858981534, | |
| "grad_norm": 2.149355042894359, | |
| "learning_rate": 3.150956402792765e-07, | |
| "loss": 0.2216, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 2.698377168438724, | |
| "grad_norm": 2.259174848777998, | |
| "learning_rate": 3.082801823838527e-07, | |
| "loss": 0.2268, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 2.701734750979295, | |
| "grad_norm": 2.2477921891179204, | |
| "learning_rate": 3.015368960704584e-07, | |
| "loss": 0.242, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 2.7050923335198656, | |
| "grad_norm": 2.1272415575213643, | |
| "learning_rate": 2.9486588506942303e-07, | |
| "loss": 0.2342, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 2.7084499160604363, | |
| "grad_norm": 2.334010741930345, | |
| "learning_rate": 2.882672519992824e-07, | |
| "loss": 0.2285, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 2.711807498601007, | |
| "grad_norm": 2.427045189554706, | |
| "learning_rate": 2.817410983651997e-07, | |
| "loss": 0.2562, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 2.715165081141578, | |
| "grad_norm": 2.062462864366435, | |
| "learning_rate": 2.7528752455740606e-07, | |
| "loss": 0.1984, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 2.718522663682149, | |
| "grad_norm": 2.117107968846607, | |
| "learning_rate": 2.6890662984965234e-07, | |
| "loss": 0.2167, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 2.7218802462227196, | |
| "grad_norm": 2.172976071364664, | |
| "learning_rate": 2.625985123976876e-07, | |
| "loss": 0.2312, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 2.7252378287632903, | |
| "grad_norm": 2.1650501300436438, | |
| "learning_rate": 2.5636326923774325e-07, | |
| "loss": 0.2423, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 2.728595411303861, | |
| "grad_norm": 2.212412244165142, | |
| "learning_rate": 2.5020099628504603e-07, | |
| "loss": 0.2185, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 2.7319529938444322, | |
| "grad_norm": 2.1280889488629287, | |
| "learning_rate": 2.441117883323374e-07, | |
| "loss": 0.2413, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 2.735310576385003, | |
| "grad_norm": 2.18787418811109, | |
| "learning_rate": 2.3809573904841844e-07, | |
| "loss": 0.233, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 2.7386681589255737, | |
| "grad_norm": 2.2553778724383737, | |
| "learning_rate": 2.3215294097670927e-07, | |
| "loss": 0.2236, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 2.7420257414661444, | |
| "grad_norm": 2.080459053836358, | |
| "learning_rate": 2.262834855338225e-07, | |
| "loss": 0.2376, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 2.745383324006715, | |
| "grad_norm": 2.165809723126224, | |
| "learning_rate": 2.204874630081616e-07, | |
| "loss": 0.2225, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 2.748740906547286, | |
| "grad_norm": 1.956393897313748, | |
| "learning_rate": 2.1476496255852685e-07, | |
| "loss": 0.233, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 2.7520984890878566, | |
| "grad_norm": 2.101187708321008, | |
| "learning_rate": 2.091160722127472e-07, | |
| "loss": 0.2233, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 2.7554560716284273, | |
| "grad_norm": 2.328065045528626, | |
| "learning_rate": 2.0354087886632623e-07, | |
| "loss": 0.2371, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 2.7588136541689985, | |
| "grad_norm": 2.1295011524399086, | |
| "learning_rate": 1.9803946828110376e-07, | |
| "loss": 0.2408, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 2.762171236709569, | |
| "grad_norm": 2.0812004019092822, | |
| "learning_rate": 1.9261192508393755e-07, | |
| "loss": 0.2211, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 2.76552881925014, | |
| "grad_norm": 2.110225954008512, | |
| "learning_rate": 1.8725833276540095e-07, | |
| "loss": 0.2328, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 2.7688864017907107, | |
| "grad_norm": 2.209831203361078, | |
| "learning_rate": 1.8197877367849948e-07, | |
| "loss": 0.2424, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 2.7722439843312814, | |
| "grad_norm": 2.1406558326813103, | |
| "learning_rate": 1.7677332903740296e-07, | |
| "loss": 0.2293, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 2.775601566871852, | |
| "grad_norm": 2.160818736774281, | |
| "learning_rate": 1.7164207891619823e-07, | |
| "loss": 0.2265, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 2.7789591494124233, | |
| "grad_norm": 2.1902094381897177, | |
| "learning_rate": 1.6658510224765333e-07, | |
| "loss": 0.2253, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 2.782316731952994, | |
| "grad_norm": 2.311103825756955, | |
| "learning_rate": 1.6160247682200813e-07, | |
| "loss": 0.2455, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 2.7856743144935647, | |
| "grad_norm": 2.218083782097005, | |
| "learning_rate": 1.566942792857745e-07, | |
| "loss": 0.2233, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 2.7890318970341355, | |
| "grad_norm": 2.2738455358176277, | |
| "learning_rate": 1.5186058514055912e-07, | |
| "loss": 0.2399, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 2.792389479574706, | |
| "grad_norm": 2.193046078475537, | |
| "learning_rate": 1.471014687418998e-07, | |
| "loss": 0.219, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 2.795747062115277, | |
| "grad_norm": 2.1748698706908773, | |
| "learning_rate": 1.4241700329812368e-07, | |
| "loss": 0.2208, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 2.7991046446558476, | |
| "grad_norm": 2.25948599781455, | |
| "learning_rate": 1.3780726086922103e-07, | |
| "loss": 0.2205, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 2.8024622271964184, | |
| "grad_norm": 2.1627090816106906, | |
| "learning_rate": 1.332723123657348e-07, | |
| "loss": 0.2155, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 2.8058198097369895, | |
| "grad_norm": 2.1556941222951918, | |
| "learning_rate": 1.288122275476733e-07, | |
| "loss": 0.2209, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 2.8091773922775602, | |
| "grad_norm": 2.199934634740392, | |
| "learning_rate": 1.244270750234333e-07, | |
| "loss": 0.2362, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 2.812534974818131, | |
| "grad_norm": 2.265457178349347, | |
| "learning_rate": 1.201169222487464e-07, | |
| "loss": 0.2395, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 2.8158925573587017, | |
| "grad_norm": 2.243639772812836, | |
| "learning_rate": 1.1588183552564247e-07, | |
| "loss": 0.2251, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 2.8192501398992724, | |
| "grad_norm": 2.1527813152932174, | |
| "learning_rate": 1.1172188000142803e-07, | |
| "loss": 0.2434, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 2.822607722439843, | |
| "grad_norm": 2.290823128697244, | |
| "learning_rate": 1.0763711966768453e-07, | |
| "loss": 0.2078, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 2.8259653049804143, | |
| "grad_norm": 2.108924814493277, | |
| "learning_rate": 1.0362761735928372e-07, | |
| "loss": 0.2209, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 2.829322887520985, | |
| "grad_norm": 2.2692353365060574, | |
| "learning_rate": 9.969343475342285e-08, | |
| "loss": 0.2413, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 2.8326804700615558, | |
| "grad_norm": 2.320890260637866, | |
| "learning_rate": 9.583463236867318e-08, | |
| "loss": 0.2405, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 2.8360380526021265, | |
| "grad_norm": 2.114878278170637, | |
| "learning_rate": 9.205126956405075e-08, | |
| "loss": 0.2122, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 2.839395635142697, | |
| "grad_norm": 2.1364581985268356, | |
| "learning_rate": 8.834340453810375e-08, | |
| "loss": 0.2173, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 2.842753217683268, | |
| "grad_norm": 2.327798293544225, | |
| "learning_rate": 8.471109432801494e-08, | |
| "loss": 0.2305, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 2.8461108002238387, | |
| "grad_norm": 2.1784789640309614, | |
| "learning_rate": 8.11543948087279e-08, | |
| "loss": 0.2238, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 2.8494683827644094, | |
| "grad_norm": 2.3102169432150563, | |
| "learning_rate": 7.76733606920832e-08, | |
| "loss": 0.2158, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 2.8528259653049806, | |
| "grad_norm": 2.0676705717179877, | |
| "learning_rate": 7.426804552598088e-08, | |
| "loss": 0.2276, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 2.8561835478455513, | |
| "grad_norm": 2.321852646027188, | |
| "learning_rate": 7.093850169355266e-08, | |
| "loss": 0.2412, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 2.859541130386122, | |
| "grad_norm": 2.129502189630901, | |
| "learning_rate": 6.768478041236037e-08, | |
| "loss": 0.2102, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 2.8628987129266927, | |
| "grad_norm": 2.432479560424763, | |
| "learning_rate": 6.450693173360445e-08, | |
| "loss": 0.219, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 2.8662562954672635, | |
| "grad_norm": 2.246241784383759, | |
| "learning_rate": 6.140500454135668e-08, | |
| "loss": 0.2172, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 2.8696138780078346, | |
| "grad_norm": 2.2178086164936777, | |
| "learning_rate": 5.8379046551807486e-08, | |
| "loss": 0.2355, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 2.8729714605484054, | |
| "grad_norm": 2.079524766097902, | |
| "learning_rate": 5.542910431252935e-08, | |
| "loss": 0.2208, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 2.876329043088976, | |
| "grad_norm": 2.230864146180223, | |
| "learning_rate": 5.255522320176565e-08, | |
| "loss": 0.2268, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 2.879686625629547, | |
| "grad_norm": 2.1326498447596496, | |
| "learning_rate": 4.975744742772848e-08, | |
| "loss": 0.2182, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 2.8830442081701175, | |
| "grad_norm": 2.1859329833247467, | |
| "learning_rate": 4.7035820027920284e-08, | |
| "loss": 0.2364, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 2.8864017907106883, | |
| "grad_norm": 2.2460754412578505, | |
| "learning_rate": 4.439038286847164e-08, | |
| "loss": 0.2222, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 2.889759373251259, | |
| "grad_norm": 2.0025699472964504, | |
| "learning_rate": 4.182117664349783e-08, | |
| "loss": 0.2378, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 2.8931169557918297, | |
| "grad_norm": 2.3077570677713948, | |
| "learning_rate": 3.9328240874471624e-08, | |
| "loss": 0.2254, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 2.8964745383324004, | |
| "grad_norm": 2.0838649182430946, | |
| "learning_rate": 3.6911613909616505e-08, | |
| "loss": 0.2131, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 2.8998321208729716, | |
| "grad_norm": 2.2289951432899024, | |
| "learning_rate": 3.457133292331494e-08, | |
| "loss": 0.2288, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 2.9031897034135423, | |
| "grad_norm": 2.1558465137002654, | |
| "learning_rate": 3.230743391553881e-08, | |
| "loss": 0.2181, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 2.906547285954113, | |
| "grad_norm": 2.1450777488580774, | |
| "learning_rate": 3.011995171129545e-08, | |
| "loss": 0.2055, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 2.9099048684946838, | |
| "grad_norm": 2.1709969058929484, | |
| "learning_rate": 2.8008919960090253e-08, | |
| "loss": 0.2475, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 2.9132624510352545, | |
| "grad_norm": 1.9687943814735813, | |
| "learning_rate": 2.5974371135408792e-08, | |
| "loss": 0.2006, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 2.9166200335758257, | |
| "grad_norm": 2.2277357096196204, | |
| "learning_rate": 2.401633653422053e-08, | |
| "loss": 0.2245, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 2.9199776161163964, | |
| "grad_norm": 2.225179238729893, | |
| "learning_rate": 2.2134846276494205e-08, | |
| "loss": 0.2628, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 2.923335198656967, | |
| "grad_norm": 2.233813263586234, | |
| "learning_rate": 2.032992930473543e-08, | |
| "loss": 0.2367, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 2.926692781197538, | |
| "grad_norm": 2.192104481598423, | |
| "learning_rate": 1.860161338354205e-08, | |
| "loss": 0.221, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 2.9300503637381086, | |
| "grad_norm": 2.2721116115550117, | |
| "learning_rate": 1.69499250991767e-08, | |
| "loss": 0.2229, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 2.9334079462786793, | |
| "grad_norm": 2.069015317512474, | |
| "learning_rate": 1.5374889859157137e-08, | |
| "loss": 0.2026, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 2.93676552881925, | |
| "grad_norm": 2.226727947199419, | |
| "learning_rate": 1.3876531891867106e-08, | |
| "loss": 0.2329, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 2.9401231113598207, | |
| "grad_norm": 2.3274573956235862, | |
| "learning_rate": 1.2454874246181081e-08, | |
| "loss": 0.2307, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 2.9434806939003915, | |
| "grad_norm": 2.2826046277606267, | |
| "learning_rate": 1.1109938791112328e-08, | |
| "loss": 0.2381, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 2.9468382764409626, | |
| "grad_norm": 2.1682098280901574, | |
| "learning_rate": 9.841746215474845e-09, | |
| "loss": 0.2414, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 2.9501958589815334, | |
| "grad_norm": 2.023824966326867, | |
| "learning_rate": 8.650316027566386e-09, | |
| "loss": 0.2139, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 2.953553441522104, | |
| "grad_norm": 2.1379663757918106, | |
| "learning_rate": 7.535666554866483e-09, | |
| "loss": 0.2369, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 2.956911024062675, | |
| "grad_norm": 2.338410010088983, | |
| "learning_rate": 6.497814943756675e-09, | |
| "loss": 0.2422, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 2.9602686066032455, | |
| "grad_norm": 2.069543683427084, | |
| "learning_rate": 5.536777159254603e-09, | |
| "loss": 0.2214, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 2.9636261891438167, | |
| "grad_norm": 2.3978229945182914, | |
| "learning_rate": 4.652567984770873e-09, | |
| "loss": 0.2285, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 2.9669837716843874, | |
| "grad_norm": 2.177819105580462, | |
| "learning_rate": 3.845201021879241e-09, | |
| "loss": 0.2206, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 2.970341354224958, | |
| "grad_norm": 2.232082224924113, | |
| "learning_rate": 3.1146886901090024e-09, | |
| "loss": 0.2267, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 2.973698936765529, | |
| "grad_norm": 2.3050073009713854, | |
| "learning_rate": 2.461042226752919e-09, | |
| "loss": 0.2457, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 2.9770565193060996, | |
| "grad_norm": 2.1979166006413515, | |
| "learning_rate": 1.8842716866956935e-09, | |
| "loss": 0.2412, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 2.9804141018466703, | |
| "grad_norm": 2.255837191828035, | |
| "learning_rate": 1.3843859422574269e-09, | |
| "loss": 0.2331, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 2.983771684387241, | |
| "grad_norm": 2.342133934859939, | |
| "learning_rate": 9.613926830587262e-10, | |
| "loss": 0.2382, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 2.9871292669278118, | |
| "grad_norm": 2.280659517009034, | |
| "learning_rate": 6.152984159024655e-10, | |
| "loss": 0.2454, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 2.9904868494683825, | |
| "grad_norm": 2.3892948295661443, | |
| "learning_rate": 3.4610846467109106e-10, | |
| "loss": 0.2257, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 2.9938444320089537, | |
| "grad_norm": 2.2858765844038023, | |
| "learning_rate": 1.538269702494599e-10, | |
| "loss": 0.2244, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 2.9972020145495244, | |
| "grad_norm": 2.298089033956775, | |
| "learning_rate": 3.8456890455451646e-11, | |
| "loss": 0.2323, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 2.9972020145495244, | |
| "step": 891, | |
| "total_flos": 1.9585051653255987e+17, | |
| "train_loss": 0.5026469534026787, | |
| "train_runtime": 9007.4522, | |
| "train_samples_per_second": 4.76, | |
| "train_steps_per_second": 0.099 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 891, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 400, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.9585051653255987e+17, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |