{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.25250227479526843,
  "eval_steps": 500,
  "global_step": 555,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00045495905368516835,
      "grad_norm": 9.424069126182447,
      "learning_rate": 5e-06,
      "loss": 0.1263,
      "step": 1
    },
    {
      "epoch": 0.0009099181073703367,
      "grad_norm": 10.713711803681479,
      "learning_rate": 4.999999897855645e-06,
      "loss": 0.1917,
      "step": 2
    },
    {
      "epoch": 0.001364877161055505,
      "grad_norm": 14.140338542227335,
      "learning_rate": 4.9999995914225884e-06,
      "loss": 0.1578,
      "step": 3
    },
    {
      "epoch": 0.0018198362147406734,
      "grad_norm": 3.597475372738082,
      "learning_rate": 4.999999080700855e-06,
      "loss": 0.1266,
      "step": 4
    },
    {
      "epoch": 0.0022747952684258415,
      "grad_norm": 6.105724745538744,
      "learning_rate": 4.999998365690486e-06,
      "loss": 0.1182,
      "step": 5
    },
    {
      "epoch": 0.00272975432211101,
      "grad_norm": 2.3169263707348047,
      "learning_rate": 4.999997446391542e-06,
      "loss": 0.0837,
      "step": 6
    },
    {
      "epoch": 0.0031847133757961785,
      "grad_norm": 1.5580577162131912,
      "learning_rate": 4.999996322804095e-06,
      "loss": 0.0761,
      "step": 7
    },
    {
      "epoch": 0.003639672429481347,
      "grad_norm": 7.8184917268265455,
      "learning_rate": 4.999994994928239e-06,
      "loss": 0.0922,
      "step": 8
    },
    {
      "epoch": 0.004094631483166515,
      "grad_norm": 1.0452365500769838,
      "learning_rate": 4.999993462764082e-06,
      "loss": 0.0478,
      "step": 9
    },
    {
      "epoch": 0.004549590536851683,
      "grad_norm": 13.965028712537013,
      "learning_rate": 4.999991726311749e-06,
      "loss": 0.0846,
      "step": 10
    },
    {
      "epoch": 0.005004549590536852,
      "grad_norm": 5.125925143296543,
      "learning_rate": 4.999989785571382e-06,
      "loss": 0.0881,
      "step": 11
    },
    {
      "epoch": 0.00545950864422202,
      "grad_norm": 2.2007100936893242,
      "learning_rate": 4.999987640543139e-06,
      "loss": 0.0896,
      "step": 12
    },
    {
      "epoch": 0.005914467697907188,
      "grad_norm": 1.4259973806728683,
      "learning_rate": 4.999985291227196e-06,
      "loss": 0.0707,
      "step": 13
    },
    {
      "epoch": 0.006369426751592357,
      "grad_norm": 2.5296942505090376,
      "learning_rate": 4.999982737623746e-06,
      "loss": 0.1089,
      "step": 14
    },
    {
      "epoch": 0.006824385805277525,
      "grad_norm": 1.9950751818037182,
      "learning_rate": 4.999979979732995e-06,
      "loss": 0.0868,
      "step": 15
    },
    {
      "epoch": 0.007279344858962694,
      "grad_norm": 1.3920340257758652,
      "learning_rate": 4.999977017555171e-06,
      "loss": 0.0667,
      "step": 16
    },
    {
      "epoch": 0.0077343039126478615,
      "grad_norm": 1.6901228042476675,
      "learning_rate": 4.999973851090514e-06,
      "loss": 0.1032,
      "step": 17
    },
    {
      "epoch": 0.00818926296633303,
      "grad_norm": 1.8139241575982044,
      "learning_rate": 4.999970480339284e-06,
      "loss": 0.0848,
      "step": 18
    },
    {
      "epoch": 0.008644222020018199,
      "grad_norm": 2.792209216647474,
      "learning_rate": 4.9999669053017564e-06,
      "loss": 0.0804,
      "step": 19
    },
    {
      "epoch": 0.009099181073703366,
      "grad_norm": 1.9016199513748882,
      "learning_rate": 4.9999631259782235e-06,
      "loss": 0.0612,
      "step": 20
    },
    {
      "epoch": 0.009554140127388535,
      "grad_norm": 1.9965871271660314,
      "learning_rate": 4.999959142368993e-06,
      "loss": 0.0969,
      "step": 21
    },
    {
      "epoch": 0.010009099181073703,
      "grad_norm": 2.0914009303085033,
      "learning_rate": 4.999954954474391e-06,
      "loss": 0.0697,
      "step": 22
    },
    {
      "epoch": 0.010464058234758872,
      "grad_norm": 1.4245797905814712,
      "learning_rate": 4.9999505622947594e-06,
      "loss": 0.0832,
      "step": 23
    },
    {
      "epoch": 0.01091901728844404,
      "grad_norm": 1.5918336957933508,
      "learning_rate": 4.999945965830458e-06,
      "loss": 0.0995,
      "step": 24
    },
    {
      "epoch": 0.011373976342129208,
      "grad_norm": 1.5479918567604505,
      "learning_rate": 4.999941165081863e-06,
      "loss": 0.0807,
      "step": 25
    },
    {
      "epoch": 0.011828935395814377,
      "grad_norm": 1.0230515440884096,
      "learning_rate": 4.999936160049364e-06,
      "loss": 0.0643,
      "step": 26
    },
    {
      "epoch": 0.012283894449499545,
      "grad_norm": 1.5686069800283207,
      "learning_rate": 4.999930950733373e-06,
      "loss": 0.0931,
      "step": 27
    },
    {
      "epoch": 0.012738853503184714,
      "grad_norm": 1.2554970571666952,
      "learning_rate": 4.999925537134312e-06,
      "loss": 0.0815,
      "step": 28
    },
    {
      "epoch": 0.013193812556869881,
      "grad_norm": 2.006239028459661,
      "learning_rate": 4.9999199192526286e-06,
      "loss": 0.1058,
      "step": 29
    },
    {
      "epoch": 0.01364877161055505,
      "grad_norm": 1.4436359414979703,
      "learning_rate": 4.9999140970887775e-06,
      "loss": 0.0869,
      "step": 30
    },
    {
      "epoch": 0.014103730664240218,
      "grad_norm": 1.9267705188401287,
      "learning_rate": 4.999908070643236e-06,
      "loss": 0.0781,
      "step": 31
    },
    {
      "epoch": 0.014558689717925387,
      "grad_norm": 1.4021843278575745,
      "learning_rate": 4.999901839916495e-06,
      "loss": 0.0623,
      "step": 32
    },
    {
      "epoch": 0.015013648771610554,
      "grad_norm": 1.208153452070421,
      "learning_rate": 4.999895404909067e-06,
      "loss": 0.063,
      "step": 33
    },
    {
      "epoch": 0.015468607825295723,
      "grad_norm": 2.273185304548797,
      "learning_rate": 4.999888765621476e-06,
      "loss": 0.0901,
      "step": 34
    },
    {
      "epoch": 0.01592356687898089,
      "grad_norm": 1.0383667898934177,
      "learning_rate": 4.999881922054264e-06,
      "loss": 0.0529,
      "step": 35
    },
    {
      "epoch": 0.01637852593266606,
      "grad_norm": 1.1537070720156926,
      "learning_rate": 4.999874874207991e-06,
      "loss": 0.0539,
      "step": 36
    },
    {
      "epoch": 0.01683348498635123,
      "grad_norm": 7.004645996036244,
      "learning_rate": 4.999867622083232e-06,
      "loss": 0.1028,
      "step": 37
    },
    {
      "epoch": 0.017288444040036398,
      "grad_norm": 2.6515111867419217,
      "learning_rate": 4.99986016568058e-06,
      "loss": 0.0958,
      "step": 38
    },
    {
      "epoch": 0.017743403093721567,
      "grad_norm": 1.5437471575403858,
      "learning_rate": 4.999852505000646e-06,
      "loss": 0.0738,
      "step": 39
    },
    {
      "epoch": 0.018198362147406732,
      "grad_norm": 1.4798019454902687,
      "learning_rate": 4.999844640044053e-06,
      "loss": 0.0695,
      "step": 40
    },
    {
      "epoch": 0.0186533212010919,
      "grad_norm": 1.3064785518172293,
      "learning_rate": 4.999836570811445e-06,
      "loss": 0.0738,
      "step": 41
    },
    {
      "epoch": 0.01910828025477707,
      "grad_norm": 2.6092308850144086,
      "learning_rate": 4.999828297303483e-06,
      "loss": 0.0854,
      "step": 42
    },
    {
      "epoch": 0.019563239308462238,
      "grad_norm": 1.1588535962376392,
      "learning_rate": 4.9998198195208405e-06,
      "loss": 0.0783,
      "step": 43
    },
    {
      "epoch": 0.020018198362147407,
      "grad_norm": 1.441993661454023,
      "learning_rate": 4.999811137464212e-06,
      "loss": 0.0826,
      "step": 44
    },
    {
      "epoch": 0.020473157415832575,
      "grad_norm": 1.6833012903770388,
      "learning_rate": 4.999802251134307e-06,
      "loss": 0.0932,
      "step": 45
    },
    {
      "epoch": 0.020928116469517744,
      "grad_norm": 1.061841465675538,
      "learning_rate": 4.99979316053185e-06,
      "loss": 0.0602,
      "step": 46
    },
    {
      "epoch": 0.021383075523202913,
      "grad_norm": 6.235552737213317,
      "learning_rate": 4.999783865657585e-06,
      "loss": 0.1756,
      "step": 47
    },
    {
      "epoch": 0.02183803457688808,
      "grad_norm": 4.150615789136632,
      "learning_rate": 4.999774366512272e-06,
      "loss": 0.1765,
      "step": 48
    },
    {
      "epoch": 0.022292993630573247,
      "grad_norm": 1.6544370579186418,
      "learning_rate": 4.9997646630966865e-06,
      "loss": 0.0841,
      "step": 49
    },
    {
      "epoch": 0.022747952684258416,
      "grad_norm": 1.3759378168890601,
      "learning_rate": 4.999754755411621e-06,
      "loss": 0.0669,
      "step": 50
    },
    {
      "epoch": 0.023202911737943584,
      "grad_norm": 1.182095773050476,
      "learning_rate": 4.9997446434578865e-06,
      "loss": 0.0653,
      "step": 51
    },
    {
      "epoch": 0.023657870791628753,
      "grad_norm": 1.035739970953985,
      "learning_rate": 4.999734327236307e-06,
      "loss": 0.0678,
      "step": 52
    },
    {
      "epoch": 0.024112829845313922,
      "grad_norm": 0.7085636728418604,
      "learning_rate": 4.999723806747728e-06,
      "loss": 0.0498,
      "step": 53
    },
    {
      "epoch": 0.02456778889899909,
      "grad_norm": 2.2722150874810185,
      "learning_rate": 4.99971308199301e-06,
      "loss": 0.0666,
      "step": 54
    },
    {
      "epoch": 0.02502274795268426,
      "grad_norm": 0.9420150282219443,
      "learning_rate": 4.999702152973025e-06,
      "loss": 0.0516,
      "step": 55
    },
    {
      "epoch": 0.025477707006369428,
      "grad_norm": 1.0929779986912587,
      "learning_rate": 4.9996910196886694e-06,
      "loss": 0.0593,
      "step": 56
    },
    {
      "epoch": 0.025932666060054597,
      "grad_norm": 0.783956655534044,
      "learning_rate": 4.999679682140852e-06,
      "loss": 0.0377,
      "step": 57
    },
    {
      "epoch": 0.026387625113739762,
      "grad_norm": 1.5218504285246661,
      "learning_rate": 4.999668140330499e-06,
      "loss": 0.1052,
      "step": 58
    },
    {
      "epoch": 0.02684258416742493,
      "grad_norm": 1.0791722855226673,
      "learning_rate": 4.999656394258555e-06,
      "loss": 0.0632,
      "step": 59
    },
    {
      "epoch": 0.0272975432211101,
      "grad_norm": 0.9557512868551324,
      "learning_rate": 4.999644443925978e-06,
      "loss": 0.0634,
      "step": 60
    },
    {
      "epoch": 0.027752502274795268,
      "grad_norm": 1.0667565930302423,
      "learning_rate": 4.999632289333746e-06,
      "loss": 0.0518,
      "step": 61
    },
    {
      "epoch": 0.028207461328480437,
      "grad_norm": 1.646318745184601,
      "learning_rate": 4.999619930482852e-06,
      "loss": 0.0766,
      "step": 62
    },
    {
      "epoch": 0.028662420382165606,
      "grad_norm": 1.2186400155674944,
      "learning_rate": 4.999607367374304e-06,
      "loss": 0.0741,
      "step": 63
    },
    {
      "epoch": 0.029117379435850774,
      "grad_norm": 1.0807362476000584,
      "learning_rate": 4.999594600009131e-06,
      "loss": 0.0553,
      "step": 64
    },
    {
      "epoch": 0.029572338489535943,
      "grad_norm": 1.3403222529377026,
      "learning_rate": 4.999581628388375e-06,
      "loss": 0.0886,
      "step": 65
    },
    {
      "epoch": 0.03002729754322111,
      "grad_norm": 1.5384085589580356,
      "learning_rate": 4.999568452513097e-06,
      "loss": 0.1371,
      "step": 66
    },
    {
      "epoch": 0.030482256596906277,
      "grad_norm": 1.3705617237121213,
      "learning_rate": 4.9995550723843726e-06,
      "loss": 0.0766,
      "step": 67
    },
    {
      "epoch": 0.030937215650591446,
      "grad_norm": 1.0692361538736996,
      "learning_rate": 4.999541488003295e-06,
      "loss": 0.0607,
      "step": 68
    },
    {
      "epoch": 0.03139217470427662,
      "grad_norm": 0.9190382606343962,
      "learning_rate": 4.999527699370975e-06,
      "loss": 0.0598,
      "step": 69
    },
    {
      "epoch": 0.03184713375796178,
      "grad_norm": 1.182484013540807,
      "learning_rate": 4.99951370648854e-06,
      "loss": 0.0583,
      "step": 70
    },
    {
      "epoch": 0.03230209281164695,
      "grad_norm": 1.0728084003134533,
      "learning_rate": 4.999499509357132e-06,
      "loss": 0.0595,
      "step": 71
    },
    {
      "epoch": 0.03275705186533212,
      "grad_norm": 4.354112851430141,
      "learning_rate": 4.999485107977912e-06,
      "loss": 0.063,
      "step": 72
    },
    {
      "epoch": 0.033212010919017286,
      "grad_norm": 1.5709570309947747,
      "learning_rate": 4.999470502352057e-06,
      "loss": 0.0511,
      "step": 73
    },
    {
      "epoch": 0.03366696997270246,
      "grad_norm": 5.936498302941106,
      "learning_rate": 4.999455692480759e-06,
      "loss": 0.0733,
      "step": 74
    },
    {
      "epoch": 0.034121929026387623,
      "grad_norm": 2.227018438923651,
      "learning_rate": 4.999440678365229e-06,
      "loss": 0.0504,
      "step": 75
    },
    {
      "epoch": 0.034576888080072796,
      "grad_norm": 1.7106603940792875,
      "learning_rate": 4.999425460006695e-06,
      "loss": 0.0672,
      "step": 76
    },
    {
      "epoch": 0.03503184713375796,
      "grad_norm": 3.603233120133456,
      "learning_rate": 4.9994100374063995e-06,
      "loss": 0.0605,
      "step": 77
    },
    {
      "epoch": 0.03548680618744313,
      "grad_norm": 1.3736083353388526,
      "learning_rate": 4.9993944105656035e-06,
      "loss": 0.0892,
      "step": 78
    },
    {
      "epoch": 0.0359417652411283,
      "grad_norm": 1.0823220835311542,
      "learning_rate": 4.999378579485582e-06,
      "loss": 0.0657,
      "step": 79
    },
    {
      "epoch": 0.036396724294813464,
      "grad_norm": 2.150924215229101,
      "learning_rate": 4.999362544167632e-06,
      "loss": 0.0787,
      "step": 80
    },
    {
      "epoch": 0.036851683348498636,
      "grad_norm": 1.7178625535843866,
      "learning_rate": 4.99934630461306e-06,
      "loss": 0.0428,
      "step": 81
    },
    {
      "epoch": 0.0373066424021838,
      "grad_norm": 1.3444344286672891,
      "learning_rate": 4.999329860823197e-06,
      "loss": 0.0683,
      "step": 82
    },
    {
      "epoch": 0.03776160145586897,
      "grad_norm": 0.9890758669005086,
      "learning_rate": 4.999313212799383e-06,
      "loss": 0.0684,
      "step": 83
    },
    {
      "epoch": 0.03821656050955414,
      "grad_norm": 1.341850788541947,
      "learning_rate": 4.99929636054298e-06,
      "loss": 0.0683,
      "step": 84
    },
    {
      "epoch": 0.03867151956323931,
      "grad_norm": 1.1425756088631416,
      "learning_rate": 4.999279304055366e-06,
      "loss": 0.0781,
      "step": 85
    },
    {
      "epoch": 0.039126478616924476,
      "grad_norm": 1.1872176417370066,
      "learning_rate": 4.999262043337933e-06,
      "loss": 0.0652,
      "step": 86
    },
    {
      "epoch": 0.03958143767060965,
      "grad_norm": 1.1143093977494338,
      "learning_rate": 4.999244578392094e-06,
      "loss": 0.0752,
      "step": 87
    },
    {
      "epoch": 0.040036396724294813,
      "grad_norm": 1.2369204202074342,
      "learning_rate": 4.9992269092192736e-06,
      "loss": 0.0822,
      "step": 88
    },
    {
      "epoch": 0.04049135577797998,
      "grad_norm": 1.1130108244588752,
      "learning_rate": 4.9992090358209166e-06,
      "loss": 0.0548,
      "step": 89
    },
    {
      "epoch": 0.04094631483166515,
      "grad_norm": 1.0691923631453497,
      "learning_rate": 4.9991909581984835e-06,
      "loss": 0.058,
      "step": 90
    },
    {
      "epoch": 0.041401273885350316,
      "grad_norm": 0.8020461125153492,
      "learning_rate": 4.999172676353451e-06,
      "loss": 0.0341,
      "step": 91
    },
    {
      "epoch": 0.04185623293903549,
      "grad_norm": 0.8729429986066347,
      "learning_rate": 4.999154190287314e-06,
      "loss": 0.0524,
      "step": 92
    },
    {
      "epoch": 0.042311191992720654,
      "grad_norm": 1.3186052508212676,
      "learning_rate": 4.999135500001583e-06,
      "loss": 0.1067,
      "step": 93
    },
    {
      "epoch": 0.042766151046405826,
      "grad_norm": 0.9402363215265714,
      "learning_rate": 4.9991166054977844e-06,
      "loss": 0.0631,
      "step": 94
    },
    {
      "epoch": 0.04322111010009099,
      "grad_norm": 1.8336904222617239,
      "learning_rate": 4.999097506777463e-06,
      "loss": 0.0897,
      "step": 95
    },
    {
      "epoch": 0.04367606915377616,
      "grad_norm": 1.0700343361679827,
      "learning_rate": 4.999078203842179e-06,
      "loss": 0.084,
      "step": 96
    },
    {
      "epoch": 0.04413102820746133,
      "grad_norm": 0.8783050881223096,
      "learning_rate": 4.999058696693511e-06,
      "loss": 0.0421,
      "step": 97
    },
    {
      "epoch": 0.044585987261146494,
      "grad_norm": 0.9801149440827129,
      "learning_rate": 4.99903898533305e-06,
      "loss": 0.0616,
      "step": 98
    },
    {
      "epoch": 0.045040946314831666,
      "grad_norm": 0.9471216563783236,
      "learning_rate": 4.99901906976241e-06,
      "loss": 0.0614,
      "step": 99
    },
    {
      "epoch": 0.04549590536851683,
      "grad_norm": 1.1616379193988644,
      "learning_rate": 4.998998949983217e-06,
      "loss": 0.0604,
      "step": 100
    },
    {
      "epoch": 0.045950864422202004,
      "grad_norm": 1.123688602696856,
      "learning_rate": 4.998978625997115e-06,
      "loss": 0.0831,
      "step": 101
    },
    {
      "epoch": 0.04640582347588717,
      "grad_norm": 1.1154387442545128,
      "learning_rate": 4.998958097805765e-06,
      "loss": 0.0686,
      "step": 102
    },
    {
      "epoch": 0.04686078252957234,
      "grad_norm": 0.9538196832365717,
      "learning_rate": 4.9989373654108445e-06,
      "loss": 0.0586,
      "step": 103
    },
    {
      "epoch": 0.047315741583257506,
      "grad_norm": 14.714854180857428,
      "learning_rate": 4.9989164288140465e-06,
      "loss": 0.2765,
      "step": 104
    },
    {
      "epoch": 0.04777070063694268,
      "grad_norm": 1.5310230722630254,
      "learning_rate": 4.998895288017085e-06,
      "loss": 0.1114,
      "step": 105
    },
    {
      "epoch": 0.048225659690627844,
      "grad_norm": 0.8704851988514942,
      "learning_rate": 4.998873943021684e-06,
      "loss": 0.0481,
      "step": 106
    },
    {
      "epoch": 0.04868061874431301,
      "grad_norm": 0.9229853294124807,
      "learning_rate": 4.998852393829589e-06,
      "loss": 0.0559,
      "step": 107
    },
    {
      "epoch": 0.04913557779799818,
      "grad_norm": 1.6890853415724327,
      "learning_rate": 4.9988306404425625e-06,
      "loss": 0.1104,
      "step": 108
    },
    {
      "epoch": 0.049590536851683346,
      "grad_norm": 0.9281407020626959,
      "learning_rate": 4.99880868286238e-06,
      "loss": 0.0636,
      "step": 109
    },
    {
      "epoch": 0.05004549590536852,
      "grad_norm": 1.2440415104108336,
      "learning_rate": 4.998786521090836e-06,
      "loss": 0.0522,
      "step": 110
    },
    {
      "epoch": 0.050500454959053684,
      "grad_norm": 1.06604652034606,
      "learning_rate": 4.9987641551297426e-06,
      "loss": 0.0916,
      "step": 111
    },
    {
      "epoch": 0.050955414012738856,
      "grad_norm": 0.9619782747004665,
      "learning_rate": 4.998741584980926e-06,
      "loss": 0.0822,
      "step": 112
    },
    {
      "epoch": 0.05141037306642402,
      "grad_norm": 1.0679619427370142,
      "learning_rate": 4.9987188106462314e-06,
      "loss": 0.0644,
      "step": 113
    },
    {
      "epoch": 0.051865332120109194,
      "grad_norm": 0.8424012677371406,
      "learning_rate": 4.99869583212752e-06,
      "loss": 0.0536,
      "step": 114
    },
    {
      "epoch": 0.05232029117379436,
      "grad_norm": 1.660603270099433,
      "learning_rate": 4.9986726494266694e-06,
      "loss": 0.1336,
      "step": 115
    },
    {
      "epoch": 0.052775250227479524,
      "grad_norm": 1.0314643506984187,
      "learning_rate": 4.998649262545574e-06,
      "loss": 0.0606,
      "step": 116
    },
    {
      "epoch": 0.053230209281164696,
      "grad_norm": 0.9468486095046134,
      "learning_rate": 4.998625671486144e-06,
      "loss": 0.0598,
      "step": 117
    },
    {
      "epoch": 0.05368516833484986,
      "grad_norm": 0.8800045267913842,
      "learning_rate": 4.998601876250308e-06,
      "loss": 0.06,
      "step": 118
    },
    {
      "epoch": 0.054140127388535034,
      "grad_norm": 1.0192910760666323,
      "learning_rate": 4.998577876840011e-06,
      "loss": 0.0601,
      "step": 119
    },
    {
      "epoch": 0.0545950864422202,
      "grad_norm": 0.9462635574827357,
      "learning_rate": 4.9985536732572124e-06,
      "loss": 0.06,
      "step": 120
    },
    {
      "epoch": 0.05505004549590537,
      "grad_norm": 0.7487116084320051,
      "learning_rate": 4.998529265503891e-06,
      "loss": 0.0458,
      "step": 121
    },
    {
      "epoch": 0.055505004549590536,
      "grad_norm": 1.0663282141956507,
      "learning_rate": 4.9985046535820416e-06,
      "loss": 0.0758,
      "step": 122
    },
    {
      "epoch": 0.05595996360327571,
      "grad_norm": 1.7476635252011261,
      "learning_rate": 4.998479837493675e-06,
      "loss": 0.0876,
      "step": 123
    },
    {
      "epoch": 0.056414922656960874,
      "grad_norm": 1.1513932571098853,
      "learning_rate": 4.9984548172408195e-06,
      "loss": 0.0475,
      "step": 124
    },
    {
      "epoch": 0.05686988171064604,
      "grad_norm": 97.54679492281674,
      "learning_rate": 4.998429592825519e-06,
      "loss": 0.2117,
      "step": 125
    },
    {
      "epoch": 0.05732484076433121,
      "grad_norm": 1.2146893500357796,
      "learning_rate": 4.998404164249835e-06,
      "loss": 0.0887,
      "step": 126
    },
    {
      "epoch": 0.05777979981801638,
      "grad_norm": 0.8319799978985719,
      "learning_rate": 4.998378531515845e-06,
      "loss": 0.0411,
      "step": 127
    },
    {
      "epoch": 0.05823475887170155,
      "grad_norm": 1.5818516008522756,
      "learning_rate": 4.998352694625645e-06,
      "loss": 0.068,
      "step": 128
    },
    {
      "epoch": 0.058689717925386714,
      "grad_norm": 0.883733186490376,
      "learning_rate": 4.998326653581343e-06,
      "loss": 0.0595,
      "step": 129
    },
    {
      "epoch": 0.059144676979071886,
      "grad_norm": 0.9357726879327158,
      "learning_rate": 4.998300408385072e-06,
      "loss": 0.0686,
      "step": 130
    },
    {
      "epoch": 0.05959963603275705,
      "grad_norm": 1.3606472296483436,
      "learning_rate": 4.998273959038972e-06,
      "loss": 0.0837,
      "step": 131
    },
    {
      "epoch": 0.06005459508644222,
      "grad_norm": 0.9597337111291308,
      "learning_rate": 4.998247305545207e-06,
      "loss": 0.0733,
      "step": 132
    },
    {
      "epoch": 0.06050955414012739,
      "grad_norm": 0.7271469650592398,
      "learning_rate": 4.998220447905953e-06,
      "loss": 0.0454,
      "step": 133
    },
    {
      "epoch": 0.060964513193812554,
      "grad_norm": 0.9630498239095886,
      "learning_rate": 4.998193386123408e-06,
      "loss": 0.074,
      "step": 134
    },
    {
      "epoch": 0.061419472247497726,
      "grad_norm": 1.133544314724227,
      "learning_rate": 4.99816612019978e-06,
      "loss": 0.077,
      "step": 135
    },
    {
      "epoch": 0.06187443130118289,
      "grad_norm": 4.162875613842665,
      "learning_rate": 4.998138650137298e-06,
      "loss": 0.1461,
      "step": 136
    },
    {
      "epoch": 0.062329390354868064,
      "grad_norm": 1.02851611153301,
      "learning_rate": 4.998110975938208e-06,
      "loss": 0.0883,
      "step": 137
    },
    {
      "epoch": 0.06278434940855324,
      "grad_norm": 1.4803017864082986,
      "learning_rate": 4.998083097604769e-06,
      "loss": 0.093,
      "step": 138
    },
    {
      "epoch": 0.0632393084622384,
      "grad_norm": 0.775173461523887,
      "learning_rate": 4.998055015139261e-06,
      "loss": 0.0446,
      "step": 139
    },
    {
      "epoch": 0.06369426751592357,
      "grad_norm": 0.9314427643573137,
      "learning_rate": 4.998026728543979e-06,
      "loss": 0.0627,
      "step": 140
    },
    {
      "epoch": 0.06414922656960874,
      "grad_norm": 0.8532842969957802,
      "learning_rate": 4.997998237821233e-06,
      "loss": 0.07,
      "step": 141
    },
    {
      "epoch": 0.0646041856232939,
      "grad_norm": 0.8003964270143441,
      "learning_rate": 4.997969542973352e-06,
      "loss": 0.0563,
      "step": 142
    },
    {
      "epoch": 0.06505914467697907,
      "grad_norm": 1.0449654693074535,
      "learning_rate": 4.997940644002681e-06,
      "loss": 0.0705,
      "step": 143
    },
    {
      "epoch": 0.06551410373066424,
      "grad_norm": 1.2317539206735935,
      "learning_rate": 4.997911540911581e-06,
      "loss": 0.0552,
      "step": 144
    },
    {
      "epoch": 0.06596906278434941,
      "grad_norm": 1.0170288864286834,
      "learning_rate": 4.99788223370243e-06,
      "loss": 0.075,
      "step": 145
    },
    {
      "epoch": 0.06642402183803457,
      "grad_norm": 2.1516221031707796,
      "learning_rate": 4.9978527223776245e-06,
      "loss": 0.1294,
      "step": 146
    },
    {
      "epoch": 0.06687898089171974,
      "grad_norm": 0.8159636795919125,
      "learning_rate": 4.9978230069395735e-06,
      "loss": 0.0512,
      "step": 147
    },
    {
      "epoch": 0.06733393994540492,
      "grad_norm": 1.0575473809333984,
      "learning_rate": 4.9977930873907065e-06,
      "loss": 0.0598,
      "step": 148
    },
    {
      "epoch": 0.06778889899909009,
      "grad_norm": 1.0958109016760909,
      "learning_rate": 4.997762963733468e-06,
      "loss": 0.074,
      "step": 149
    },
    {
      "epoch": 0.06824385805277525,
      "grad_norm": 1.047477211054204,
      "learning_rate": 4.997732635970321e-06,
      "loss": 0.0539,
      "step": 150
    },
    {
      "epoch": 0.06869881710646042,
      "grad_norm": 1.0301191819422422,
      "learning_rate": 4.9977021041037425e-06,
      "loss": 0.0686,
      "step": 151
    },
    {
      "epoch": 0.06915377616014559,
      "grad_norm": 1.225998339573777,
      "learning_rate": 4.9976713681362265e-06,
      "loss": 0.0859,
      "step": 152
    },
    {
      "epoch": 0.06960873521383075,
      "grad_norm": 1.416119617095304,
      "learning_rate": 4.997640428070286e-06,
      "loss": 0.1051,
      "step": 153
    },
    {
      "epoch": 0.07006369426751592,
      "grad_norm": 0.9227148160074169,
      "learning_rate": 4.99760928390845e-06,
      "loss": 0.0476,
      "step": 154
    },
    {
      "epoch": 0.0705186533212011,
      "grad_norm": 0.9417296172183,
      "learning_rate": 4.997577935653262e-06,
      "loss": 0.0546,
      "step": 155
    },
    {
      "epoch": 0.07097361237488627,
      "grad_norm": 0.7429922167271485,
      "learning_rate": 4.9975463833072835e-06,
      "loss": 0.0438,
      "step": 156
    },
    {
      "epoch": 0.07142857142857142,
      "grad_norm": 1.1317968054046752,
      "learning_rate": 4.997514626873093e-06,
      "loss": 0.0723,
      "step": 157
    },
    {
      "epoch": 0.0718835304822566,
      "grad_norm": 0.894309839547546,
      "learning_rate": 4.997482666353287e-06,
      "loss": 0.0484,
      "step": 158
    },
    {
      "epoch": 0.07233848953594177,
      "grad_norm": 1.2064896460901124,
      "learning_rate": 4.997450501750476e-06,
      "loss": 0.0686,
      "step": 159
    },
    {
      "epoch": 0.07279344858962693,
      "grad_norm": 1.0338993985106997,
      "learning_rate": 4.997418133067288e-06,
      "loss": 0.066,
      "step": 160
    },
    {
      "epoch": 0.0732484076433121,
      "grad_norm": 1.0854078217047458,
      "learning_rate": 4.997385560306368e-06,
      "loss": 0.075,
      "step": 161
    },
    {
      "epoch": 0.07370336669699727,
      "grad_norm": 0.9955556312708298,
      "learning_rate": 4.997352783470379e-06,
      "loss": 0.0693,
      "step": 162
    },
    {
      "epoch": 0.07415832575068244,
      "grad_norm": 1.1119344699280262,
      "learning_rate": 4.997319802561997e-06,
      "loss": 0.0687,
      "step": 163
    },
    {
      "epoch": 0.0746132848043676,
      "grad_norm": 0.8118992710097626,
      "learning_rate": 4.9972866175839196e-06,
      "loss": 0.061,
      "step": 164
    },
    {
      "epoch": 0.07506824385805277,
      "grad_norm": 1.0509201052861925,
      "learning_rate": 4.9972532285388575e-06,
      "loss": 0.0738,
      "step": 165
    },
    {
      "epoch": 0.07552320291173795,
      "grad_norm": 1.1660685920656126,
      "learning_rate": 4.997219635429538e-06,
      "loss": 0.1018,
      "step": 166
    },
    {
      "epoch": 0.07597816196542312,
      "grad_norm": 0.9894981496034668,
      "learning_rate": 4.997185838258709e-06,
      "loss": 0.0534,
      "step": 167
    },
    {
      "epoch": 0.07643312101910828,
      "grad_norm": 0.9397553289113793,
      "learning_rate": 4.997151837029129e-06,
      "loss": 0.0527,
      "step": 168
    },
    {
      "epoch": 0.07688808007279345,
      "grad_norm": 0.9368221512292729,
      "learning_rate": 4.997117631743579e-06,
      "loss": 0.0648,
      "step": 169
    },
    {
      "epoch": 0.07734303912647862,
      "grad_norm": 0.898690664067523,
      "learning_rate": 4.997083222404852e-06,
      "loss": 0.0479,
      "step": 170
    },
    {
      "epoch": 0.07779799818016378,
      "grad_norm": 0.835250569463016,
      "learning_rate": 4.997048609015762e-06,
      "loss": 0.0528,
      "step": 171
    },
    {
      "epoch": 0.07825295723384895,
      "grad_norm": 0.9098471940978452,
      "learning_rate": 4.997013791579136e-06,
      "loss": 0.0641,
      "step": 172
    },
    {
      "epoch": 0.07870791628753412,
      "grad_norm": 0.9538942863622895,
      "learning_rate": 4.996978770097819e-06,
      "loss": 0.0648,
      "step": 173
    },
    {
      "epoch": 0.0791628753412193,
      "grad_norm": 0.9163372515795332,
      "learning_rate": 4.996943544574673e-06,
      "loss": 0.0682,
      "step": 174
    },
    {
      "epoch": 0.07961783439490445,
      "grad_norm": 0.8193165634479148,
      "learning_rate": 4.996908115012576e-06,
      "loss": 0.0485,
      "step": 175
    },
    {
      "epoch": 0.08007279344858963,
      "grad_norm": 1.0278777387100766,
      "learning_rate": 4.996872481414425e-06,
      "loss": 0.0741,
      "step": 176
    },
    {
      "epoch": 0.0805277525022748,
      "grad_norm": 1.8323226365700802,
      "learning_rate": 4.9968366437831305e-06,
      "loss": 0.1107,
      "step": 177
    },
    {
      "epoch": 0.08098271155595996,
      "grad_norm": 0.5562843681536768,
      "learning_rate": 4.99680060212162e-06,
      "loss": 0.0379,
      "step": 178
    },
    {
      "epoch": 0.08143767060964513,
      "grad_norm": 0.6982410679992989,
      "learning_rate": 4.996764356432841e-06,
      "loss": 0.0576,
      "step": 179
    },
    {
      "epoch": 0.0818926296633303,
      "grad_norm": 0.9996693552976796,
      "learning_rate": 4.996727906719754e-06,
      "loss": 0.056,
      "step": 180
    },
    {
      "epoch": 0.08234758871701547,
      "grad_norm": 0.8092230365331524,
      "learning_rate": 4.9966912529853365e-06,
      "loss": 0.036,
      "step": 181
    },
    {
      "epoch": 0.08280254777070063,
      "grad_norm": 0.8856317784665715,
      "learning_rate": 4.996654395232585e-06,
      "loss": 0.0546,
      "step": 182
    },
    {
      "epoch": 0.0832575068243858,
      "grad_norm": 0.7648943084887926,
      "learning_rate": 4.996617333464512e-06,
      "loss": 0.0456,
      "step": 183
    },
    {
      "epoch": 0.08371246587807098,
      "grad_norm": 0.8896960831413809,
      "learning_rate": 4.996580067684145e-06,
      "loss": 0.0505,
      "step": 184
    },
    {
      "epoch": 0.08416742493175614,
      "grad_norm": 0.8819070603063018,
      "learning_rate": 4.996542597894528e-06,
      "loss": 0.0833,
      "step": 185
    },
    {
      "epoch": 0.08462238398544131,
      "grad_norm": 0.8756856388162975,
      "learning_rate": 4.996504924098726e-06,
      "loss": 0.0641,
      "step": 186
    },
    {
      "epoch": 0.08507734303912648,
      "grad_norm": 0.8527408544485862,
      "learning_rate": 4.9964670462998145e-06,
      "loss": 0.0553,
      "step": 187
    },
    {
      "epoch": 0.08553230209281165,
      "grad_norm": 0.9875356023767464,
      "learning_rate": 4.99642896450089e-06,
      "loss": 0.0874,
      "step": 188
    },
    {
      "epoch": 0.08598726114649681,
      "grad_norm": 2.0664437318649003,
      "learning_rate": 4.9963906787050656e-06,
      "loss": 0.0901,
      "step": 189
    },
    {
      "epoch": 0.08644222020018198,
      "grad_norm": 0.772276028123917,
      "learning_rate": 4.996352188915467e-06,
      "loss": 0.0457,
      "step": 190
    },
    {
      "epoch": 0.08689717925386715,
      "grad_norm": 1.5995533229184502,
      "learning_rate": 4.996313495135242e-06,
      "loss": 0.0902,
      "step": 191
    },
    {
      "epoch": 0.08735213830755233,
      "grad_norm": 1.14262643514501,
      "learning_rate": 4.9962745973675505e-06,
      "loss": 0.0887,
      "step": 192
    },
    {
      "epoch": 0.08780709736123748,
      "grad_norm": 0.653471766542576,
      "learning_rate": 4.996235495615572e-06,
      "loss": 0.0381,
      "step": 193
    },
    {
      "epoch": 0.08826205641492266,
      "grad_norm": 1.21800497391657,
      "learning_rate": 4.996196189882503e-06,
      "loss": 0.0859,
      "step": 194
    },
    {
      "epoch": 0.08871701546860783,
      "grad_norm": 1.2184077345088562,
      "learning_rate": 4.996156680171552e-06,
      "loss": 0.0858,
      "step": 195
    },
    {
      "epoch": 0.08917197452229299,
      "grad_norm": 0.8525171751383268,
      "learning_rate": 4.996116966485951e-06,
      "loss": 0.0542,
      "step": 196
    },
    {
      "epoch": 0.08962693357597816,
      "grad_norm": 1.0438941172842933,
      "learning_rate": 4.996077048828944e-06,
      "loss": 0.0735,
      "step": 197
    },
    {
      "epoch": 0.09008189262966333,
      "grad_norm": 0.9982779135093925,
      "learning_rate": 4.996036927203793e-06,
      "loss": 0.0773,
      "step": 198
    },
    {
      "epoch": 0.0905368516833485,
      "grad_norm": 1.5215875068980074,
      "learning_rate": 4.995996601613775e-06,
      "loss": 0.0814,
      "step": 199
    },
    {
      "epoch": 0.09099181073703366,
      "grad_norm": 0.9525593904667519,
      "learning_rate": 4.9959560720621875e-06,
      "loss": 0.0631,
      "step": 200
    },
    {
      "epoch": 0.09144676979071883,
      "grad_norm": 1.6658936796296464,
      "learning_rate": 4.995915338552341e-06,
      "loss": 0.0892,
      "step": 201
    },
    {
      "epoch": 0.09190172884440401,
      "grad_norm": 1.0100426736293826,
      "learning_rate": 4.995874401087565e-06,
      "loss": 0.0618,
      "step": 202
    },
    {
      "epoch": 0.09235668789808917,
      "grad_norm": 1.2729210933806279,
      "learning_rate": 4.9958332596712035e-06,
      "loss": 0.0808,
      "step": 203
    },
    {
      "epoch": 0.09281164695177434,
      "grad_norm": 1.0142800844722413,
      "learning_rate": 4.99579191430662e-06,
      "loss": 0.0715,
      "step": 204
    },
    {
      "epoch": 0.09326660600545951,
      "grad_norm": 4.237455676216414,
      "learning_rate": 4.995750364997192e-06,
      "loss": 0.062,
      "step": 205
    },
    {
      "epoch": 0.09372156505914468,
      "grad_norm": 7.872559330750363,
      "learning_rate": 4.995708611746314e-06,
      "loss": 0.0548,
      "step": 206
    },
    {
      "epoch": 0.09417652411282984,
      "grad_norm": 1.2028032815765721,
      "learning_rate": 4.995666654557399e-06,
      "loss": 0.0678,
      "step": 207
    },
    {
      "epoch": 0.09463148316651501,
      "grad_norm": 0.9911372243080299,
      "learning_rate": 4.995624493433876e-06,
      "loss": 0.0728,
      "step": 208
    },
    {
      "epoch": 0.09508644222020018,
      "grad_norm": 2.5900155398471942,
      "learning_rate": 4.995582128379189e-06,
      "loss": 0.0822,
      "step": 209
    },
    {
      "epoch": 0.09554140127388536,
      "grad_norm": 1.4214627215980935,
      "learning_rate": 4.9955395593968e-06,
      "loss": 0.1096,
      "step": 210
    },
    {
      "epoch": 0.09599636032757052,
      "grad_norm": 11.75678149199321,
      "learning_rate": 4.99549678649019e-06,
      "loss": 0.0579,
      "step": 211
    },
    {
      "epoch": 0.09645131938125569,
      "grad_norm": 3.8898709501740747,
      "learning_rate": 4.99545380966285e-06,
      "loss": 0.0695,
      "step": 212
    },
    {
      "epoch": 0.09690627843494086,
      "grad_norm": 4.099783756040842,
      "learning_rate": 4.995410628918294e-06,
      "loss": 0.0711,
      "step": 213
    },
    {
      "epoch": 0.09736123748862602,
      "grad_norm": 3.9495811570453445,
      "learning_rate": 4.995367244260052e-06,
      "loss": 0.0871,
      "step": 214
    },
    {
      "epoch": 0.09781619654231119,
      "grad_norm": 0.7508672950199423,
      "learning_rate": 4.995323655691667e-06,
      "loss": 0.0369,
      "step": 215
    },
    {
      "epoch": 0.09827115559599636,
      "grad_norm": 1.3368080010868653,
      "learning_rate": 4.995279863216702e-06,
      "loss": 0.0752,
      "step": 216
    },
    {
      "epoch": 0.09872611464968153,
      "grad_norm": 0.8823975012529762,
      "learning_rate": 4.995235866838735e-06,
      "loss": 0.0695,
      "step": 217
    },
    {
      "epoch": 0.09918107370336669,
      "grad_norm": 0.8099194866460178,
      "learning_rate": 4.995191666561361e-06,
      "loss": 0.0561,
      "step": 218
    },
    {
      "epoch": 0.09963603275705187,
      "grad_norm": 0.6772333028080019,
      "learning_rate": 4.995147262388192e-06,
      "loss": 0.0441,
      "step": 219
    },
    {
      "epoch": 0.10009099181073704,
      "grad_norm": 0.9342067677666205,
      "learning_rate": 4.995102654322858e-06,
      "loss": 0.0613,
      "step": 220
    },
    {
      "epoch": 0.1005459508644222,
      "grad_norm": 0.7594825525973931,
      "learning_rate": 4.995057842369002e-06,
      "loss": 0.0349,
      "step": 221
    },
    {
      "epoch": 0.10100090991810737,
      "grad_norm": 0.8418616902443392,
      "learning_rate": 4.995012826530287e-06,
      "loss": 0.0693,
      "step": 222
    },
    {
      "epoch": 0.10145586897179254,
      "grad_norm": 1.4826966236644097,
      "learning_rate": 4.99496760681039e-06,
      "loss": 0.0971,
      "step": 223
    },
    {
      "epoch": 0.10191082802547771,
      "grad_norm": 1.3244278108579797,
      "learning_rate": 4.994922183213009e-06,
      "loss": 0.0963,
      "step": 224
    },
    {
      "epoch": 0.10236578707916287,
      "grad_norm": 0.5464933779715734,
      "learning_rate": 4.9948765557418535e-06,
      "loss": 0.0357,
      "step": 225
    },
    {
      "epoch": 0.10282074613284804,
      "grad_norm": 1.1325271027713097,
      "learning_rate": 4.994830724400653e-06,
      "loss": 0.0756,
      "step": 226
    },
    {
      "epoch": 0.10327570518653321,
      "grad_norm": 0.7823528354045581,
      "learning_rate": 4.994784689193151e-06,
      "loss": 0.0609,
      "step": 227
    },
    {
      "epoch": 0.10373066424021839,
      "grad_norm": 0.6599438687201707,
      "learning_rate": 4.994738450123111e-06,
      "loss": 0.046,
      "step": 228
    },
    {
      "epoch": 0.10418562329390355,
      "grad_norm": 0.9666854434475629,
      "learning_rate": 4.994692007194312e-06,
      "loss": 0.0743,
      "step": 229
    },
    {
      "epoch": 0.10464058234758872,
      "grad_norm": 0.7151615241659314,
      "learning_rate": 4.994645360410547e-06,
      "loss": 0.0583,
      "step": 230
    },
    {
      "epoch": 0.10509554140127389,
      "grad_norm": 0.7773674174360427,
      "learning_rate": 4.99459850977563e-06,
      "loss": 0.0618,
      "step": 231
    },
    {
      "epoch": 0.10555050045495905,
      "grad_norm": 0.8418236580272198,
      "learning_rate": 4.994551455293388e-06,
      "loss": 0.046,
      "step": 232
    },
    {
      "epoch": 0.10600545950864422,
      "grad_norm": 0.9714541810445473,
      "learning_rate": 4.9945041969676654e-06,
      "loss": 0.0634,
      "step": 233
    },
    {
      "epoch": 0.10646041856232939,
      "grad_norm": 1.0109494927023708,
      "learning_rate": 4.994456734802325e-06,
      "loss": 0.0551,
      "step": 234
    },
    {
      "epoch": 0.10691537761601456,
      "grad_norm": 0.714933750259254,
      "learning_rate": 4.994409068801247e-06,
      "loss": 0.0593,
      "step": 235
    },
    {
      "epoch": 0.10737033666969972,
      "grad_norm": 1.998280137227604,
      "learning_rate": 4.994361198968323e-06,
      "loss": 0.0632,
      "step": 236
    },
    {
      "epoch": 0.1078252957233849,
      "grad_norm": 1.2708633718893245,
      "learning_rate": 4.994313125307466e-06,
      "loss": 0.0909,
      "step": 237
    },
    {
      "epoch": 0.10828025477707007,
      "grad_norm": 0.7903038049799667,
      "learning_rate": 4.994264847822605e-06,
      "loss": 0.0579,
      "step": 238
    },
    {
      "epoch": 0.10873521383075523,
      "grad_norm": 0.7076795429019287,
      "learning_rate": 4.994216366517684e-06,
      "loss": 0.0419,
      "step": 239
    },
    {
      "epoch": 0.1091901728844404,
      "grad_norm": 0.9078047157633448,
      "learning_rate": 4.994167681396667e-06,
      "loss": 0.0631,
      "step": 240
    },
    {
      "epoch": 0.10964513193812557,
      "grad_norm": 1.122407784822992,
      "learning_rate": 4.994118792463529e-06,
      "loss": 0.0771,
      "step": 241
    },
    {
      "epoch": 0.11010009099181074,
      "grad_norm": 1.3544092697698327,
      "learning_rate": 4.994069699722267e-06,
      "loss": 0.1034,
      "step": 242
    },
    {
      "epoch": 0.1105550500454959,
      "grad_norm": 1.0823051140179736,
      "learning_rate": 4.994020403176893e-06,
      "loss": 0.0737,
      "step": 243
    },
    {
      "epoch": 0.11101000909918107,
      "grad_norm": 1.4097890081473512,
      "learning_rate": 4.9939709028314345e-06,
      "loss": 0.0882,
      "step": 244
    },
    {
      "epoch": 0.11146496815286625,
      "grad_norm": 3.3536954759034883,
      "learning_rate": 4.993921198689935e-06,
      "loss": 0.0448,
      "step": 245
    },
    {
      "epoch": 0.11191992720655142,
      "grad_norm": 0.8141751078988797,
      "learning_rate": 4.993871290756459e-06,
      "loss": 0.053,
      "step": 246
    },
    {
      "epoch": 0.11237488626023658,
      "grad_norm": 0.7556149519633891,
      "learning_rate": 4.9938211790350835e-06,
      "loss": 0.053,
      "step": 247
    },
    {
      "epoch": 0.11282984531392175,
      "grad_norm": 1.028865867099704,
      "learning_rate": 4.993770863529902e-06,
      "loss": 0.068,
      "step": 248
    },
    {
      "epoch": 0.11328480436760692,
      "grad_norm": 0.8709129466992336,
      "learning_rate": 4.993720344245029e-06,
      "loss": 0.0533,
      "step": 249
    },
    {
      "epoch": 0.11373976342129208,
      "grad_norm": 0.8992015471183187,
      "learning_rate": 4.99366962118459e-06,
      "loss": 0.0589,
      "step": 250
    },
    {
      "epoch": 0.11419472247497725,
      "grad_norm": 0.8276539094244998,
      "learning_rate": 4.99361869435273e-06,
      "loss": 0.0537,
      "step": 251
    },
    {
      "epoch": 0.11464968152866242,
      "grad_norm": 1.1164497627583263,
      "learning_rate": 4.993567563753613e-06,
      "loss": 0.0627,
      "step": 252
    },
    {
      "epoch": 0.1151046405823476,
      "grad_norm": 1.049662917063972,
      "learning_rate": 4.993516229391414e-06,
      "loss": 0.0708,
      "step": 253
    },
    {
      "epoch": 0.11555959963603275,
      "grad_norm": 0.8007012653446455,
      "learning_rate": 4.993464691270331e-06,
      "loss": 0.036,
      "step": 254
    },
    {
      "epoch": 0.11601455868971793,
      "grad_norm": 1.0491396902879628,
      "learning_rate": 4.993412949394572e-06,
      "loss": 0.0564,
      "step": 255
    },
    {
      "epoch": 0.1164695177434031,
      "grad_norm": 1.0461746265014504,
      "learning_rate": 4.993361003768369e-06,
      "loss": 0.0547,
      "step": 256
    },
    {
      "epoch": 0.11692447679708826,
      "grad_norm": 0.7167785855145479,
      "learning_rate": 4.993308854395963e-06,
      "loss": 0.0543,
      "step": 257
    },
    {
      "epoch": 0.11737943585077343,
      "grad_norm": 1.7377303264454829,
      "learning_rate": 4.993256501281618e-06,
      "loss": 0.0385,
      "step": 258
    },
    {
      "epoch": 0.1178343949044586,
      "grad_norm": 0.9843734560261626,
      "learning_rate": 4.993203944429611e-06,
      "loss": 0.0793,
      "step": 259
    },
    {
      "epoch": 0.11828935395814377,
      "grad_norm": 0.7687699158665893,
      "learning_rate": 4.993151183844236e-06,
      "loss": 0.0554,
      "step": 260
    },
    {
      "epoch": 0.11874431301182893,
      "grad_norm": 0.8273770606193852,
      "learning_rate": 4.9930982195298065e-06,
      "loss": 0.0485,
      "step": 261
    },
    {
      "epoch": 0.1191992720655141,
      "grad_norm": 0.8576587444889947,
      "learning_rate": 4.9930450514906484e-06,
      "loss": 0.0668,
      "step": 262
    },
    {
      "epoch": 0.11965423111919928,
      "grad_norm": 0.8611188803026584,
      "learning_rate": 4.9929916797311075e-06,
      "loss": 0.0511,
      "step": 263
    },
    {
      "epoch": 0.12010919017288443,
      "grad_norm": 1.5171570240611278,
      "learning_rate": 4.992938104255545e-06,
      "loss": 0.105,
      "step": 264
    },
    {
      "epoch": 0.1205641492265696,
      "grad_norm": 0.9293458324727663,
      "learning_rate": 4.992884325068339e-06,
      "loss": 0.0519,
      "step": 265
    },
    {
      "epoch": 0.12101910828025478,
      "grad_norm": 0.854174247687424,
      "learning_rate": 4.992830342173882e-06,
      "loss": 0.0739,
      "step": 266
    },
    {
      "epoch": 0.12147406733393995,
      "grad_norm": 1.185366954699452,
      "learning_rate": 4.992776155576589e-06,
      "loss": 0.088,
      "step": 267
    },
    {
      "epoch": 0.12192902638762511,
      "grad_norm": 0.8266584460330494,
      "learning_rate": 4.992721765280884e-06,
      "loss": 0.0766,
      "step": 268
    },
    {
      "epoch": 0.12238398544131028,
      "grad_norm": 1.4759867391060453,
      "learning_rate": 4.992667171291215e-06,
      "loss": 0.0935,
      "step": 269
    },
    {
      "epoch": 0.12283894449499545,
      "grad_norm": 0.7694789949011869,
      "learning_rate": 4.992612373612042e-06,
      "loss": 0.0444,
      "step": 270
    },
    {
      "epoch": 0.12329390354868063,
      "grad_norm": 1.3788521475642475,
      "learning_rate": 4.99255737224784e-06,
      "loss": 0.0686,
      "step": 271
    },
    {
      "epoch": 0.12374886260236578,
      "grad_norm": 0.6815172893718315,
      "learning_rate": 4.9925021672031075e-06,
      "loss": 0.0597,
      "step": 272
    },
    {
      "epoch": 0.12420382165605096,
      "grad_norm": 1.0361010622054052,
      "learning_rate": 4.992446758482353e-06,
      "loss": 0.0577,
      "step": 273
    },
    {
      "epoch": 0.12465878070973613,
      "grad_norm": 1.3644605952379871,
      "learning_rate": 4.992391146090106e-06,
      "loss": 0.1058,
      "step": 274
    },
    {
      "epoch": 0.1251137397634213,
      "grad_norm": 0.6194563221594529,
      "learning_rate": 4.99233533003091e-06,
      "loss": 0.0481,
      "step": 275
    },
    {
      "epoch": 0.12556869881710647,
      "grad_norm": 0.8571229878526591,
      "learning_rate": 4.992279310309326e-06,
      "loss": 0.0811,
      "step": 276
    },
    {
      "epoch": 0.12602365787079162,
      "grad_norm": 1.543151015857885,
      "learning_rate": 4.9922230869299316e-06,
      "loss": 0.1184,
      "step": 277
    },
    {
      "epoch": 0.1264786169244768,
      "grad_norm": 0.8807612446920655,
      "learning_rate": 4.992166659897321e-06,
      "loss": 0.0629,
      "step": 278
    },
    {
      "epoch": 0.12693357597816196,
      "grad_norm": 0.754844132552438,
      "learning_rate": 4.992110029216106e-06,
      "loss": 0.0488,
      "step": 279
    },
    {
      "epoch": 0.12738853503184713,
      "grad_norm": 0.8268029175550342,
      "learning_rate": 4.992053194890914e-06,
      "loss": 0.0463,
      "step": 280
    },
    {
      "epoch": 0.1278434940855323,
      "grad_norm": 3.022212928870756,
      "learning_rate": 4.991996156926388e-06,
      "loss": 0.0622,
      "step": 281
    },
    {
      "epoch": 0.12829845313921748,
      "grad_norm": 11.09472751599815,
      "learning_rate": 4.9919389153271904e-06,
      "loss": 0.0631,
      "step": 282
    },
    {
      "epoch": 0.12875341219290265,
      "grad_norm": 5.099079327460766,
      "learning_rate": 4.991881470097998e-06,
      "loss": 0.0666,
      "step": 283
    },
    {
      "epoch": 0.1292083712465878,
      "grad_norm": 27.36348462792037,
      "learning_rate": 4.991823821243505e-06,
      "loss": 0.0601,
      "step": 284
    },
    {
      "epoch": 0.12966333030027297,
      "grad_norm": 4.3922238380370375,
      "learning_rate": 4.991765968768422e-06,
      "loss": 0.0801,
      "step": 285
    },
    {
      "epoch": 0.13011828935395814,
      "grad_norm": 2.2745288954264855,
      "learning_rate": 4.991707912677477e-06,
      "loss": 0.0461,
      "step": 286
    },
    {
      "epoch": 0.1305732484076433,
      "grad_norm": 4.103515429733392,
      "learning_rate": 4.991649652975414e-06,
      "loss": 0.0464,
      "step": 287
    },
    {
      "epoch": 0.13102820746132848,
      "grad_norm": 8.218943562506432,
      "learning_rate": 4.991591189666994e-06,
      "loss": 0.048,
      "step": 288
    },
    {
      "epoch": 0.13148316651501366,
      "grad_norm": 2.4966842175341917,
      "learning_rate": 4.991532522756993e-06,
      "loss": 0.0635,
      "step": 289
    },
    {
      "epoch": 0.13193812556869883,
      "grad_norm": 2.714522360598833,
      "learning_rate": 4.991473652250207e-06,
      "loss": 0.0416,
      "step": 290
    },
    {
      "epoch": 0.13239308462238397,
      "grad_norm": 0.7496525159208725,
      "learning_rate": 4.991414578151445e-06,
      "loss": 0.0558,
      "step": 291
    },
    {
      "epoch": 0.13284804367606914,
      "grad_norm": 1.9010591397052237,
      "learning_rate": 4.991355300465535e-06,
      "loss": 0.1319,
      "step": 292
    },
    {
      "epoch": 0.13330300272975432,
      "grad_norm": 1.0962153747662344,
      "learning_rate": 4.99129581919732e-06,
      "loss": 0.0597,
      "step": 293
    },
    {
      "epoch": 0.1337579617834395,
      "grad_norm": 0.682643165278525,
      "learning_rate": 4.9912361343516616e-06,
      "loss": 0.0392,
      "step": 294
    },
    {
      "epoch": 0.13421292083712466,
      "grad_norm": 0.8036831706718465,
      "learning_rate": 4.991176245933437e-06,
      "loss": 0.0572,
      "step": 295
    },
    {
      "epoch": 0.13466787989080983,
      "grad_norm": 0.799489770852785,
      "learning_rate": 4.9911161539475385e-06,
      "loss": 0.0533,
      "step": 296
    },
    {
      "epoch": 0.135122838944495,
      "grad_norm": 1.5836938564798495,
      "learning_rate": 4.991055858398879e-06,
      "loss": 0.0875,
      "step": 297
    },
    {
      "epoch": 0.13557779799818018,
      "grad_norm": 1.1468269055025655,
      "learning_rate": 4.990995359292384e-06,
      "loss": 0.0843,
      "step": 298
    },
    {
      "epoch": 0.13603275705186532,
      "grad_norm": 1.025610687153708,
      "learning_rate": 4.990934656632997e-06,
      "loss": 0.0767,
      "step": 299
    },
    {
      "epoch": 0.1364877161055505,
      "grad_norm": 1.135419351562697,
      "learning_rate": 4.990873750425679e-06,
      "loss": 0.0521,
      "step": 300
    },
    {
      "epoch": 0.13694267515923567,
      "grad_norm": 0.7857019349684609,
      "learning_rate": 4.990812640675406e-06,
      "loss": 0.0577,
      "step": 301
    },
    {
      "epoch": 0.13739763421292084,
      "grad_norm": 0.6543121694694685,
      "learning_rate": 4.990751327387174e-06,
      "loss": 0.0408,
      "step": 302
    },
    {
      "epoch": 0.137852593266606,
      "grad_norm": 0.9867579368206506,
      "learning_rate": 4.99068981056599e-06,
      "loss": 0.0644,
      "step": 303
    },
    {
      "epoch": 0.13830755232029118,
      "grad_norm": 0.9387206564680207,
      "learning_rate": 4.990628090216885e-06,
      "loss": 0.0725,
      "step": 304
    },
    {
      "epoch": 0.13876251137397635,
      "grad_norm": 0.6895906486970027,
      "learning_rate": 4.990566166344898e-06,
      "loss": 0.0444,
      "step": 305
    },
    {
      "epoch": 0.1392174704276615,
      "grad_norm": 0.8627162803317235,
      "learning_rate": 4.990504038955092e-06,
      "loss": 0.0639,
      "step": 306
    },
    {
      "epoch": 0.13967242948134667,
      "grad_norm": 0.9832854011829437,
      "learning_rate": 4.990441708052542e-06,
      "loss": 0.067,
      "step": 307
    },
    {
      "epoch": 0.14012738853503184,
      "grad_norm": 0.6828895359949346,
      "learning_rate": 4.9903791736423435e-06,
      "loss": 0.0511,
      "step": 308
    },
    {
      "epoch": 0.14058234758871702,
      "grad_norm": 0.949508820368659,
      "learning_rate": 4.9903164357296044e-06,
      "loss": 0.0586,
      "step": 309
    },
    {
      "epoch": 0.1410373066424022,
      "grad_norm": 0.8262401805570133,
      "learning_rate": 4.990253494319453e-06,
      "loss": 0.072,
      "step": 310
    },
    {
      "epoch": 0.14149226569608736,
      "grad_norm": 0.7329455864605506,
      "learning_rate": 4.990190349417032e-06,
      "loss": 0.0659,
      "step": 311
    },
    {
      "epoch": 0.14194722474977253,
      "grad_norm": 1.008005243411958,
      "learning_rate": 4.990127001027501e-06,
      "loss": 0.0682,
      "step": 312
    },
    {
      "epoch": 0.14240218380345768,
      "grad_norm": 1.3159038760119786,
      "learning_rate": 4.990063449156037e-06,
      "loss": 0.0485,
      "step": 313
    },
    {
      "epoch": 0.14285714285714285,
      "grad_norm": 1.053132819530921,
      "learning_rate": 4.989999693807832e-06,
      "loss": 0.0736,
      "step": 314
    },
    {
      "epoch": 0.14331210191082802,
      "grad_norm": 1.2097384821970267,
      "learning_rate": 4.989935734988098e-06,
      "loss": 0.0752,
      "step": 315
    },
    {
      "epoch": 0.1437670609645132,
      "grad_norm": 0.8883071525106219,
      "learning_rate": 4.98987157270206e-06,
      "loss": 0.0614,
      "step": 316
    },
    {
      "epoch": 0.14422202001819837,
      "grad_norm": 1.3457063090752772,
      "learning_rate": 4.989807206954961e-06,
      "loss": 0.0896,
      "step": 317
    },
    {
      "epoch": 0.14467697907188354,
      "grad_norm": 0.6077627024555071,
      "learning_rate": 4.9897426377520605e-06,
      "loss": 0.0426,
      "step": 318
    },
    {
      "epoch": 0.1451319381255687,
      "grad_norm": 1.4177858466419022,
      "learning_rate": 4.989677865098636e-06,
      "loss": 0.0922,
      "step": 319
    },
    {
      "epoch": 0.14558689717925385,
      "grad_norm": 0.5838535924114719,
      "learning_rate": 4.989612888999978e-06,
      "loss": 0.04,
      "step": 320
    },
    {
      "epoch": 0.14604185623293903,
      "grad_norm": 0.7991836602542821,
      "learning_rate": 4.9895477094614e-06,
      "loss": 0.0644,
      "step": 321
    },
    {
      "epoch": 0.1464968152866242,
      "grad_norm": 0.6309987592236359,
      "learning_rate": 4.989482326488225e-06,
      "loss": 0.0457,
      "step": 322
    },
    {
      "epoch": 0.14695177434030937,
      "grad_norm": 0.850157804894001,
      "learning_rate": 4.989416740085796e-06,
      "loss": 0.0706,
      "step": 323
    },
    {
      "epoch": 0.14740673339399454,
      "grad_norm": 0.8703332109039406,
      "learning_rate": 4.9893509502594735e-06,
      "loss": 0.0503,
      "step": 324
    },
    {
      "epoch": 0.14786169244767972,
      "grad_norm": 0.9357603198363387,
      "learning_rate": 4.9892849570146335e-06,
      "loss": 0.0799,
      "step": 325
    },
    {
      "epoch": 0.1483166515013649,
      "grad_norm": 0.9508555727006773,
      "learning_rate": 4.989218760356668e-06,
      "loss": 0.0703,
      "step": 326
    },
    {
      "epoch": 0.14877161055505003,
      "grad_norm": 0.8548982254979315,
      "learning_rate": 4.989152360290987e-06,
      "loss": 0.0706,
      "step": 327
    },
    {
      "epoch": 0.1492265696087352,
      "grad_norm": 1.1548758627037845,
      "learning_rate": 4.989085756823015e-06,
      "loss": 0.0868,
      "step": 328
    },
    {
      "epoch": 0.14968152866242038,
      "grad_norm": 0.872011841531817,
      "learning_rate": 4.989018949958197e-06,
      "loss": 0.0642,
      "step": 329
    },
    {
      "epoch": 0.15013648771610555,
      "grad_norm": 0.7767447334589991,
      "learning_rate": 4.98895193970199e-06,
      "loss": 0.0428,
      "step": 330
    },
    {
      "epoch": 0.15059144676979072,
      "grad_norm": 0.9215786343037755,
      "learning_rate": 4.9888847260598705e-06,
      "loss": 0.0652,
      "step": 331
    },
    {
      "epoch": 0.1510464058234759,
      "grad_norm": 1.0293746869379716,
      "learning_rate": 4.98881730903733e-06,
      "loss": 0.0768,
      "step": 332
    },
    {
      "epoch": 0.15150136487716107,
      "grad_norm": 1.2190824076232663,
      "learning_rate": 4.98874968863988e-06,
      "loss": 0.0746,
      "step": 333
    },
    {
      "epoch": 0.15195632393084624,
      "grad_norm": 0.8899729802614444,
      "learning_rate": 4.988681864873044e-06,
      "loss": 0.0638,
      "step": 334
    },
    {
      "epoch": 0.15241128298453138,
      "grad_norm": 0.8009499929718743,
      "learning_rate": 4.988613837742364e-06,
      "loss": 0.0556,
      "step": 335
    },
    {
      "epoch": 0.15286624203821655,
      "grad_norm": 1.0942561304100769,
      "learning_rate": 4.9885456072534015e-06,
      "loss": 0.0685,
      "step": 336
    },
    {
      "epoch": 0.15332120109190173,
      "grad_norm": 1.1210686024600067,
      "learning_rate": 4.988477173411728e-06,
      "loss": 0.0649,
      "step": 337
    },
    {
      "epoch": 0.1537761601455869,
      "grad_norm": 0.713128381997935,
      "learning_rate": 4.988408536222939e-06,
      "loss": 0.043,
      "step": 338
    },
    {
      "epoch": 0.15423111919927207,
      "grad_norm": 0.8820810335281195,
      "learning_rate": 4.9883396956926416e-06,
      "loss": 0.0545,
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.15468607825295724, | |
| "grad_norm": 0.7198251112806523, | |
| "learning_rate": 4.988270651826462e-06, | |
| "loss": 0.0419, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.15514103730664242, | |
| "grad_norm": 0.9319745452557298, | |
| "learning_rate": 4.988201404630041e-06, | |
| "loss": 0.0556, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.15559599636032756, | |
| "grad_norm": 0.7744733545189804, | |
| "learning_rate": 4.988131954109038e-06, | |
| "loss": 0.0566, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.15605095541401273, | |
| "grad_norm": 1.2609547822192495, | |
| "learning_rate": 4.988062300269128e-06, | |
| "loss": 0.0931, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.1565059144676979, | |
| "grad_norm": 1.0356035457639365, | |
| "learning_rate": 4.987992443116003e-06, | |
| "loss": 0.0592, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.15696087352138308, | |
| "grad_norm": 0.8613107982126194, | |
| "learning_rate": 4.987922382655372e-06, | |
| "loss": 0.0599, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.15741583257506825, | |
| "grad_norm": 1.2274429381178749, | |
| "learning_rate": 4.987852118892958e-06, | |
| "loss": 0.104, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.15787079162875342, | |
| "grad_norm": 0.8982827327342306, | |
| "learning_rate": 4.987781651834503e-06, | |
| "loss": 0.0777, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.1583257506824386, | |
| "grad_norm": 1.124267218302162, | |
| "learning_rate": 4.987710981485768e-06, | |
| "loss": 0.0815, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.15878070973612374, | |
| "grad_norm": 1.1417083606361687, | |
| "learning_rate": 4.987640107852525e-06, | |
| "loss": 0.0968, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.1592356687898089, | |
| "grad_norm": 0.7137928465125194, | |
| "learning_rate": 4.987569030940567e-06, | |
| "loss": 0.0525, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.15969062784349408, | |
| "grad_norm": 0.8074447940975472, | |
| "learning_rate": 4.987497750755702e-06, | |
| "loss": 0.0478, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.16014558689717925, | |
| "grad_norm": 1.320795972993318, | |
| "learning_rate": 4.987426267303753e-06, | |
| "loss": 0.0814, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.16060054595086443, | |
| "grad_norm": 0.956458465296858, | |
| "learning_rate": 4.987354580590563e-06, | |
| "loss": 0.0728, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.1610555050045496, | |
| "grad_norm": 0.9487388071568301, | |
| "learning_rate": 4.987282690621991e-06, | |
| "loss": 0.0778, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.16151046405823477, | |
| "grad_norm": 0.8111568286998416, | |
| "learning_rate": 4.987210597403907e-06, | |
| "loss": 0.0634, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.16196542311191992, | |
| "grad_norm": 0.9291291865293426, | |
| "learning_rate": 4.987138300942208e-06, | |
| "loss": 0.057, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.1624203821656051, | |
| "grad_norm": 0.7796831533037398, | |
| "learning_rate": 4.987065801242798e-06, | |
| "loss": 0.0591, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.16287534121929026, | |
| "grad_norm": 1.0091637666603208, | |
| "learning_rate": 4.986993098311601e-06, | |
| "loss": 0.0712, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.16333030027297543, | |
| "grad_norm": 0.9599752405823201, | |
| "learning_rate": 4.986920192154561e-06, | |
| "loss": 0.0712, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.1637852593266606, | |
| "grad_norm": 0.6975593533750986, | |
| "learning_rate": 4.986847082777632e-06, | |
| "loss": 0.0489, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.16424021838034578, | |
| "grad_norm": 0.8407792898194115, | |
| "learning_rate": 4.986773770186791e-06, | |
| "loss": 0.0687, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.16469517743403095, | |
| "grad_norm": 1.16032280422667, | |
| "learning_rate": 4.986700254388027e-06, | |
| "loss": 0.0814, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.1651501364877161, | |
| "grad_norm": 0.6789989236352713, | |
| "learning_rate": 4.986626535387349e-06, | |
| "loss": 0.0502, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.16560509554140126, | |
| "grad_norm": 0.8858819178004838, | |
| "learning_rate": 4.9865526131907795e-06, | |
| "loss": 0.0584, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.16606005459508644, | |
| "grad_norm": 1.0159257224317502, | |
| "learning_rate": 4.9864784878043595e-06, | |
| "loss": 0.0828, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.1665150136487716, | |
| "grad_norm": 1.1632391007958518, | |
| "learning_rate": 4.986404159234146e-06, | |
| "loss": 0.0693, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.16696997270245678, | |
| "grad_norm": 0.7286212082146628, | |
| "learning_rate": 4.986329627486213e-06, | |
| "loss": 0.048, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.16742493175614195, | |
| "grad_norm": 1.1675091585135315, | |
| "learning_rate": 4.986254892566652e-06, | |
| "loss": 0.0831, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.16787989080982713, | |
| "grad_norm": 0.7791126867293955, | |
| "learning_rate": 4.9861799544815684e-06, | |
| "loss": 0.0511, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.16833484986351227, | |
| "grad_norm": 0.8594476885535768, | |
| "learning_rate": 4.986104813237086e-06, | |
| "loss": 0.0605, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.16878980891719744, | |
| "grad_norm": 0.8510456749795352, | |
| "learning_rate": 4.986029468839346e-06, | |
| "loss": 0.0568, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.16924476797088261, | |
| "grad_norm": 1.1617139473891909, | |
| "learning_rate": 4.985953921294505e-06, | |
| "loss": 0.09, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.1696997270245678, | |
| "grad_norm": 0.6957569576277562, | |
| "learning_rate": 4.985878170608736e-06, | |
| "loss": 0.038, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.17015468607825296, | |
| "grad_norm": 0.8584263131532073, | |
| "learning_rate": 4.985802216788228e-06, | |
| "loss": 0.0517, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.17060964513193813, | |
| "grad_norm": 0.9366771679720911, | |
| "learning_rate": 4.98572605983919e-06, | |
| "loss": 0.063, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.1710646041856233, | |
| "grad_norm": 0.5935251092125957, | |
| "learning_rate": 4.985649699767842e-06, | |
| "loss": 0.0399, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.17151956323930848, | |
| "grad_norm": 0.7556873935071919, | |
| "learning_rate": 4.985573136580427e-06, | |
| "loss": 0.0606, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.17197452229299362, | |
| "grad_norm": 0.723085424895094, | |
| "learning_rate": 4.9854963702832e-06, | |
| "loss": 0.0498, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.1724294813466788, | |
| "grad_norm": 0.9057911616547558, | |
| "learning_rate": 4.985419400882433e-06, | |
| "loss": 0.0733, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.17288444040036396, | |
| "grad_norm": 1.0911724774245748, | |
| "learning_rate": 4.985342228384418e-06, | |
| "loss": 0.0974, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.17333939945404914, | |
| "grad_norm": 15.867955316807802, | |
| "learning_rate": 4.985264852795459e-06, | |
| "loss": 0.4597, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.1737943585077343, | |
| "grad_norm": 0.8242169703714594, | |
| "learning_rate": 4.98518727412188e-06, | |
| "loss": 0.0592, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.17424931756141948, | |
| "grad_norm": 1.189476180626615, | |
| "learning_rate": 4.98510949237002e-06, | |
| "loss": 0.0871, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.17470427661510465, | |
| "grad_norm": 0.9035387689173863, | |
| "learning_rate": 4.985031507546234e-06, | |
| "loss": 0.0659, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.1751592356687898, | |
| "grad_norm": 1.5548450607275692, | |
| "learning_rate": 4.984953319656896e-06, | |
| "loss": 0.102, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.17561419472247497, | |
| "grad_norm": 0.9148861743530409, | |
| "learning_rate": 4.984874928708395e-06, | |
| "loss": 0.0621, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.17606915377616014, | |
| "grad_norm": 1.0088623446062757, | |
| "learning_rate": 4.984796334707136e-06, | |
| "loss": 0.0801, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.17652411282984531, | |
| "grad_norm": 7.099087459170151, | |
| "learning_rate": 4.984717537659542e-06, | |
| "loss": 0.1139, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.1769790718835305, | |
| "grad_norm": 0.6271204554143699, | |
| "learning_rate": 4.984638537572052e-06, | |
| "loss": 0.0362, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.17743403093721566, | |
| "grad_norm": 0.9099126199173307, | |
| "learning_rate": 4.984559334451121e-06, | |
| "loss": 0.0589, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.17788898999090083, | |
| "grad_norm": 0.9635928903258919, | |
| "learning_rate": 4.984479928303221e-06, | |
| "loss": 0.0485, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.17834394904458598, | |
| "grad_norm": 0.8684293064054923, | |
| "learning_rate": 4.984400319134841e-06, | |
| "loss": 0.0488, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.17879890809827115, | |
| "grad_norm": 1.490825595774446, | |
| "learning_rate": 4.984320506952487e-06, | |
| "loss": 0.1164, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.17925386715195632, | |
| "grad_norm": 1.0210666975638372, | |
| "learning_rate": 4.9842404917626796e-06, | |
| "loss": 0.0765, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.1797088262056415, | |
| "grad_norm": 0.7827897024774737, | |
| "learning_rate": 4.984160273571959e-06, | |
| "loss": 0.0627, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.18016378525932666, | |
| "grad_norm": 0.9460976796008799, | |
| "learning_rate": 4.9840798523868785e-06, | |
| "loss": 0.0802, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.18061874431301184, | |
| "grad_norm": 0.6974747481172566, | |
| "learning_rate": 4.983999228214011e-06, | |
| "loss": 0.0483, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.181073703366697, | |
| "grad_norm": 0.7442577439773002, | |
| "learning_rate": 4.983918401059943e-06, | |
| "loss": 0.0501, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.18152866242038215, | |
| "grad_norm": 1.001863981150214, | |
| "learning_rate": 4.983837370931282e-06, | |
| "loss": 0.0866, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.18198362147406733, | |
| "grad_norm": 1.258993794296855, | |
| "learning_rate": 4.983756137834647e-06, | |
| "loss": 0.1164, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.1824385805277525, | |
| "grad_norm": 1.1296307149258726, | |
| "learning_rate": 4.9836747017766765e-06, | |
| "loss": 0.0698, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.18289353958143767, | |
| "grad_norm": 0.9299919208142283, | |
| "learning_rate": 4.983593062764027e-06, | |
| "loss": 0.0767, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.18334849863512284, | |
| "grad_norm": 1.6483380962062835, | |
| "learning_rate": 4.983511220803367e-06, | |
| "loss": 0.0982, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.18380345768880801, | |
| "grad_norm": 0.7951232146562915, | |
| "learning_rate": 4.983429175901386e-06, | |
| "loss": 0.0621, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.1842584167424932, | |
| "grad_norm": 0.7346583458526271, | |
| "learning_rate": 4.983346928064788e-06, | |
| "loss": 0.0485, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.18471337579617833, | |
| "grad_norm": 0.8488964995265393, | |
| "learning_rate": 4.9832644773002935e-06, | |
| "loss": 0.0697, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.1851683348498635, | |
| "grad_norm": 0.637978257841365, | |
| "learning_rate": 4.98318182361464e-06, | |
| "loss": 0.0578, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.18562329390354868, | |
| "grad_norm": 1.006460769017827, | |
| "learning_rate": 4.9830989670145825e-06, | |
| "loss": 0.0741, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.18607825295723385, | |
| "grad_norm": 1.0063850758607982, | |
| "learning_rate": 4.9830159075068905e-06, | |
| "loss": 0.0698, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.18653321201091902, | |
| "grad_norm": 0.9365632618002147, | |
| "learning_rate": 4.9829326450983514e-06, | |
| "loss": 0.0779, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.1869881710646042, | |
| "grad_norm": 0.8773564274313461, | |
| "learning_rate": 4.98284917979577e-06, | |
| "loss": 0.0608, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.18744313011828936, | |
| "grad_norm": 0.9057984183185465, | |
| "learning_rate": 4.9827655116059656e-06, | |
| "loss": 0.0639, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.18789808917197454, | |
| "grad_norm": 0.8657789325497686, | |
| "learning_rate": 4.9826816405357755e-06, | |
| "loss": 0.0749, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.18835304822565968, | |
| "grad_norm": 0.5817294435867961, | |
| "learning_rate": 4.982597566592054e-06, | |
| "loss": 0.0353, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.18880800727934485, | |
| "grad_norm": 1.2277963790590036, | |
| "learning_rate": 4.982513289781671e-06, | |
| "loss": 0.091, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.18926296633303002, | |
| "grad_norm": 0.7616764372047586, | |
| "learning_rate": 4.982428810111512e-06, | |
| "loss": 0.0597, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.1897179253867152, | |
| "grad_norm": 0.710019161677026, | |
| "learning_rate": 4.9823441275884814e-06, | |
| "loss": 0.0535, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.19017288444040037, | |
| "grad_norm": 1.1202371935797844, | |
| "learning_rate": 4.982259242219499e-06, | |
| "loss": 0.0643, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.19062784349408554, | |
| "grad_norm": 0.6803190221634923, | |
| "learning_rate": 4.9821741540115006e-06, | |
| "loss": 0.0483, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.1910828025477707, | |
| "grad_norm": 0.8014131027464055, | |
| "learning_rate": 4.982088862971441e-06, | |
| "loss": 0.0703, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.19153776160145586, | |
| "grad_norm": 0.960552266983122, | |
| "learning_rate": 4.982003369106287e-06, | |
| "loss": 0.0709, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.19199272065514103, | |
| "grad_norm": 0.6179685927519944, | |
| "learning_rate": 4.981917672423028e-06, | |
| "loss": 0.0407, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.1924476797088262, | |
| "grad_norm": 0.9538296833436659, | |
| "learning_rate": 4.981831772928664e-06, | |
| "loss": 0.0681, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.19290263876251137, | |
| "grad_norm": 1.076872796407403, | |
| "learning_rate": 4.981745670630216e-06, | |
| "loss": 0.0918, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.19335759781619655, | |
| "grad_norm": 0.8486267027177018, | |
| "learning_rate": 4.981659365534718e-06, | |
| "loss": 0.081, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.19381255686988172, | |
| "grad_norm": 1.2668354345440433, | |
| "learning_rate": 4.981572857649225e-06, | |
| "loss": 0.0855, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.1942675159235669, | |
| "grad_norm": 0.785685618330662, | |
| "learning_rate": 4.981486146980804e-06, | |
| "loss": 0.0525, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.19472247497725204, | |
| "grad_norm": 0.9012661112199176, | |
| "learning_rate": 4.9813992335365415e-06, | |
| "loss": 0.0616, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.1951774340309372, | |
| "grad_norm": 0.9140326707870835, | |
| "learning_rate": 4.98131211732354e-06, | |
| "loss": 0.0742, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.19563239308462238, | |
| "grad_norm": 0.8802115121731895, | |
| "learning_rate": 4.981224798348917e-06, | |
| "loss": 0.0543, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.19608735213830755, | |
| "grad_norm": 1.2263655680320666, | |
| "learning_rate": 4.981137276619809e-06, | |
| "loss": 0.1, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.19654231119199272, | |
| "grad_norm": 0.7179258520773776, | |
| "learning_rate": 4.9810495521433675e-06, | |
| "loss": 0.0563, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.1969972702456779, | |
| "grad_norm": 1.2006165727982114, | |
| "learning_rate": 4.9809616249267616e-06, | |
| "loss": 0.0919, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.19745222929936307, | |
| "grad_norm": 1.0426641838922892, | |
| "learning_rate": 4.980873494977174e-06, | |
| "loss": 0.0845, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.1979071883530482, | |
| "grad_norm": 0.8009974020959663, | |
| "learning_rate": 4.98078516230181e-06, | |
| "loss": 0.0495, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.19836214740673339, | |
| "grad_norm": 0.8146116166212912, | |
| "learning_rate": 4.980696626907884e-06, | |
| "loss": 0.0656, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.19881710646041856, | |
| "grad_norm": 0.8146964454257942, | |
| "learning_rate": 4.980607888802633e-06, | |
| "loss": 0.0717, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.19927206551410373, | |
| "grad_norm": 0.8473418815819729, | |
| "learning_rate": 4.980518947993307e-06, | |
| "loss": 0.0701, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.1997270245677889, | |
| "grad_norm": 0.8132123262524923, | |
| "learning_rate": 4.980429804487176e-06, | |
| "loss": 0.0657, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.20018198362147407, | |
| "grad_norm": 0.7631308196097977, | |
| "learning_rate": 4.980340458291521e-06, | |
| "loss": 0.0519, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.20063694267515925, | |
| "grad_norm": 0.7710009886187632, | |
| "learning_rate": 4.980250909413646e-06, | |
| "loss": 0.0668, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.2010919017288444, | |
| "grad_norm": 0.8960590823111618, | |
| "learning_rate": 4.980161157860867e-06, | |
| "loss": 0.066, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.20154686078252956, | |
| "grad_norm": 1.0148659081855533, | |
| "learning_rate": 4.980071203640519e-06, | |
| "loss": 0.0666, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.20200181983621474, | |
| "grad_norm": 0.6157365971883945, | |
| "learning_rate": 4.979981046759952e-06, | |
| "loss": 0.0441, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.2024567788898999, | |
| "grad_norm": 0.8862364575439057, | |
| "learning_rate": 4.979890687226533e-06, | |
| "loss": 0.0638, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.20291173794358508, | |
| "grad_norm": 1.051789940808801, | |
| "learning_rate": 4.979800125047647e-06, | |
| "loss": 0.0571, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.20336669699727025, | |
| "grad_norm": 0.8963335794848035, | |
| "learning_rate": 4.979709360230692e-06, | |
| "loss": 0.0706, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.20382165605095542, | |
| "grad_norm": 0.8639092050645877, | |
| "learning_rate": 4.979618392783087e-06, | |
| "loss": 0.0535, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.20427661510464057, | |
| "grad_norm": 0.630704913013139, | |
| "learning_rate": 4.979527222712266e-06, | |
| "loss": 0.0553, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.20473157415832574, | |
| "grad_norm": 0.6653631844503811, | |
| "learning_rate": 4.9794358500256765e-06, | |
| "loss": 0.0438, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.2051865332120109, | |
| "grad_norm": 0.8074584078493093, | |
| "learning_rate": 4.979344274730786e-06, | |
| "loss": 0.0607, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.20564149226569609, | |
| "grad_norm": 1.1020725070982913, | |
| "learning_rate": 4.979252496835079e-06, | |
| "loss": 0.0812, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.20609645131938126, | |
| "grad_norm": 1.2231182771798559, | |
| "learning_rate": 4.979160516346054e-06, | |
| "loss": 0.1074, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.20655141037306643, | |
| "grad_norm": 26.716723850026153, | |
| "learning_rate": 4.979068333271227e-06, | |
| "loss": 0.8002, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.2070063694267516, | |
| "grad_norm": 1.2123236026672213, | |
| "learning_rate": 4.978975947618131e-06, | |
| "loss": 0.0788, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.20746132848043677, | |
| "grad_norm": 0.8671125203100531, | |
| "learning_rate": 4.978883359394316e-06, | |
| "loss": 0.0902, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.20791628753412192, | |
| "grad_norm": 0.9848601155594614, | |
| "learning_rate": 4.978790568607347e-06, | |
| "loss": 0.0606, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.2083712465878071, | |
| "grad_norm": 1.013839640652733, | |
| "learning_rate": 4.9786975752648076e-06, | |
| "loss": 0.0873, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.20882620564149226, | |
| "grad_norm": 0.7483252407807567, | |
| "learning_rate": 4.978604379374295e-06, | |
| "loss": 0.0592, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.20928116469517744, | |
| "grad_norm": 0.7178737508101655, | |
| "learning_rate": 4.978510980943427e-06, | |
| "loss": 0.0506, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.2097361237488626, | |
| "grad_norm": 1.0919247632044238, | |
| "learning_rate": 4.978417379979834e-06, | |
| "loss": 0.0778, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.21019108280254778, | |
| "grad_norm": 0.8331653357443332, | |
| "learning_rate": 4.978323576491165e-06, | |
| "loss": 0.0577, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.21064604185623295, | |
| "grad_norm": 0.8152928496306786, | |
| "learning_rate": 4.978229570485085e-06, | |
| "loss": 0.072, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.2111010009099181, | |
| "grad_norm": 0.751813285906743, | |
| "learning_rate": 4.978135361969276e-06, | |
| "loss": 0.0649, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.21155595996360327, | |
| "grad_norm": 0.8232278152234197, | |
| "learning_rate": 4.9780409509514375e-06, | |
| "loss": 0.0642, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.21201091901728844, | |
| "grad_norm": 1.5303665195432214, | |
| "learning_rate": 4.977946337439282e-06, | |
| "loss": 0.1217, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.2124658780709736, | |
| "grad_norm": 0.9269370490140525, | |
| "learning_rate": 4.9778515214405436e-06, | |
| "loss": 0.081, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.21292083712465878, | |
| "grad_norm": 0.8830556120481512, | |
| "learning_rate": 4.977756502962967e-06, | |
| "loss": 0.0684, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.21337579617834396, | |
| "grad_norm": 0.6113061227600053, | |
| "learning_rate": 4.97766128201432e-06, | |
| "loss": 0.0446, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.21383075523202913, | |
| "grad_norm": 0.6077789311617329, | |
| "learning_rate": 4.977565858602381e-06, | |
| "loss": 0.0554, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.21428571428571427, | |
| "grad_norm": 0.8598515142264441, | |
| "learning_rate": 4.977470232734949e-06, | |
| "loss": 0.0727, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.21474067333939945, | |
| "grad_norm": 0.8043286169945988, | |
| "learning_rate": 4.977374404419838e-06, | |
| "loss": 0.0592, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.21519563239308462, | |
| "grad_norm": 0.7551382062036437, | |
| "learning_rate": 4.977278373664877e-06, | |
| "loss": 0.0571, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.2156505914467698, | |
| "grad_norm": 1.8211283606473743, | |
| "learning_rate": 4.977182140477916e-06, | |
| "loss": 0.1033, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.21610555050045496, | |
| "grad_norm": 0.7146087276289771, | |
| "learning_rate": 4.977085704866817e-06, | |
| "loss": 0.0462, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.21656050955414013, | |
| "grad_norm": 0.6542895317184714, | |
| "learning_rate": 4.97698906683946e-06, | |
| "loss": 0.061, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.2170154686078253, | |
| "grad_norm": 1.0732518420250663, | |
| "learning_rate": 4.9768922264037435e-06, | |
| "loss": 0.0845, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.21747042766151045, | |
| "grad_norm": 0.6769767303273837, | |
| "learning_rate": 4.976795183567579e-06, | |
| "loss": 0.0484, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.21792538671519562, | |
| "grad_norm": 0.6792925907901064, | |
| "learning_rate": 4.976697938338898e-06, | |
| "loss": 0.0479, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.2183803457688808, | |
| "grad_norm": 0.550587338837319, | |
| "learning_rate": 4.976600490725645e-06, | |
| "loss": 0.0402, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.21883530482256597, | |
| "grad_norm": 0.9934557115485821, | |
| "learning_rate": 4.976502840735785e-06, | |
| "loss": 0.1096, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.21929026387625114, | |
| "grad_norm": 0.7026152824587227, | |
| "learning_rate": 4.976404988377297e-06, | |
| "loss": 0.0442, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.2197452229299363, | |
| "grad_norm": 1.1796498075270252, | |
| "learning_rate": 4.976306933658176e-06, | |
| "loss": 0.0896, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.22020018198362148, | |
| "grad_norm": 0.9196991108702705, | |
| "learning_rate": 4.976208676586435e-06, | |
| "loss": 0.0903, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.22065514103730663, | |
| "grad_norm": 0.9221909008992407, | |
| "learning_rate": 4.976110217170104e-06, | |
| "loss": 0.061, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.2211101000909918, | |
| "grad_norm": 0.8446946807888076, | |
| "learning_rate": 4.976011555417228e-06, | |
| "loss": 0.06, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.22156505914467697, | |
| "grad_norm": 0.8008200895651435, | |
| "learning_rate": 4.975912691335869e-06, | |
| "loss": 0.0552, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.22202001819836215, | |
| "grad_norm": 0.7897004108366357, | |
| "learning_rate": 4.975813624934106e-06, | |
| "loss": 0.0524, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.22247497725204732, | |
| "grad_norm": 0.7656059256782066, | |
| "learning_rate": 4.975714356220035e-06, | |
| "loss": 0.0532, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.2229299363057325, | |
| "grad_norm": 0.49990009073007735, | |
| "learning_rate": 4.975614885201766e-06, | |
| "loss": 0.0335, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.22338489535941766, | |
| "grad_norm": 0.7764965839211172, | |
| "learning_rate": 4.975515211887429e-06, | |
| "loss": 0.0663, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.22383985441310283, | |
| "grad_norm": 0.8335023150964008, | |
| "learning_rate": 4.9754153362851684e-06, | |
| "loss": 0.0635, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.22429481346678798, | |
| "grad_norm": 1.362631121260362, | |
| "learning_rate": 4.975315258403145e-06, | |
| "loss": 0.1184, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.22474977252047315, | |
| "grad_norm": 0.8072718888075444, | |
| "learning_rate": 4.975214978249537e-06, | |
| "loss": 0.0575, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.22520473157415832, | |
| "grad_norm": 0.7237599062848806, | |
| "learning_rate": 4.975114495832539e-06, | |
| "loss": 0.0629, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.2256596906278435, | |
| "grad_norm": 0.9013757169049615, | |
| "learning_rate": 4.975013811160362e-06, | |
| "loss": 0.0641, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.22611464968152867, | |
| "grad_norm": 1.046688141426079, | |
| "learning_rate": 4.974912924241233e-06, | |
| "loss": 0.0679, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.22656960873521384, | |
| "grad_norm": 0.7549334371309422, | |
| "learning_rate": 4.974811835083397e-06, | |
| "loss": 0.0619, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.227024567788899, | |
| "grad_norm": 1.4092663615099252, | |
| "learning_rate": 4.974710543695114e-06, | |
| "loss": 0.0907, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.22747952684258416, | |
| "grad_norm": 1.2767203765961839, | |
| "learning_rate": 4.974609050084661e-06, | |
| "loss": 0.1037, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.22793448589626933, | |
| "grad_norm": 0.957265553607594, | |
| "learning_rate": 4.974507354260332e-06, | |
| "loss": 0.0841, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.2283894449499545, | |
| "grad_norm": 1.0285318937850472, | |
| "learning_rate": 4.974405456230436e-06, | |
| "loss": 0.0876, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.22884440400363967, | |
| "grad_norm": 0.9438000836090487, | |
| "learning_rate": 4.974303356003301e-06, | |
| "loss": 0.0618, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.22929936305732485, | |
| "grad_norm": 0.7641433481492992, | |
| "learning_rate": 4.974201053587268e-06, | |
| "loss": 0.0623, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.22975432211101002, | |
| "grad_norm": 0.7211862506979909, | |
| "learning_rate": 4.9740985489907005e-06, | |
| "loss": 0.0458, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.2302092811646952, | |
| "grad_norm": 1.3113691041435898, | |
| "learning_rate": 4.973995842221971e-06, | |
| "loss": 0.0865, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.23066424021838033, | |
| "grad_norm": 1.1027187330835053, | |
| "learning_rate": 4.973892933289476e-06, | |
| "loss": 0.0817, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.2311191992720655, | |
| "grad_norm": 0.8000847819873458, | |
| "learning_rate": 4.97378982220162e-06, | |
| "loss": 0.0639, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.23157415832575068, | |
| "grad_norm": 0.5709614643890362, | |
| "learning_rate": 4.973686508966832e-06, | |
| "loss": 0.0427, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.23202911737943585, | |
| "grad_norm": 0.6348346044427912, | |
| "learning_rate": 4.973582993593554e-06, | |
| "loss": 0.0453, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.23248407643312102, | |
| "grad_norm": 0.7080077445614887, | |
| "learning_rate": 4.973479276090244e-06, | |
| "loss": 0.0567, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.2329390354868062, | |
| "grad_norm": 0.586722983901754, | |
| "learning_rate": 4.973375356465378e-06, | |
| "loss": 0.0398, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.23339399454049137, | |
| "grad_norm": 0.9373759345632122, | |
| "learning_rate": 4.973271234727447e-06, | |
| "loss": 0.083, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.2338489535941765, | |
| "grad_norm": 0.7290102387520916, | |
| "learning_rate": 4.97316691088496e-06, | |
| "loss": 0.0573, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.23430391264786168, | |
| "grad_norm": 1.2047650698868653, | |
| "learning_rate": 4.973062384946442e-06, | |
| "loss": 0.0979, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.23475887170154686, | |
| "grad_norm": 0.5553854533375087, | |
| "learning_rate": 4.9729576569204345e-06, | |
| "loss": 0.0493, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.23521383075523203, | |
| "grad_norm": 0.966683679171784, | |
| "learning_rate": 4.972852726815495e-06, | |
| "loss": 0.0744, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.2356687898089172, | |
| "grad_norm": 0.8972567842292303, | |
| "learning_rate": 4.972747594640197e-06, | |
| "loss": 0.0822, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.23612374886260237, | |
| "grad_norm": 0.9532248896529997, | |
| "learning_rate": 4.9726422604031335e-06, | |
| "loss": 0.0628, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.23657870791628755, | |
| "grad_norm": 0.5831731409388041, | |
| "learning_rate": 4.97253672411291e-06, | |
| "loss": 0.0499, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.2370336669699727, | |
| "grad_norm": 0.7629148584956371, | |
| "learning_rate": 4.972430985778152e-06, | |
| "loss": 0.0502, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.23748862602365786, | |
| "grad_norm": 0.8867114815888714, | |
| "learning_rate": 4.972325045407499e-06, | |
| "loss": 0.0551, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.23794358507734303, | |
| "grad_norm": 1.2463480840549028, | |
| "learning_rate": 4.972218903009608e-06, | |
| "loss": 0.0715, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.2383985441310282, | |
| "grad_norm": 0.782156462915191, | |
| "learning_rate": 4.972112558593153e-06, | |
| "loss": 0.0658, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.23885350318471338, | |
| "grad_norm": 0.5674610459457798, | |
| "learning_rate": 4.972006012166823e-06, | |
| "loss": 0.0443, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.23930846223839855, | |
| "grad_norm": 0.6676557313621811, | |
| "learning_rate": 4.971899263739326e-06, | |
| "loss": 0.052, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.23976342129208372, | |
| "grad_norm": 0.8996461781463584, | |
| "learning_rate": 4.971792313319384e-06, | |
| "loss": 0.0761, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.24021838034576887, | |
| "grad_norm": 0.7869388715576839, | |
| "learning_rate": 4.971685160915737e-06, | |
| "loss": 0.059, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.24067333939945404, | |
| "grad_norm": 0.8601250360554993, | |
| "learning_rate": 4.971577806537139e-06, | |
| "loss": 0.058, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.2411282984531392, | |
| "grad_norm": 0.860384363291072, | |
| "learning_rate": 4.971470250192366e-06, | |
| "loss": 0.0746, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.24158325750682438, | |
| "grad_norm": 17.481585256275345, | |
| "learning_rate": 4.9713624918902045e-06, | |
| "loss": 0.3357, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.24203821656050956, | |
| "grad_norm": 1.3228769141545746, | |
| "learning_rate": 4.971254531639461e-06, | |
| "loss": 0.0978, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.24249317561419473, | |
| "grad_norm": 0.9022991420443233, | |
| "learning_rate": 4.971146369448957e-06, | |
| "loss": 0.073, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.2429481346678799, | |
| "grad_norm": 0.8487996347147105, | |
| "learning_rate": 4.971038005327532e-06, | |
| "loss": 0.0772, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.24340309372156507, | |
| "grad_norm": 1.0939700661439853, | |
| "learning_rate": 4.970929439284039e-06, | |
| "loss": 0.1052, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.24385805277525022, | |
| "grad_norm": 1.1117793169544092, | |
| "learning_rate": 4.970820671327351e-06, | |
| "loss": 0.0838, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.2443130118289354, | |
| "grad_norm": 0.5711568883528185, | |
| "learning_rate": 4.9707117014663565e-06, | |
| "loss": 0.0477, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.24476797088262056, | |
| "grad_norm": 0.9911963887990124, | |
| "learning_rate": 4.97060252970996e-06, | |
| "loss": 0.0859, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.24522292993630573, | |
| "grad_norm": 0.8786877928757788, | |
| "learning_rate": 4.970493156067081e-06, | |
| "loss": 0.0672, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.2456778889899909, | |
| "grad_norm": 0.6358718673962386, | |
| "learning_rate": 4.970383580546658e-06, | |
| "loss": 0.049, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.24613284804367608, | |
| "grad_norm": 0.9673038276315246, | |
| "learning_rate": 4.970273803157645e-06, | |
| "loss": 0.0789, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.24658780709736125, | |
| "grad_norm": 0.7896663626576268, | |
| "learning_rate": 4.970163823909013e-06, | |
| "loss": 0.0636, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.2470427661510464, | |
| "grad_norm": 0.7725841407720596, | |
| "learning_rate": 4.970053642809748e-06, | |
| "loss": 0.0591, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.24749772520473157, | |
| "grad_norm": 0.8834486709832678, | |
| "learning_rate": 4.969943259868853e-06, | |
| "loss": 0.0741, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.24795268425841674, | |
| "grad_norm": 0.9862513700188255, | |
| "learning_rate": 4.969832675095351e-06, | |
| "loss": 0.0733, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.2484076433121019, | |
| "grad_norm": 0.9230048911450578, | |
| "learning_rate": 4.969721888498275e-06, | |
| "loss": 0.0784, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.24886260236578708, | |
| "grad_norm": 0.678321429576158, | |
| "learning_rate": 4.96961090008668e-06, | |
| "loss": 0.0548, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.24931756141947226, | |
| "grad_norm": 1.0377618196684284, | |
| "learning_rate": 4.969499709869635e-06, | |
| "loss": 0.0972, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.24977252047315743, | |
| "grad_norm": 1.0401408232919482, | |
| "learning_rate": 4.969388317856225e-06, | |
| "loss": 0.0803, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.2502274795268426, | |
| "grad_norm": 1.1187089275098543, | |
| "learning_rate": 4.969276724055554e-06, | |
| "loss": 0.0959, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.25068243858052774, | |
| "grad_norm": 0.955462869329459, | |
| "learning_rate": 4.969164928476741e-06, | |
| "loss": 0.0676, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.25113739763421294, | |
| "grad_norm": 0.8046461909524141, | |
| "learning_rate": 4.969052931128919e-06, | |
| "loss": 0.0648, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.2515923566878981, | |
| "grad_norm": 0.7081920862352523, | |
| "learning_rate": 4.968940732021243e-06, | |
| "loss": 0.0603, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.25204731574158323, | |
| "grad_norm": 0.9857688144173427, | |
| "learning_rate": 4.9688283311628795e-06, | |
| "loss": 0.0918, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.25250227479526843, | |
| "grad_norm": 0.8534813080817202, | |
| "learning_rate": 4.968715728563014e-06, | |
| "loss": 0.0679, | |
| "step": 555 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 10990, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 555, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 3666645319680.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
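
The object above is the `trainer_state.json` that the Hugging Face `Trainer` writes next to each saved checkpoint; with `save_steps` = 555 and `global_step` = 555 it is presumably the state for `checkpoint-555`, capturing the first 555 of 10990 optimizer steps (about a quarter of an epoch at `train_batch_size` = 1, logged every step). The sketch below is one minimal way to load it and plot the loss and learning-rate traces; the checkpoint path is an assumption, and the smoothing window is an arbitrary choice rather than anything the file specifies.

```python
# Minimal sketch: read a Hugging Face trainer_state.json and plot the
# logged training loss and learning rate. The path is an assumption;
# point it at the real checkpoint directory.
import json

import matplotlib.pyplot as plt

STATE_PATH = "checkpoint-555/trainer_state.json"  # assumed location

with open(STATE_PATH) as f:
    state = json.load(f)

# log_history holds one dict per logged step (logging_steps = 1 here),
# each with "step", "loss", "learning_rate", "grad_norm", and "epoch".
history = [h for h in state["log_history"] if "loss" in h]
steps = [h["step"] for h in history]
loss = [h["loss"] for h in history]
lr = [h["learning_rate"] for h in history]

def smooth(xs, k=25):
    """Trailing moving average; the window k is arbitrary."""
    return [sum(xs[max(0, i - k + 1):i + 1]) / (i - max(0, i - k + 1) + 1)
            for i in range(len(xs))]

fig, (ax_loss, ax_lr) = plt.subplots(2, 1, sharex=True, figsize=(8, 6))
ax_loss.plot(steps, loss, alpha=0.3, label="per-step loss")
ax_loss.plot(steps, smooth(loss), label="smoothed loss")
ax_loss.set_ylabel("training loss")
ax_loss.legend()
ax_lr.plot(steps, lr)
ax_lr.set_ylabel("learning rate")
ax_lr.set_xlabel("global step")
fig.tight_layout()
plt.show()
```

Two readings of the numbers, both inferences from the data rather than anything the file states: the logged learning rates agree (to the printed precision) with a warmup-free cosine decay, lr = 5e-06 * 0.5 * (1 + cos(pi * (step - 1) / 10990)), i.e. decay toward zero over `max_steps`; and the occasional spikes in `grad_norm` and `loss` (e.g. steps 381, 454, and 531) are isolated, with both traces returning to their prior range on the very next step.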