| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.7512341390280469, | |
| "eval_steps": 500, | |
| "global_step": 1284, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0005850733170000365, | |
| "grad_norm": 0.3784801935906415, | |
| "learning_rate": 7.999998479354072e-06, | |
| "loss": 1.1815, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.001170146634000073, | |
| "grad_norm": 0.10369689761982695, | |
| "learning_rate": 7.999993917417574e-06, | |
| "loss": 1.0085, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0017552199510001097, | |
| "grad_norm": 0.08165678751844849, | |
| "learning_rate": 7.99998631419436e-06, | |
| "loss": 0.934, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.002340293268000146, | |
| "grad_norm": 0.06369675215202451, | |
| "learning_rate": 7.999975669690855e-06, | |
| "loss": 1.0323, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.002925366585000183, | |
| "grad_norm": 0.08076101376684344, | |
| "learning_rate": 7.999961983916048e-06, | |
| "loss": 1.0676, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.0035104399020002195, | |
| "grad_norm": 0.06288930819751326, | |
| "learning_rate": 7.999945256881502e-06, | |
| "loss": 1.0421, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.004095513219000256, | |
| "grad_norm": 0.06851051488443692, | |
| "learning_rate": 7.999925488601348e-06, | |
| "loss": 0.9544, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.004680586536000292, | |
| "grad_norm": 0.06007707301663692, | |
| "learning_rate": 7.999902679092287e-06, | |
| "loss": 1.0349, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.005265659853000329, | |
| "grad_norm": 0.07674940091671953, | |
| "learning_rate": 7.99987682837359e-06, | |
| "loss": 1.1809, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.005850733170000366, | |
| "grad_norm": 0.06356726822529891, | |
| "learning_rate": 7.999847936467093e-06, | |
| "loss": 0.9502, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.006435806487000402, | |
| "grad_norm": 0.060093989725189235, | |
| "learning_rate": 7.999816003397203e-06, | |
| "loss": 1.0007, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.007020879804000439, | |
| "grad_norm": 0.04797598057222403, | |
| "learning_rate": 7.999781029190902e-06, | |
| "loss": 1.0316, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.007605953121000476, | |
| "grad_norm": 0.08827709210231136, | |
| "learning_rate": 7.999743013877734e-06, | |
| "loss": 1.0688, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.008191026438000511, | |
| "grad_norm": 0.04993529689396162, | |
| "learning_rate": 7.999701957489811e-06, | |
| "loss": 0.9695, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.008776099755000549, | |
| "grad_norm": 0.053685150458545855, | |
| "learning_rate": 7.999657860061823e-06, | |
| "loss": 1.0826, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.009361173072000585, | |
| "grad_norm": 0.04357397086005245, | |
| "learning_rate": 7.99961072163102e-06, | |
| "loss": 0.9788, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.009946246389000622, | |
| "grad_norm": 0.051838589957396514, | |
| "learning_rate": 7.999560542237226e-06, | |
| "loss": 1.0222, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.010531319706000658, | |
| "grad_norm": 0.04948384582310689, | |
| "learning_rate": 7.999507321922833e-06, | |
| "loss": 0.9882, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.011116393023000696, | |
| "grad_norm": 0.05488935540485584, | |
| "learning_rate": 7.9994510607328e-06, | |
| "loss": 1.0124, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.011701466340000731, | |
| "grad_norm": 0.04632979465330071, | |
| "learning_rate": 7.999391758714659e-06, | |
| "loss": 0.9646, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.012286539657000767, | |
| "grad_norm": 0.04102257708769348, | |
| "learning_rate": 7.999329415918508e-06, | |
| "loss": 0.8946, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.012871612974000805, | |
| "grad_norm": 0.045466366302330634, | |
| "learning_rate": 7.999264032397013e-06, | |
| "loss": 0.9973, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.01345668629100084, | |
| "grad_norm": 0.04440538659368297, | |
| "learning_rate": 7.99919560820541e-06, | |
| "loss": 1.0596, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.014041759608000878, | |
| "grad_norm": 0.042500986542824803, | |
| "learning_rate": 7.999124143401507e-06, | |
| "loss": 0.9303, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.014626832925000914, | |
| "grad_norm": 0.05180121412922999, | |
| "learning_rate": 7.999049638045675e-06, | |
| "loss": 0.9412, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.015211906242000951, | |
| "grad_norm": 0.06441302150178038, | |
| "learning_rate": 7.998972092200859e-06, | |
| "loss": 0.9986, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.015796979559000987, | |
| "grad_norm": 0.05297417453471626, | |
| "learning_rate": 7.998891505932565e-06, | |
| "loss": 1.0701, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.016382052876001023, | |
| "grad_norm": 0.04305692283062352, | |
| "learning_rate": 7.998807879308878e-06, | |
| "loss": 0.9823, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.016967126193001062, | |
| "grad_norm": 0.048331091733781785, | |
| "learning_rate": 7.998721212400443e-06, | |
| "loss": 0.9145, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.017552199510001098, | |
| "grad_norm": 0.04249327256576877, | |
| "learning_rate": 7.998631505280477e-06, | |
| "loss": 0.9469, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.018137272827001134, | |
| "grad_norm": 0.042672714299481426, | |
| "learning_rate": 7.998538758024765e-06, | |
| "loss": 0.9814, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.01872234614400117, | |
| "grad_norm": 0.04643044969291229, | |
| "learning_rate": 7.998442970711661e-06, | |
| "loss": 0.9342, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.019307419461001205, | |
| "grad_norm": 0.042411147754236, | |
| "learning_rate": 7.998344143422087e-06, | |
| "loss": 0.8851, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.019892492778001244, | |
| "grad_norm": 0.04407093546490425, | |
| "learning_rate": 7.998242276239527e-06, | |
| "loss": 1.0026, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.02047756609500128, | |
| "grad_norm": 0.044039695433035646, | |
| "learning_rate": 7.998137369250046e-06, | |
| "loss": 0.9637, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.021062639412001316, | |
| "grad_norm": 0.04258725681727216, | |
| "learning_rate": 7.998029422542267e-06, | |
| "loss": 0.9249, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.021647712729001352, | |
| "grad_norm": 0.045486820543060684, | |
| "learning_rate": 7.997918436207383e-06, | |
| "loss": 1.005, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.02223278604600139, | |
| "grad_norm": 0.03843585783476988, | |
| "learning_rate": 7.997804410339156e-06, | |
| "loss": 0.9815, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.022817859363001427, | |
| "grad_norm": 0.05544665751989288, | |
| "learning_rate": 7.997687345033915e-06, | |
| "loss": 0.94, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.023402932680001463, | |
| "grad_norm": 0.041440042889966715, | |
| "learning_rate": 7.99756724039056e-06, | |
| "loss": 0.9337, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.0239880059970015, | |
| "grad_norm": 0.040659761048442974, | |
| "learning_rate": 7.997444096510552e-06, | |
| "loss": 0.897, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.024573079314001534, | |
| "grad_norm": 0.06541743477477353, | |
| "learning_rate": 7.997317913497925e-06, | |
| "loss": 0.9325, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.025158152631001574, | |
| "grad_norm": 0.04101226832374059, | |
| "learning_rate": 7.997188691459279e-06, | |
| "loss": 0.9323, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.02574322594800161, | |
| "grad_norm": 0.03954326036578227, | |
| "learning_rate": 7.997056430503782e-06, | |
| "loss": 0.8793, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.026328299265001645, | |
| "grad_norm": 0.0396268421897809, | |
| "learning_rate": 7.996921130743165e-06, | |
| "loss": 0.9849, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.02691337258200168, | |
| "grad_norm": 0.03855187149117414, | |
| "learning_rate": 7.996782792291732e-06, | |
| "loss": 0.9328, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.02749844589900172, | |
| "grad_norm": 0.03791722568157901, | |
| "learning_rate": 7.996641415266355e-06, | |
| "loss": 0.9293, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.028083519216001756, | |
| "grad_norm": 0.04497262810695708, | |
| "learning_rate": 7.996496999786465e-06, | |
| "loss": 0.9884, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.02866859253300179, | |
| "grad_norm": 0.041089653577607424, | |
| "learning_rate": 7.996349545974065e-06, | |
| "loss": 0.9305, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.029253665850001827, | |
| "grad_norm": 0.04162057876282252, | |
| "learning_rate": 7.996199053953729e-06, | |
| "loss": 1.0771, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.029838739167001863, | |
| "grad_norm": 0.045060287442874644, | |
| "learning_rate": 7.996045523852587e-06, | |
| "loss": 0.9819, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.030423812484001903, | |
| "grad_norm": 0.03950486400361273, | |
| "learning_rate": 7.995888955800346e-06, | |
| "loss": 0.8911, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.03100888580100194, | |
| "grad_norm": 0.040638774400984866, | |
| "learning_rate": 7.995729349929275e-06, | |
| "loss": 0.9289, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.031593959118001974, | |
| "grad_norm": 0.04034434081592363, | |
| "learning_rate": 7.995566706374206e-06, | |
| "loss": 0.9529, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.03217903243500201, | |
| "grad_norm": 0.04315614004870732, | |
| "learning_rate": 7.995401025272545e-06, | |
| "loss": 0.9554, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.032764105752002046, | |
| "grad_norm": 0.03862522293897866, | |
| "learning_rate": 7.995232306764258e-06, | |
| "loss": 0.9604, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.03334917906900208, | |
| "grad_norm": 0.042952485422922036, | |
| "learning_rate": 7.995060550991879e-06, | |
| "loss": 0.9731, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.033934252386002124, | |
| "grad_norm": 0.04399639647859174, | |
| "learning_rate": 7.994885758100508e-06, | |
| "loss": 0.868, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.03451932570300216, | |
| "grad_norm": 0.03794998627596821, | |
| "learning_rate": 7.99470792823781e-06, | |
| "loss": 0.9176, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.035104399020002196, | |
| "grad_norm": 0.046410465067380204, | |
| "learning_rate": 7.99452706155402e-06, | |
| "loss": 0.9456, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.03568947233700223, | |
| "grad_norm": 0.04773216743183266, | |
| "learning_rate": 7.994343158201927e-06, | |
| "loss": 1.0049, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.03627454565400227, | |
| "grad_norm": 0.04395331205981261, | |
| "learning_rate": 7.994156218336901e-06, | |
| "loss": 0.9197, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.0368596189710023, | |
| "grad_norm": 0.04353152162367831, | |
| "learning_rate": 7.993966242116865e-06, | |
| "loss": 0.8978, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.03744469228800234, | |
| "grad_norm": 0.041195331266407585, | |
| "learning_rate": 7.993773229702312e-06, | |
| "loss": 0.965, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.038029765605002375, | |
| "grad_norm": 0.050163343033375746, | |
| "learning_rate": 7.993577181256304e-06, | |
| "loss": 0.8876, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.03861483892200241, | |
| "grad_norm": 0.06660490581550026, | |
| "learning_rate": 7.993378096944456e-06, | |
| "loss": 0.9082, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.03919991223900245, | |
| "grad_norm": 0.03984407119317513, | |
| "learning_rate": 7.99317597693496e-06, | |
| "loss": 0.9233, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.03978498555600249, | |
| "grad_norm": 0.04866369054757869, | |
| "learning_rate": 7.992970821398567e-06, | |
| "loss": 0.9349, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.040370058873002525, | |
| "grad_norm": 0.04990567711730111, | |
| "learning_rate": 7.99276263050859e-06, | |
| "loss": 0.985, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.04095513219000256, | |
| "grad_norm": 0.17411986574945643, | |
| "learning_rate": 7.992551404440914e-06, | |
| "loss": 1.0208, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.041540205507002596, | |
| "grad_norm": 0.04353986815925501, | |
| "learning_rate": 7.992337143373981e-06, | |
| "loss": 0.9633, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.04212527882400263, | |
| "grad_norm": 0.05318272939257367, | |
| "learning_rate": 7.9921198474888e-06, | |
| "loss": 1.0015, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.04271035214100267, | |
| "grad_norm": 0.059411931897509304, | |
| "learning_rate": 7.991899516968942e-06, | |
| "loss": 1.0224, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.043295425458002704, | |
| "grad_norm": 0.05746696657441689, | |
| "learning_rate": 7.991676152000545e-06, | |
| "loss": 0.9817, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.04388049877500274, | |
| "grad_norm": 0.03994063377968752, | |
| "learning_rate": 7.991449752772307e-06, | |
| "loss": 0.899, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.04446557209200278, | |
| "grad_norm": 0.05114807327866483, | |
| "learning_rate": 7.991220319475492e-06, | |
| "loss": 0.9627, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.04505064540900282, | |
| "grad_norm": 0.043413594203031196, | |
| "learning_rate": 7.990987852303923e-06, | |
| "loss": 0.9385, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.045635718726002854, | |
| "grad_norm": 0.044141893159488445, | |
| "learning_rate": 7.990752351453994e-06, | |
| "loss": 0.9214, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.04622079204300289, | |
| "grad_norm": 0.07098639926950194, | |
| "learning_rate": 7.990513817124652e-06, | |
| "loss": 0.9762, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.046805865360002925, | |
| "grad_norm": 0.04487096138718826, | |
| "learning_rate": 7.990272249517416e-06, | |
| "loss": 0.9379, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.04739093867700296, | |
| "grad_norm": 0.040488382771263605, | |
| "learning_rate": 7.990027648836359e-06, | |
| "loss": 0.9563, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.047976011994003, | |
| "grad_norm": 0.04352730030611419, | |
| "learning_rate": 7.989780015288123e-06, | |
| "loss": 0.9488, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.04856108531100303, | |
| "grad_norm": 0.04413441845817798, | |
| "learning_rate": 7.98952934908191e-06, | |
| "loss": 1.0336, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.04914615862800307, | |
| "grad_norm": 0.04193745680850997, | |
| "learning_rate": 7.989275650429482e-06, | |
| "loss": 0.8785, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.04973123194500311, | |
| "grad_norm": 0.04475381091812719, | |
| "learning_rate": 7.989018919545165e-06, | |
| "loss": 0.9443, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.05031630526200315, | |
| "grad_norm": 0.04234754821679888, | |
| "learning_rate": 7.988759156645845e-06, | |
| "loss": 0.9564, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.05090137857900318, | |
| "grad_norm": 0.054324472784088765, | |
| "learning_rate": 7.988496361950972e-06, | |
| "loss": 0.9824, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.05148645189600322, | |
| "grad_norm": 0.051967709927586946, | |
| "learning_rate": 7.988230535682556e-06, | |
| "loss": 0.914, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.052071525213003254, | |
| "grad_norm": 0.03996756018971234, | |
| "learning_rate": 7.987961678065169e-06, | |
| "loss": 0.9421, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.05265659853000329, | |
| "grad_norm": 0.07556612961446735, | |
| "learning_rate": 7.987689789325939e-06, | |
| "loss": 0.9791, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.053241671847003326, | |
| "grad_norm": 0.04557130060143385, | |
| "learning_rate": 7.987414869694562e-06, | |
| "loss": 0.9318, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.05382674516400336, | |
| "grad_norm": 0.0424700455078842, | |
| "learning_rate": 7.98713691940329e-06, | |
| "loss": 0.9745, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.0544118184810034, | |
| "grad_norm": 0.07879025661937863, | |
| "learning_rate": 7.986855938686935e-06, | |
| "loss": 0.9614, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.05499689179800344, | |
| "grad_norm": 0.04227267907542001, | |
| "learning_rate": 7.986571927782871e-06, | |
| "loss": 0.9317, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.055581965115003476, | |
| "grad_norm": 0.042275976377898476, | |
| "learning_rate": 7.986284886931033e-06, | |
| "loss": 0.8982, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.05616703843200351, | |
| "grad_norm": 0.0457764180229334, | |
| "learning_rate": 7.985994816373913e-06, | |
| "loss": 0.8803, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.05675211174900355, | |
| "grad_norm": 0.04305545472943831, | |
| "learning_rate": 7.985701716356565e-06, | |
| "loss": 0.9786, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.05733718506600358, | |
| "grad_norm": 0.045580685701444475, | |
| "learning_rate": 7.985405587126597e-06, | |
| "loss": 0.9036, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.05792225838300362, | |
| "grad_norm": 0.0442896526405325, | |
| "learning_rate": 7.985106428934183e-06, | |
| "loss": 0.9871, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.058507331700003655, | |
| "grad_norm": 0.04396583794537118, | |
| "learning_rate": 7.984804242032051e-06, | |
| "loss": 1.0145, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.05909240501700369, | |
| "grad_norm": 0.04488641679254391, | |
| "learning_rate": 7.984499026675494e-06, | |
| "loss": 0.9673, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.05967747833400373, | |
| "grad_norm": 0.0402258783966036, | |
| "learning_rate": 7.984190783122351e-06, | |
| "loss": 1.0458, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.06026255165100377, | |
| "grad_norm": 0.048502182555142354, | |
| "learning_rate": 7.983879511633036e-06, | |
| "loss": 0.8879, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.060847624968003805, | |
| "grad_norm": 0.04818474096979939, | |
| "learning_rate": 7.983565212470504e-06, | |
| "loss": 0.9467, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.06143269828500384, | |
| "grad_norm": 0.04176125713160911, | |
| "learning_rate": 7.983247885900283e-06, | |
| "loss": 0.9266, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.06201777160200388, | |
| "grad_norm": 0.04212530605033032, | |
| "learning_rate": 7.982927532190447e-06, | |
| "loss": 0.9179, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.06260284491900392, | |
| "grad_norm": 0.037198964746819226, | |
| "learning_rate": 7.982604151611633e-06, | |
| "loss": 1.0018, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.06318791823600395, | |
| "grad_norm": 0.04090799839826158, | |
| "learning_rate": 7.982277744437035e-06, | |
| "loss": 0.8756, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.06377299155300399, | |
| "grad_norm": 0.03857422273289164, | |
| "learning_rate": 7.981948310942402e-06, | |
| "loss": 0.8855, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.06435806487000402, | |
| "grad_norm": 0.05241752689478532, | |
| "learning_rate": 7.981615851406039e-06, | |
| "loss": 0.8862, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.06494313818700406, | |
| "grad_norm": 0.04086973369607326, | |
| "learning_rate": 7.981280366108814e-06, | |
| "loss": 0.9221, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.06552821150400409, | |
| "grad_norm": 0.03931043694281754, | |
| "learning_rate": 7.98094185533414e-06, | |
| "loss": 0.9417, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.06611328482100413, | |
| "grad_norm": 0.04169878931857935, | |
| "learning_rate": 7.980600319367995e-06, | |
| "loss": 0.958, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.06669835813800416, | |
| "grad_norm": 0.044230077430854955, | |
| "learning_rate": 7.980255758498908e-06, | |
| "loss": 0.9265, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.0672834314550042, | |
| "grad_norm": 0.04488148180330816, | |
| "learning_rate": 7.979908173017968e-06, | |
| "loss": 0.8908, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.06786850477200425, | |
| "grad_norm": 0.042038370275589085, | |
| "learning_rate": 7.979557563218815e-06, | |
| "loss": 0.8961, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.06845357808900428, | |
| "grad_norm": 0.04747064970378541, | |
| "learning_rate": 7.979203929397646e-06, | |
| "loss": 1.0609, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.06903865140600432, | |
| "grad_norm": 0.04392999493678844, | |
| "learning_rate": 7.97884727185321e-06, | |
| "loss": 0.9001, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.06962372472300435, | |
| "grad_norm": 0.040693633998808994, | |
| "learning_rate": 7.978487590886814e-06, | |
| "loss": 0.8562, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.07020879804000439, | |
| "grad_norm": 0.08337676841807191, | |
| "learning_rate": 7.978124886802316e-06, | |
| "loss": 0.9344, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.07079387135700442, | |
| "grad_norm": 0.03951441645023813, | |
| "learning_rate": 7.977759159906134e-06, | |
| "loss": 0.9182, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.07137894467400446, | |
| "grad_norm": 0.04427536962304041, | |
| "learning_rate": 7.977390410507229e-06, | |
| "loss": 0.9079, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.07196401799100449, | |
| "grad_norm": 0.047402666476443076, | |
| "learning_rate": 7.977018638917126e-06, | |
| "loss": 0.9442, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.07254909130800453, | |
| "grad_norm": 0.07806155365092546, | |
| "learning_rate": 7.976643845449897e-06, | |
| "loss": 0.9453, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.07313416462500458, | |
| "grad_norm": 0.04187989433422361, | |
| "learning_rate": 7.97626603042217e-06, | |
| "loss": 0.9762, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.0737192379420046, | |
| "grad_norm": 0.038153971027990764, | |
| "learning_rate": 7.975885194153125e-06, | |
| "loss": 0.9377, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.07430431125900465, | |
| "grad_norm": 0.04398811005912341, | |
| "learning_rate": 7.975501336964492e-06, | |
| "loss": 0.9655, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.07488938457600468, | |
| "grad_norm": 0.04310577216463342, | |
| "learning_rate": 7.975114459180555e-06, | |
| "loss": 0.9082, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.07547445789300472, | |
| "grad_norm": 0.04552741227747631, | |
| "learning_rate": 7.97472456112815e-06, | |
| "loss": 0.8667, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.07605953121000475, | |
| "grad_norm": 0.0406705187810207, | |
| "learning_rate": 7.974331643136666e-06, | |
| "loss": 0.9286, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.07664460452700479, | |
| "grad_norm": 0.16819457832404855, | |
| "learning_rate": 7.973935705538039e-06, | |
| "loss": 0.9724, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.07722967784400482, | |
| "grad_norm": 0.04524796652654962, | |
| "learning_rate": 7.973536748666756e-06, | |
| "loss": 0.961, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.07781475116100486, | |
| "grad_norm": 0.051171072191227115, | |
| "learning_rate": 7.973134772859862e-06, | |
| "loss": 0.9513, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.0783998244780049, | |
| "grad_norm": 0.044436153110071305, | |
| "learning_rate": 7.972729778456946e-06, | |
| "loss": 0.9363, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.07898489779500494, | |
| "grad_norm": 0.04485419817375143, | |
| "learning_rate": 7.97232176580015e-06, | |
| "loss": 0.8583, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.07956997111200498, | |
| "grad_norm": 0.044866876801222304, | |
| "learning_rate": 7.971910735234161e-06, | |
| "loss": 0.9859, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.080155044429005, | |
| "grad_norm": 0.03934716109284772, | |
| "learning_rate": 7.971496687106219e-06, | |
| "loss": 0.8592, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.08074011774600505, | |
| "grad_norm": 0.041852675369480244, | |
| "learning_rate": 7.971079621766117e-06, | |
| "loss": 0.9353, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.08132519106300508, | |
| "grad_norm": 0.3430745253576026, | |
| "learning_rate": 7.97065953956619e-06, | |
| "loss": 0.9602, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.08191026438000512, | |
| "grad_norm": 0.44783710464834237, | |
| "learning_rate": 7.970236440861327e-06, | |
| "loss": 0.9833, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.08249533769700515, | |
| "grad_norm": 0.29750847371388817, | |
| "learning_rate": 7.96981032600896e-06, | |
| "loss": 0.8244, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.08308041101400519, | |
| "grad_norm": 0.169443611740874, | |
| "learning_rate": 7.969381195369076e-06, | |
| "loss": 0.8983, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.08366548433100524, | |
| "grad_norm": 0.04111262427570532, | |
| "learning_rate": 7.968949049304204e-06, | |
| "loss": 0.9552, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.08425055764800526, | |
| "grad_norm": 0.04087231642049412, | |
| "learning_rate": 7.968513888179421e-06, | |
| "loss": 0.9051, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.08483563096500531, | |
| "grad_norm": 0.05663350951363164, | |
| "learning_rate": 7.968075712362356e-06, | |
| "loss": 0.8366, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.08542070428200534, | |
| "grad_norm": 0.04202895629977615, | |
| "learning_rate": 7.967634522223179e-06, | |
| "loss": 0.8115, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.08600577759900538, | |
| "grad_norm": 0.04238111014919017, | |
| "learning_rate": 7.96719031813461e-06, | |
| "loss": 0.9086, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.08659085091600541, | |
| "grad_norm": 0.04226501020383857, | |
| "learning_rate": 7.966743100471913e-06, | |
| "loss": 0.9286, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.08717592423300545, | |
| "grad_norm": 0.046325797442375215, | |
| "learning_rate": 7.9662928696129e-06, | |
| "loss": 0.9393, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.08776099755000548, | |
| "grad_norm": 0.05532068772615188, | |
| "learning_rate": 7.965839625937926e-06, | |
| "loss": 0.9202, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.08834607086700552, | |
| "grad_norm": 0.039360829289514745, | |
| "learning_rate": 7.965383369829894e-06, | |
| "loss": 0.908, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.08893114418400556, | |
| "grad_norm": 0.04965498407233415, | |
| "learning_rate": 7.964924101674252e-06, | |
| "loss": 0.9406, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.0895162175010056, | |
| "grad_norm": 0.04177674200968805, | |
| "learning_rate": 7.964461821858987e-06, | |
| "loss": 0.8933, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.09010129081800564, | |
| "grad_norm": 0.04711456314429998, | |
| "learning_rate": 7.963996530774639e-06, | |
| "loss": 1.0111, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.09068636413500566, | |
| "grad_norm": 0.045100723994096155, | |
| "learning_rate": 7.963528228814285e-06, | |
| "loss": 0.9806, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.09127143745200571, | |
| "grad_norm": 0.09448573408012474, | |
| "learning_rate": 7.96305691637355e-06, | |
| "loss": 0.9142, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.09185651076900574, | |
| "grad_norm": 0.04297907772876167, | |
| "learning_rate": 7.962582593850596e-06, | |
| "loss": 0.8852, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.09244158408600578, | |
| "grad_norm": 0.05151683390684187, | |
| "learning_rate": 7.962105261646138e-06, | |
| "loss": 0.9975, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.09302665740300581, | |
| "grad_norm": 0.07242957126413647, | |
| "learning_rate": 7.961624920163423e-06, | |
| "loss": 0.9196, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.09361173072000585, | |
| "grad_norm": 0.08840439035290122, | |
| "learning_rate": 7.961141569808248e-06, | |
| "loss": 0.92, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.0941968040370059, | |
| "grad_norm": 0.11047843778949552, | |
| "learning_rate": 7.960655210988948e-06, | |
| "loss": 0.9452, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.09478187735400592, | |
| "grad_norm": 0.04275273565605597, | |
| "learning_rate": 7.960165844116399e-06, | |
| "loss": 0.9641, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.09536695067100597, | |
| "grad_norm": 0.05662855927390397, | |
| "learning_rate": 7.959673469604025e-06, | |
| "loss": 0.9354, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.095952023988006, | |
| "grad_norm": 0.04769002643125012, | |
| "learning_rate": 7.959178087867779e-06, | |
| "loss": 0.9087, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.09653709730500604, | |
| "grad_norm": 0.050744023655463925, | |
| "learning_rate": 7.958679699326164e-06, | |
| "loss": 0.9561, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.09712217062200607, | |
| "grad_norm": 0.058451109924341715, | |
| "learning_rate": 7.958178304400222e-06, | |
| "loss": 0.9881, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.09770724393900611, | |
| "grad_norm": 0.04338399471073774, | |
| "learning_rate": 7.95767390351353e-06, | |
| "loss": 0.9705, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.09829231725600614, | |
| "grad_norm": 0.04901291931441639, | |
| "learning_rate": 7.95716649709221e-06, | |
| "loss": 0.9229, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.09887739057300618, | |
| "grad_norm": 0.044284928725944674, | |
| "learning_rate": 7.95665608556492e-06, | |
| "loss": 0.9493, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.09946246389000622, | |
| "grad_norm": 0.04914804115067185, | |
| "learning_rate": 7.956142669362855e-06, | |
| "loss": 0.9879, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.10004753720700625, | |
| "grad_norm": 0.04087870881565583, | |
| "learning_rate": 7.955626248919752e-06, | |
| "loss": 0.9435, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.1006326105240063, | |
| "grad_norm": 0.04938743203389444, | |
| "learning_rate": 7.955106824671888e-06, | |
| "loss": 0.8813, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.10121768384100632, | |
| "grad_norm": 0.04440441336302364, | |
| "learning_rate": 7.95458439705807e-06, | |
| "loss": 0.954, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.10180275715800637, | |
| "grad_norm": 0.04052619663535755, | |
| "learning_rate": 7.954058966519649e-06, | |
| "loss": 0.8759, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.1023878304750064, | |
| "grad_norm": 0.04315943430366373, | |
| "learning_rate": 7.953530533500507e-06, | |
| "loss": 0.8621, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.10297290379200644, | |
| "grad_norm": 0.07065437183104553, | |
| "learning_rate": 7.952999098447072e-06, | |
| "loss": 0.9796, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.10355797710900647, | |
| "grad_norm": 0.04286710979601013, | |
| "learning_rate": 7.952464661808297e-06, | |
| "loss": 0.9187, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.10414305042600651, | |
| "grad_norm": 0.044637305084684484, | |
| "learning_rate": 7.951927224035678e-06, | |
| "loss": 0.8772, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.10472812374300655, | |
| "grad_norm": 0.04369651860095562, | |
| "learning_rate": 7.951386785583244e-06, | |
| "loss": 0.8969, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.10531319706000658, | |
| "grad_norm": 0.05048322425196143, | |
| "learning_rate": 7.950843346907559e-06, | |
| "loss": 0.8907, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.10589827037700662, | |
| "grad_norm": 0.03884181381210202, | |
| "learning_rate": 7.95029690846772e-06, | |
| "loss": 0.931, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.10648334369400665, | |
| "grad_norm": 0.05060670376656713, | |
| "learning_rate": 7.949747470725362e-06, | |
| "loss": 0.8624, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.1070684170110067, | |
| "grad_norm": 0.04162121727818916, | |
| "learning_rate": 7.949195034144653e-06, | |
| "loss": 0.9141, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.10765349032800672, | |
| "grad_norm": 0.04729330740672752, | |
| "learning_rate": 7.94863959919229e-06, | |
| "loss": 0.921, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.10823856364500677, | |
| "grad_norm": 0.04459204791909808, | |
| "learning_rate": 7.948081166337509e-06, | |
| "loss": 0.8993, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.1088236369620068, | |
| "grad_norm": 0.04645986236352756, | |
| "learning_rate": 7.947519736052075e-06, | |
| "loss": 0.9158, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.10940871027900684, | |
| "grad_norm": 0.04275967646092167, | |
| "learning_rate": 7.946955308810285e-06, | |
| "loss": 0.9387, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.10999378359600688, | |
| "grad_norm": 0.07421648959437231, | |
| "learning_rate": 7.94638788508897e-06, | |
| "loss": 0.8497, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.11057885691300691, | |
| "grad_norm": 0.04955170539289863, | |
| "learning_rate": 7.945817465367493e-06, | |
| "loss": 0.8525, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.11116393023000695, | |
| "grad_norm": 0.12013696366466048, | |
| "learning_rate": 7.945244050127744e-06, | |
| "loss": 0.9616, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.11174900354700698, | |
| "grad_norm": 0.040829512145039985, | |
| "learning_rate": 7.944667639854148e-06, | |
| "loss": 0.8344, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.11233407686400702, | |
| "grad_norm": 0.05068364591326354, | |
| "learning_rate": 7.944088235033657e-06, | |
| "loss": 0.9403, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 0.11291915018100705, | |
| "grad_norm": 0.0499939002215986, | |
| "learning_rate": 7.943505836155753e-06, | |
| "loss": 0.9475, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 0.1135042234980071, | |
| "grad_norm": 0.05407026250866459, | |
| "learning_rate": 7.94292044371245e-06, | |
| "loss": 0.9101, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.11408929681500712, | |
| "grad_norm": 0.06417314751489454, | |
| "learning_rate": 7.94233205819829e-06, | |
| "loss": 0.8787, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.11467437013200717, | |
| "grad_norm": 0.03871241656337873, | |
| "learning_rate": 7.941740680110343e-06, | |
| "loss": 0.9059, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.11525944344900721, | |
| "grad_norm": 0.041501526897382096, | |
| "learning_rate": 7.941146309948205e-06, | |
| "loss": 0.8946, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.11584451676600724, | |
| "grad_norm": 0.037938175729775744, | |
| "learning_rate": 7.940548948214005e-06, | |
| "loss": 0.8534, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.11642959008300728, | |
| "grad_norm": 0.038697315108935856, | |
| "learning_rate": 7.939948595412394e-06, | |
| "loss": 0.9671, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.11701466340000731, | |
| "grad_norm": 0.040128164685532784, | |
| "learning_rate": 7.939345252050552e-06, | |
| "loss": 0.9452, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.11759973671700735, | |
| "grad_norm": 0.04159138776071103, | |
| "learning_rate": 7.938738918638187e-06, | |
| "loss": 0.9525, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.11818481003400738, | |
| "grad_norm": 0.044356876333774484, | |
| "learning_rate": 7.93812959568753e-06, | |
| "loss": 0.8863, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.11876988335100742, | |
| "grad_norm": 0.04110203273326361, | |
| "learning_rate": 7.93751728371334e-06, | |
| "loss": 0.8585, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.11935495666800745, | |
| "grad_norm": 0.03907623380015084, | |
| "learning_rate": 7.9369019832329e-06, | |
| "loss": 0.8901, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 0.1199400299850075, | |
| "grad_norm": 0.06841747716076531, | |
| "learning_rate": 7.936283694766016e-06, | |
| "loss": 0.9382, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.12052510330200754, | |
| "grad_norm": 0.03864267029597622, | |
| "learning_rate": 7.935662418835023e-06, | |
| "loss": 0.9445, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.12111017661900757, | |
| "grad_norm": 0.04187477888552647, | |
| "learning_rate": 7.935038155964775e-06, | |
| "loss": 0.949, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.12169524993600761, | |
| "grad_norm": 0.03844046778107278, | |
| "learning_rate": 7.934410906682653e-06, | |
| "loss": 0.8185, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.12228032325300764, | |
| "grad_norm": 0.03894778215959397, | |
| "learning_rate": 7.933780671518558e-06, | |
| "loss": 0.8226, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.12286539657000768, | |
| "grad_norm": 0.05316470088962357, | |
| "learning_rate": 7.933147451004914e-06, | |
| "loss": 0.9149, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.12345046988700771, | |
| "grad_norm": 0.04054196015487159, | |
| "learning_rate": 7.932511245676669e-06, | |
| "loss": 0.907, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.12403554320400775, | |
| "grad_norm": 0.04197248747401694, | |
| "learning_rate": 7.931872056071292e-06, | |
| "loss": 0.8974, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.12462061652100778, | |
| "grad_norm": 0.04107563523275673, | |
| "learning_rate": 7.931229882728771e-06, | |
| "loss": 0.8758, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.12520568983800784, | |
| "grad_norm": 0.04898006391812857, | |
| "learning_rate": 7.930584726191616e-06, | |
| "loss": 0.9015, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.12579076315500787, | |
| "grad_norm": 0.053828637223887026, | |
| "learning_rate": 7.92993658700486e-06, | |
| "loss": 0.9095, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.1263758364720079, | |
| "grad_norm": 0.041596264391364365, | |
| "learning_rate": 7.929285465716051e-06, | |
| "loss": 0.9324, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 0.12696090978900793, | |
| "grad_norm": 0.04270202089537964, | |
| "learning_rate": 7.928631362875258e-06, | |
| "loss": 0.9712, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 0.12754598310600798, | |
| "grad_norm": 0.04508600982672278, | |
| "learning_rate": 7.927974279035069e-06, | |
| "loss": 0.8526, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.128131056423008, | |
| "grad_norm": 0.04210505656160864, | |
| "learning_rate": 7.927314214750592e-06, | |
| "loss": 0.8483, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.12871612974000804, | |
| "grad_norm": 0.04228456764136032, | |
| "learning_rate": 7.926651170579451e-06, | |
| "loss": 1.0049, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.12930120305700807, | |
| "grad_norm": 0.039866451292078504, | |
| "learning_rate": 7.92598514708179e-06, | |
| "loss": 0.9421, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.12988627637400812, | |
| "grad_norm": 0.037613132725661406, | |
| "learning_rate": 7.925316144820263e-06, | |
| "loss": 0.9474, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.13047134969100815, | |
| "grad_norm": 0.04168086524600805, | |
| "learning_rate": 7.92464416436005e-06, | |
| "loss": 0.9058, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.13105642300800818, | |
| "grad_norm": 0.039621964711338775, | |
| "learning_rate": 7.923969206268839e-06, | |
| "loss": 0.9086, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.13164149632500824, | |
| "grad_norm": 0.05209282643387043, | |
| "learning_rate": 7.923291271116838e-06, | |
| "loss": 0.9298, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.13222656964200827, | |
| "grad_norm": 0.044258185244179175, | |
| "learning_rate": 7.92261035947677e-06, | |
| "loss": 0.8925, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.1328116429590083, | |
| "grad_norm": 0.039844108391859055, | |
| "learning_rate": 7.92192647192387e-06, | |
| "loss": 0.8392, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.13339671627600833, | |
| "grad_norm": 0.044744651789733476, | |
| "learning_rate": 7.92123960903589e-06, | |
| "loss": 0.9329, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 0.13398178959300838, | |
| "grad_norm": 0.04494192821446448, | |
| "learning_rate": 7.92054977139309e-06, | |
| "loss": 0.8606, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 0.1345668629100084, | |
| "grad_norm": 0.05863838322698434, | |
| "learning_rate": 7.919856959578252e-06, | |
| "loss": 0.9302, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.13515193622700844, | |
| "grad_norm": 0.04085984556832983, | |
| "learning_rate": 7.919161174176663e-06, | |
| "loss": 0.9993, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.1357370095440085, | |
| "grad_norm": 0.04500674877406008, | |
| "learning_rate": 7.918462415776125e-06, | |
| "loss": 0.9377, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.13632208286100853, | |
| "grad_norm": 0.039173528958006375, | |
| "learning_rate": 7.917760684966955e-06, | |
| "loss": 0.813, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.13690715617800855, | |
| "grad_norm": 0.04298295272921228, | |
| "learning_rate": 7.91705598234197e-06, | |
| "loss": 0.8952, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.13749222949500858, | |
| "grad_norm": 0.04028408438122686, | |
| "learning_rate": 7.916348308496513e-06, | |
| "loss": 0.9051, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.13807730281200864, | |
| "grad_norm": 0.06743695497866435, | |
| "learning_rate": 7.915637664028423e-06, | |
| "loss": 0.9475, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.13866237612900867, | |
| "grad_norm": 0.03949575625475006, | |
| "learning_rate": 7.914924049538061e-06, | |
| "loss": 0.9316, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.1392474494460087, | |
| "grad_norm": 0.04030623921903529, | |
| "learning_rate": 7.914207465628284e-06, | |
| "loss": 0.885, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.13983252276300873, | |
| "grad_norm": 0.047528726409039795, | |
| "learning_rate": 7.91348791290447e-06, | |
| "loss": 0.918, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.14041759608000878, | |
| "grad_norm": 0.04466991571391728, | |
| "learning_rate": 7.912765391974496e-06, | |
| "loss": 0.9305, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.1410026693970088, | |
| "grad_norm": 0.04186868219039162, | |
| "learning_rate": 7.912039903448752e-06, | |
| "loss": 0.9415, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 0.14158774271400884, | |
| "grad_norm": 0.05801571080351748, | |
| "learning_rate": 7.91131144794013e-06, | |
| "loss": 0.8787, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.1421728160310089, | |
| "grad_norm": 0.045088484806881386, | |
| "learning_rate": 7.910580026064038e-06, | |
| "loss": 1.0604, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.14275788934800893, | |
| "grad_norm": 0.06574863742707004, | |
| "learning_rate": 7.909845638438377e-06, | |
| "loss": 0.9216, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.14334296266500896, | |
| "grad_norm": 0.0372095564143721, | |
| "learning_rate": 7.909108285683563e-06, | |
| "loss": 0.9336, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.14392803598200898, | |
| "grad_norm": 0.04397627871472844, | |
| "learning_rate": 7.908367968422515e-06, | |
| "loss": 0.9261, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.14451310929900904, | |
| "grad_norm": 0.04276892267104892, | |
| "learning_rate": 7.907624687280654e-06, | |
| "loss": 0.8673, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.14509818261600907, | |
| "grad_norm": 0.045304865664725656, | |
| "learning_rate": 7.906878442885907e-06, | |
| "loss": 0.9475, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.1456832559330091, | |
| "grad_norm": 0.04370076931511423, | |
| "learning_rate": 7.906129235868702e-06, | |
| "loss": 0.8433, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.14626832925000915, | |
| "grad_norm": 0.0672588807453601, | |
| "learning_rate": 7.905377066861973e-06, | |
| "loss": 0.9124, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.14685340256700918, | |
| "grad_norm": 0.046912566764935076, | |
| "learning_rate": 7.904621936501156e-06, | |
| "loss": 0.9044, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.1474384758840092, | |
| "grad_norm": 0.07278415346178851, | |
| "learning_rate": 7.903863845424185e-06, | |
| "loss": 0.901, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 0.14802354920100924, | |
| "grad_norm": 0.04218918671820467, | |
| "learning_rate": 7.9031027942715e-06, | |
| "loss": 0.9019, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 0.1486086225180093, | |
| "grad_norm": 0.03858325311022567, | |
| "learning_rate": 7.90233878368604e-06, | |
| "loss": 0.8601, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.14919369583500933, | |
| "grad_norm": 0.04209886442727145, | |
| "learning_rate": 7.90157181431324e-06, | |
| "loss": 0.8681, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.14977876915200936, | |
| "grad_norm": 0.049400812285918, | |
| "learning_rate": 7.90080188680104e-06, | |
| "loss": 0.9494, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.15036384246900938, | |
| "grad_norm": 0.047941941474994906, | |
| "learning_rate": 7.900029001799882e-06, | |
| "loss": 0.8439, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.15094891578600944, | |
| "grad_norm": 0.042101153780900284, | |
| "learning_rate": 7.899253159962694e-06, | |
| "loss": 0.917, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.15153398910300947, | |
| "grad_norm": 0.040583990176968884, | |
| "learning_rate": 7.898474361944915e-06, | |
| "loss": 0.9328, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.1521190624200095, | |
| "grad_norm": 0.04000469028913075, | |
| "learning_rate": 7.897692608404474e-06, | |
| "loss": 0.9454, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.15270413573700956, | |
| "grad_norm": 0.04819787772789072, | |
| "learning_rate": 7.8969079000018e-06, | |
| "loss": 1.0061, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.15328920905400958, | |
| "grad_norm": 0.04670811295251141, | |
| "learning_rate": 7.896120237399817e-06, | |
| "loss": 0.8678, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.1538742823710096, | |
| "grad_norm": 0.03876277230266676, | |
| "learning_rate": 7.895329621263945e-06, | |
| "loss": 0.9465, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.15445935568800964, | |
| "grad_norm": 0.06917560168966953, | |
| "learning_rate": 7.894536052262098e-06, | |
| "loss": 0.9114, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 0.1550444290050097, | |
| "grad_norm": 0.052197549375990714, | |
| "learning_rate": 7.893739531064688e-06, | |
| "loss": 0.8966, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.15562950232200973, | |
| "grad_norm": 0.05089520469658166, | |
| "learning_rate": 7.892940058344615e-06, | |
| "loss": 0.8403, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.15621457563900976, | |
| "grad_norm": 0.04200303069403226, | |
| "learning_rate": 7.89213763477728e-06, | |
| "loss": 0.7954, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.1567996489560098, | |
| "grad_norm": 0.04045997821257577, | |
| "learning_rate": 7.89133226104057e-06, | |
| "loss": 0.9484, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.15738472227300984, | |
| "grad_norm": 0.04355833968928891, | |
| "learning_rate": 7.890523937814872e-06, | |
| "loss": 0.8871, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.15796979559000987, | |
| "grad_norm": 0.038980722774793516, | |
| "learning_rate": 7.889712665783055e-06, | |
| "loss": 0.8242, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.1585548689070099, | |
| "grad_norm": 0.0417431555190703, | |
| "learning_rate": 7.888898445630486e-06, | |
| "loss": 0.918, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.15913994222400996, | |
| "grad_norm": 0.04420422353440596, | |
| "learning_rate": 7.888081278045022e-06, | |
| "loss": 0.9345, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.15972501554100998, | |
| "grad_norm": 0.043453256463112454, | |
| "learning_rate": 7.88726116371701e-06, | |
| "loss": 0.8439, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.16031008885801, | |
| "grad_norm": 0.04018335492133053, | |
| "learning_rate": 7.88643810333928e-06, | |
| "loss": 0.9024, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.16089516217501004, | |
| "grad_norm": 0.03885288803364117, | |
| "learning_rate": 7.885612097607161e-06, | |
| "loss": 0.9005, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.1614802354920101, | |
| "grad_norm": 0.03940189643105726, | |
| "learning_rate": 7.884783147218464e-06, | |
| "loss": 0.8726, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 0.16206530880901013, | |
| "grad_norm": 0.04322545289358093, | |
| "learning_rate": 7.88395125287349e-06, | |
| "loss": 0.9309, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 0.16265038212601016, | |
| "grad_norm": 0.03991615308835092, | |
| "learning_rate": 7.883116415275022e-06, | |
| "loss": 0.9319, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.1632354554430102, | |
| "grad_norm": 0.04774591346437184, | |
| "learning_rate": 7.882278635128339e-06, | |
| "loss": 0.9976, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.16382052876001024, | |
| "grad_norm": 0.043726280435400645, | |
| "learning_rate": 7.881437913141196e-06, | |
| "loss": 0.9041, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.16440560207701027, | |
| "grad_norm": 0.05349577122745657, | |
| "learning_rate": 7.880594250023842e-06, | |
| "loss": 0.9109, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.1649906753940103, | |
| "grad_norm": 0.04664425873865413, | |
| "learning_rate": 7.879747646489002e-06, | |
| "loss": 0.8872, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.16557574871101036, | |
| "grad_norm": 0.04689991827127376, | |
| "learning_rate": 7.878898103251891e-06, | |
| "loss": 0.946, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.16616082202801039, | |
| "grad_norm": 0.04419788368438809, | |
| "learning_rate": 7.87804562103021e-06, | |
| "loss": 0.8699, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.16674589534501041, | |
| "grad_norm": 0.0498909584325992, | |
| "learning_rate": 7.877190200544131e-06, | |
| "loss": 0.8396, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.16733096866201047, | |
| "grad_norm": 0.04446094526551824, | |
| "learning_rate": 7.876331842516323e-06, | |
| "loss": 0.887, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.1679160419790105, | |
| "grad_norm": 0.06094680175561847, | |
| "learning_rate": 7.875470547671926e-06, | |
| "loss": 0.8834, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.16850111529601053, | |
| "grad_norm": 0.038876474999689326, | |
| "learning_rate": 7.874606316738566e-06, | |
| "loss": 0.8975, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 0.16908618861301056, | |
| "grad_norm": 0.04076135396799628, | |
| "learning_rate": 7.873739150446349e-06, | |
| "loss": 0.9094, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 0.16967126193001061, | |
| "grad_norm": 0.04242085203333459, | |
| "learning_rate": 7.872869049527855e-06, | |
| "loss": 0.9346, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.17025633524701064, | |
| "grad_norm": 0.04192270928126719, | |
| "learning_rate": 7.871996014718154e-06, | |
| "loss": 0.916, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.17084140856401067, | |
| "grad_norm": 0.06296131776401025, | |
| "learning_rate": 7.871120046754787e-06, | |
| "loss": 0.7869, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.1714264818810107, | |
| "grad_norm": 0.04208658542590707, | |
| "learning_rate": 7.870241146377773e-06, | |
| "loss": 0.863, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.17201155519801076, | |
| "grad_norm": 0.04221040890826715, | |
| "learning_rate": 7.869359314329613e-06, | |
| "loss": 0.8125, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.17259662851501079, | |
| "grad_norm": 0.044062682914531, | |
| "learning_rate": 7.868474551355277e-06, | |
| "loss": 0.8283, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.17318170183201081, | |
| "grad_norm": 0.053136044791769796, | |
| "learning_rate": 7.867586858202221e-06, | |
| "loss": 0.9321, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.17376677514901087, | |
| "grad_norm": 0.041998835388755755, | |
| "learning_rate": 7.866696235620367e-06, | |
| "loss": 0.9435, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.1743518484660109, | |
| "grad_norm": 0.04432061620173052, | |
| "learning_rate": 7.865802684362119e-06, | |
| "loss": 0.944, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.17493692178301093, | |
| "grad_norm": 0.03816132379303917, | |
| "learning_rate": 7.864906205182347e-06, | |
| "loss": 0.9222, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.17552199510001096, | |
| "grad_norm": 0.04061878988742196, | |
| "learning_rate": 7.864006798838405e-06, | |
| "loss": 0.9344, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.17610706841701101, | |
| "grad_norm": 0.038725999488975066, | |
| "learning_rate": 7.863104466090113e-06, | |
| "loss": 0.9477, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 0.17669214173401104, | |
| "grad_norm": 0.0675670307551766, | |
| "learning_rate": 7.862199207699763e-06, | |
| "loss": 0.8939, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.17727721505101107, | |
| "grad_norm": 0.04866290151842816, | |
| "learning_rate": 7.861291024432122e-06, | |
| "loss": 0.944, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.17786228836801113, | |
| "grad_norm": 0.04231401792052211, | |
| "learning_rate": 7.860379917054426e-06, | |
| "loss": 0.9108, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.17844736168501116, | |
| "grad_norm": 0.21645206187990054, | |
| "learning_rate": 7.859465886336381e-06, | |
| "loss": 0.9328, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.1790324350020112, | |
| "grad_norm": 0.04212212891416765, | |
| "learning_rate": 7.858548933050162e-06, | |
| "loss": 0.9755, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.17961750831901122, | |
| "grad_norm": 0.047331820432207364, | |
| "learning_rate": 7.857629057970417e-06, | |
| "loss": 0.8702, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.18020258163601127, | |
| "grad_norm": 0.040260553510288316, | |
| "learning_rate": 7.856706261874258e-06, | |
| "loss": 0.8934, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.1807876549530113, | |
| "grad_norm": 0.04326897093604986, | |
| "learning_rate": 7.855780545541264e-06, | |
| "loss": 0.877, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.18137272827001133, | |
| "grad_norm": 0.03936456606263684, | |
| "learning_rate": 7.854851909753487e-06, | |
| "loss": 0.9206, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.18195780158701136, | |
| "grad_norm": 0.040529920237504666, | |
| "learning_rate": 7.853920355295438e-06, | |
| "loss": 0.8469, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.18254287490401142, | |
| "grad_norm": 0.07984199172508148, | |
| "learning_rate": 7.852985882954102e-06, | |
| "loss": 0.856, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 0.18312794822101144, | |
| "grad_norm": 0.04341308255910565, | |
| "learning_rate": 7.85204849351892e-06, | |
| "loss": 0.8975, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 0.18371302153801147, | |
| "grad_norm": 0.05163482815007264, | |
| "learning_rate": 7.851108187781802e-06, | |
| "loss": 0.8516, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.18429809485501153, | |
| "grad_norm": 0.04363599571939765, | |
| "learning_rate": 7.850164966537124e-06, | |
| "loss": 0.9088, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.18488316817201156, | |
| "grad_norm": 0.043024652012164384, | |
| "learning_rate": 7.84921883058172e-06, | |
| "loss": 0.8291, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.1854682414890116, | |
| "grad_norm": 0.041659608271311335, | |
| "learning_rate": 7.848269780714892e-06, | |
| "loss": 0.9719, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.18605331480601162, | |
| "grad_norm": 0.04557805725835434, | |
| "learning_rate": 7.847317817738394e-06, | |
| "loss": 0.9638, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.18663838812301167, | |
| "grad_norm": 0.04388413396186285, | |
| "learning_rate": 7.846362942456455e-06, | |
| "loss": 0.93, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.1872234614400117, | |
| "grad_norm": 0.04052950822615268, | |
| "learning_rate": 7.845405155675752e-06, | |
| "loss": 0.8951, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.18780853475701173, | |
| "grad_norm": 0.03852369264523642, | |
| "learning_rate": 7.844444458205428e-06, | |
| "loss": 0.8521, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.1883936080740118, | |
| "grad_norm": 0.045799254500923765, | |
| "learning_rate": 7.843480850857083e-06, | |
| "loss": 0.8966, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.18897868139101182, | |
| "grad_norm": 0.04373024374645557, | |
| "learning_rate": 7.842514334444776e-06, | |
| "loss": 0.973, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.18956375470801184, | |
| "grad_norm": 0.037372310076302304, | |
| "learning_rate": 7.841544909785022e-06, | |
| "loss": 1.0232, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 0.19014882802501187, | |
| "grad_norm": 0.044911079932608254, | |
| "learning_rate": 7.840572577696798e-06, | |
| "loss": 0.8351, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.19073390134201193, | |
| "grad_norm": 0.04820487217400144, | |
| "learning_rate": 7.839597339001529e-06, | |
| "loss": 0.9381, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.19131897465901196, | |
| "grad_norm": 0.04460622903204633, | |
| "learning_rate": 7.8386191945231e-06, | |
| "loss": 0.9047, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.191904047976012, | |
| "grad_norm": 0.04344227987010105, | |
| "learning_rate": 7.837638145087855e-06, | |
| "loss": 0.8882, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.19248912129301204, | |
| "grad_norm": 0.041162516007716705, | |
| "learning_rate": 7.836654191524583e-06, | |
| "loss": 0.888, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.19307419461001207, | |
| "grad_norm": 0.050217278727892056, | |
| "learning_rate": 7.835667334664533e-06, | |
| "loss": 0.9425, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.1936592679270121, | |
| "grad_norm": 0.045239661384039016, | |
| "learning_rate": 7.834677575341407e-06, | |
| "loss": 0.845, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.19424434124401213, | |
| "grad_norm": 0.04069762643444013, | |
| "learning_rate": 7.833684914391354e-06, | |
| "loss": 0.9045, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.1948294145610122, | |
| "grad_norm": 0.03659391253836006, | |
| "learning_rate": 7.832689352652978e-06, | |
| "loss": 0.8415, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.19541448787801222, | |
| "grad_norm": 0.0478253523475305, | |
| "learning_rate": 7.831690890967332e-06, | |
| "loss": 0.9023, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.19599956119501225, | |
| "grad_norm": 0.03931532297283958, | |
| "learning_rate": 7.830689530177923e-06, | |
| "loss": 0.8757, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.19658463451201227, | |
| "grad_norm": 0.04480666786944768, | |
| "learning_rate": 7.8296852711307e-06, | |
| "loss": 0.8393, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 0.19716970782901233, | |
| "grad_norm": 0.0734058976406723, | |
| "learning_rate": 7.828678114674066e-06, | |
| "loss": 0.9038, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 0.19775478114601236, | |
| "grad_norm": 0.03967258990394233, | |
| "learning_rate": 7.827668061658871e-06, | |
| "loss": 0.8009, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.1983398544630124, | |
| "grad_norm": 0.0418553799297778, | |
| "learning_rate": 7.82665511293841e-06, | |
| "loss": 0.8865, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.19892492778001244, | |
| "grad_norm": 0.0384561013270465, | |
| "learning_rate": 7.825639269368426e-06, | |
| "loss": 0.872, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.19951000109701247, | |
| "grad_norm": 0.04182210062250734, | |
| "learning_rate": 7.824620531807106e-06, | |
| "loss": 0.8974, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.2000950744140125, | |
| "grad_norm": 0.0402445680829306, | |
| "learning_rate": 7.823598901115085e-06, | |
| "loss": 0.8017, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.20068014773101253, | |
| "grad_norm": 0.03976795416941979, | |
| "learning_rate": 7.822574378155436e-06, | |
| "loss": 0.9298, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.2012652210480126, | |
| "grad_norm": 0.043031163568613286, | |
| "learning_rate": 7.821546963793683e-06, | |
| "loss": 0.9508, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.20185029436501262, | |
| "grad_norm": 0.0485982687560341, | |
| "learning_rate": 7.82051665889779e-06, | |
| "loss": 0.8536, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.20243536768201265, | |
| "grad_norm": 0.04149899672912665, | |
| "learning_rate": 7.819483464338156e-06, | |
| "loss": 0.8767, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.2030204409990127, | |
| "grad_norm": 0.0971131140419449, | |
| "learning_rate": 7.818447380987634e-06, | |
| "loss": 0.9271, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.20360551431601273, | |
| "grad_norm": 0.04183099462603424, | |
| "learning_rate": 7.817408409721506e-06, | |
| "loss": 0.8362, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 0.20419058763301276, | |
| "grad_norm": 0.03719681766847752, | |
| "learning_rate": 7.8163665514175e-06, | |
| "loss": 0.8544, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 0.2047756609500128, | |
| "grad_norm": 0.08061763199476867, | |
| "learning_rate": 7.815321806955782e-06, | |
| "loss": 0.8335, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.20536073426701285, | |
| "grad_norm": 0.053441055551360056, | |
| "learning_rate": 7.814274177218955e-06, | |
| "loss": 0.8602, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.20594580758401287, | |
| "grad_norm": 0.1775803458323184, | |
| "learning_rate": 7.81322366309206e-06, | |
| "loss": 0.8998, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.2065308809010129, | |
| "grad_norm": 0.0637002971995377, | |
| "learning_rate": 7.812170265462573e-06, | |
| "loss": 0.9737, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.20711595421801293, | |
| "grad_norm": 0.04274639590829472, | |
| "learning_rate": 7.81111398522041e-06, | |
| "loss": 1.009, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.207701027535013, | |
| "grad_norm": 0.041282961278004064, | |
| "learning_rate": 7.81005482325792e-06, | |
| "loss": 0.9035, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.20828610085201302, | |
| "grad_norm": 0.04195399372218902, | |
| "learning_rate": 7.808992780469889e-06, | |
| "loss": 0.9128, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.20887117416901305, | |
| "grad_norm": 0.03698717701423449, | |
| "learning_rate": 7.807927857753527e-06, | |
| "loss": 0.8562, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.2094562474860131, | |
| "grad_norm": 0.1081591722204418, | |
| "learning_rate": 7.80686005600849e-06, | |
| "loss": 0.8931, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.21004132080301313, | |
| "grad_norm": 0.04152721891763337, | |
| "learning_rate": 7.80578937613686e-06, | |
| "loss": 0.8633, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.21062639412001316, | |
| "grad_norm": 0.04200660652374662, | |
| "learning_rate": 7.804715819043148e-06, | |
| "loss": 0.8773, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.2112114674370132, | |
| "grad_norm": 0.04405111681557889, | |
| "learning_rate": 7.803639385634302e-06, | |
| "loss": 0.8587, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.21179654075401325, | |
| "grad_norm": 0.04571477197647962, | |
| "learning_rate": 7.802560076819694e-06, | |
| "loss": 0.8334, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.21238161407101327, | |
| "grad_norm": 0.041607118224879065, | |
| "learning_rate": 7.80147789351113e-06, | |
| "loss": 0.8739, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.2129666873880133, | |
| "grad_norm": 0.06194034993642153, | |
| "learning_rate": 7.800392836622838e-06, | |
| "loss": 0.8956, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.21355176070501336, | |
| "grad_norm": 0.03798806008002444, | |
| "learning_rate": 7.79930490707148e-06, | |
| "loss": 0.8966, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.2141368340220134, | |
| "grad_norm": 0.04739784630899101, | |
| "learning_rate": 7.798214105776146e-06, | |
| "loss": 0.9552, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.21472190733901342, | |
| "grad_norm": 0.04497114482691456, | |
| "learning_rate": 7.797120433658343e-06, | |
| "loss": 0.8666, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.21530698065601345, | |
| "grad_norm": 0.04176901867071411, | |
| "learning_rate": 7.796023891642011e-06, | |
| "loss": 0.9051, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.2158920539730135, | |
| "grad_norm": 0.056698801623745465, | |
| "learning_rate": 7.794924480653513e-06, | |
| "loss": 0.8745, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.21647712729001353, | |
| "grad_norm": 0.04267067239376988, | |
| "learning_rate": 7.793822201621633e-06, | |
| "loss": 0.9129, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.21706220060701356, | |
| "grad_norm": 0.03999203881276761, | |
| "learning_rate": 7.79271705547758e-06, | |
| "loss": 0.8814, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.2176472739240136, | |
| "grad_norm": 0.04206496870855173, | |
| "learning_rate": 7.79160904315499e-06, | |
| "loss": 0.8936, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.21823234724101365, | |
| "grad_norm": 0.040567992793796616, | |
| "learning_rate": 7.79049816558991e-06, | |
| "loss": 0.8961, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.21881742055801368, | |
| "grad_norm": 0.04165915715538525, | |
| "learning_rate": 7.789384423720815e-06, | |
| "loss": 0.901, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.2194024938750137, | |
| "grad_norm": 0.04311161567240108, | |
| "learning_rate": 7.788267818488597e-06, | |
| "loss": 0.8571, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.21998756719201376, | |
| "grad_norm": 0.04090984120457054, | |
| "learning_rate": 7.78714835083657e-06, | |
| "loss": 0.879, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.2205726405090138, | |
| "grad_norm": 0.03723653818234615, | |
| "learning_rate": 7.786026021710462e-06, | |
| "loss": 0.8687, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.22115771382601382, | |
| "grad_norm": 0.040732056413017595, | |
| "learning_rate": 7.78490083205842e-06, | |
| "loss": 0.9033, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.22174278714301385, | |
| "grad_norm": 0.03755340315603773, | |
| "learning_rate": 7.783772782831008e-06, | |
| "loss": 0.8919, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.2223278604600139, | |
| "grad_norm": 0.04513035465018611, | |
| "learning_rate": 7.782641874981207e-06, | |
| "loss": 0.8766, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.22291293377701393, | |
| "grad_norm": 0.042927929340526826, | |
| "learning_rate": 7.78150810946441e-06, | |
| "loss": 0.8692, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.22349800709401396, | |
| "grad_norm": 0.03548281821425231, | |
| "learning_rate": 7.780371487238428e-06, | |
| "loss": 0.7295, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.22408308041101402, | |
| "grad_norm": 0.03978070402906236, | |
| "learning_rate": 7.779232009263484e-06, | |
| "loss": 0.8555, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.22466815372801405, | |
| "grad_norm": 0.036709565386030436, | |
| "learning_rate": 7.778089676502209e-06, | |
| "loss": 0.7492, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.22525322704501408, | |
| "grad_norm": 0.04547788495521431, | |
| "learning_rate": 7.776944489919649e-06, | |
| "loss": 0.8334, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.2258383003620141, | |
| "grad_norm": 0.04220442842369723, | |
| "learning_rate": 7.775796450483267e-06, | |
| "loss": 0.8244, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.22642337367901416, | |
| "grad_norm": 0.04778060193840243, | |
| "learning_rate": 7.774645559162927e-06, | |
| "loss": 0.8511, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.2270084469960142, | |
| "grad_norm": 0.05014682109980806, | |
| "learning_rate": 7.773491816930904e-06, | |
| "loss": 0.8334, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.22759352031301422, | |
| "grad_norm": 0.04001986651909544, | |
| "learning_rate": 7.772335224761886e-06, | |
| "loss": 0.8224, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.22817859363001425, | |
| "grad_norm": 0.040094874463681825, | |
| "learning_rate": 7.771175783632966e-06, | |
| "loss": 0.9069, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.2287636669470143, | |
| "grad_norm": 0.03678306586668835, | |
| "learning_rate": 7.770013494523641e-06, | |
| "loss": 0.8758, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.22934874026401433, | |
| "grad_norm": 0.04444193067206238, | |
| "learning_rate": 7.768848358415819e-06, | |
| "loss": 0.946, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.22993381358101436, | |
| "grad_norm": 0.042386218395158284, | |
| "learning_rate": 7.767680376293811e-06, | |
| "loss": 0.8395, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.23051888689801442, | |
| "grad_norm": 0.051874603825963005, | |
| "learning_rate": 7.766509549144332e-06, | |
| "loss": 0.8867, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.23110396021501445, | |
| "grad_norm": 0.04115935591406729, | |
| "learning_rate": 7.765335877956498e-06, | |
| "loss": 0.8509, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.23168903353201448, | |
| "grad_norm": 0.045377388296697053, | |
| "learning_rate": 7.764159363721833e-06, | |
| "loss": 0.8783, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.2322741068490145, | |
| "grad_norm": 0.03845331275197764, | |
| "learning_rate": 7.762980007434261e-06, | |
| "loss": 0.8721, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.23285918016601456, | |
| "grad_norm": 0.06130607399623932, | |
| "learning_rate": 7.761797810090103e-06, | |
| "loss": 0.896, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.2334442534830146, | |
| "grad_norm": 0.0653248890033106, | |
| "learning_rate": 7.760612772688086e-06, | |
| "loss": 0.9239, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.23402932680001462, | |
| "grad_norm": 0.04386903271065406, | |
| "learning_rate": 7.759424896229329e-06, | |
| "loss": 0.9055, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.23461440011701468, | |
| "grad_norm": 0.05112057938888221, | |
| "learning_rate": 7.758234181717359e-06, | |
| "loss": 0.8179, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.2351994734340147, | |
| "grad_norm": 0.03919834965990091, | |
| "learning_rate": 7.757040630158094e-06, | |
| "loss": 0.9131, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.23578454675101473, | |
| "grad_norm": 0.04080472273046829, | |
| "learning_rate": 7.75584424255985e-06, | |
| "loss": 0.8772, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.23636962006801476, | |
| "grad_norm": 0.036834620421617906, | |
| "learning_rate": 7.754645019933338e-06, | |
| "loss": 0.8155, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.23695469338501482, | |
| "grad_norm": 0.03654317750178369, | |
| "learning_rate": 7.753442963291668e-06, | |
| "loss": 0.8346, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.23753976670201485, | |
| "grad_norm": 0.04519798929105044, | |
| "learning_rate": 7.752238073650339e-06, | |
| "loss": 0.912, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.23812484001901488, | |
| "grad_norm": 0.03826695253269902, | |
| "learning_rate": 7.751030352027246e-06, | |
| "loss": 0.8772, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.2387099133360149, | |
| "grad_norm": 0.04486993387188878, | |
| "learning_rate": 7.749819799442676e-06, | |
| "loss": 0.8826, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.23929498665301496, | |
| "grad_norm": 0.0776681654995442, | |
| "learning_rate": 7.74860641691931e-06, | |
| "loss": 0.8987, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.239880059970015, | |
| "grad_norm": 0.05169098865704706, | |
| "learning_rate": 7.747390205482216e-06, | |
| "loss": 0.7904, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.24046513328701502, | |
| "grad_norm": 0.042053672534972886, | |
| "learning_rate": 7.746171166158855e-06, | |
| "loss": 0.9188, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.24105020660401508, | |
| "grad_norm": 0.044614916290635534, | |
| "learning_rate": 7.744949299979071e-06, | |
| "loss": 0.9118, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.2416352799210151, | |
| "grad_norm": 0.04594381480411999, | |
| "learning_rate": 7.743724607975105e-06, | |
| "loss": 0.8547, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.24222035323801513, | |
| "grad_norm": 0.037664202042716706, | |
| "learning_rate": 7.742497091181578e-06, | |
| "loss": 0.8446, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.24280542655501516, | |
| "grad_norm": 0.04794778502796113, | |
| "learning_rate": 7.741266750635502e-06, | |
| "loss": 0.897, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.24339049987201522, | |
| "grad_norm": 0.04188595026753968, | |
| "learning_rate": 7.740033587376275e-06, | |
| "loss": 0.9061, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.24397557318901525, | |
| "grad_norm": 0.04407359629305258, | |
| "learning_rate": 7.738797602445671e-06, | |
| "loss": 0.9146, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.24456064650601528, | |
| "grad_norm": 0.04077311126531583, | |
| "learning_rate": 7.73755879688786e-06, | |
| "loss": 0.8515, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.24514571982301533, | |
| "grad_norm": 0.055784116703384304, | |
| "learning_rate": 7.736317171749385e-06, | |
| "loss": 0.851, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.24573079314001536, | |
| "grad_norm": 0.049743142106565615, | |
| "learning_rate": 7.735072728079179e-06, | |
| "loss": 0.8718, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.2463158664570154, | |
| "grad_norm": 0.04470234941233142, | |
| "learning_rate": 7.73382546692855e-06, | |
| "loss": 0.9624, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.24690093977401542, | |
| "grad_norm": 0.040010409109375616, | |
| "learning_rate": 7.732575389351187e-06, | |
| "loss": 0.8925, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.24748601309101548, | |
| "grad_norm": 0.04513853855792064, | |
| "learning_rate": 7.731322496403161e-06, | |
| "loss": 0.8163, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.2480710864080155, | |
| "grad_norm": 0.04305001532204258, | |
| "learning_rate": 7.730066789142922e-06, | |
| "loss": 0.818, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.24865615972501554, | |
| "grad_norm": 0.0381328984157233, | |
| "learning_rate": 7.728808268631291e-06, | |
| "loss": 0.8655, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.24924123304201556, | |
| "grad_norm": 0.04141777979618345, | |
| "learning_rate": 7.727546935931473e-06, | |
| "loss": 0.8447, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.24982630635901562, | |
| "grad_norm": 0.0441532278076972, | |
| "learning_rate": 7.726282792109049e-06, | |
| "loss": 0.7839, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.2504113796760157, | |
| "grad_norm": 0.04369415131884431, | |
| "learning_rate": 7.725015838231966e-06, | |
| "loss": 0.8375, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.2509964529930157, | |
| "grad_norm": 0.04101978650087937, | |
| "learning_rate": 7.723746075370553e-06, | |
| "loss": 0.8001, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.25158152631001574, | |
| "grad_norm": 0.04974984131766339, | |
| "learning_rate": 7.722473504597512e-06, | |
| "loss": 0.8914, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.25216659962701576, | |
| "grad_norm": 0.042172544165836386, | |
| "learning_rate": 7.721198126987914e-06, | |
| "loss": 0.8266, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.2527516729440158, | |
| "grad_norm": 0.04137736709510405, | |
| "learning_rate": 7.719919943619202e-06, | |
| "loss": 0.9043, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.2533367462610158, | |
| "grad_norm": 0.04296090636863713, | |
| "learning_rate": 7.718638955571187e-06, | |
| "loss": 0.8638, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.25392181957801585, | |
| "grad_norm": 0.04617407215473608, | |
| "learning_rate": 7.717355163926054e-06, | |
| "loss": 0.8223, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.2545068928950159, | |
| "grad_norm": 0.05470709039494341, | |
| "learning_rate": 7.716068569768357e-06, | |
| "loss": 0.8702, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.25509196621201596, | |
| "grad_norm": 0.05481374522257052, | |
| "learning_rate": 7.714779174185011e-06, | |
| "loss": 0.8644, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.255677039529016, | |
| "grad_norm": 0.047154937968022254, | |
| "learning_rate": 7.713486978265303e-06, | |
| "loss": 0.8656, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.256262112846016, | |
| "grad_norm": 0.04351286538068476, | |
| "learning_rate": 7.712191983100885e-06, | |
| "loss": 0.9139, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.25684718616301605, | |
| "grad_norm": 0.036225679985398614, | |
| "learning_rate": 7.710894189785773e-06, | |
| "loss": 0.8589, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.2574322594800161, | |
| "grad_norm": 0.036619323150318614, | |
| "learning_rate": 7.709593599416346e-06, | |
| "loss": 0.8355, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.2580173327970161, | |
| "grad_norm": 0.052556029410115106, | |
| "learning_rate": 7.708290213091348e-06, | |
| "loss": 0.9407, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.25860240611401614, | |
| "grad_norm": 0.04045383405114572, | |
| "learning_rate": 7.706984031911884e-06, | |
| "loss": 0.8224, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.2591874794310162, | |
| "grad_norm": 0.03850958419485924, | |
| "learning_rate": 7.705675056981419e-06, | |
| "loss": 0.8289, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.25977255274801625, | |
| "grad_norm": 0.042025364903943374, | |
| "learning_rate": 7.704363289405782e-06, | |
| "loss": 0.8295, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.2603576260650163, | |
| "grad_norm": 0.18295957147270098, | |
| "learning_rate": 7.703048730293156e-06, | |
| "loss": 0.8591, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.2609426993820163, | |
| "grad_norm": 0.08559741878328085, | |
| "learning_rate": 7.701731380754086e-06, | |
| "loss": 0.8841, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.26152777269901634, | |
| "grad_norm": 0.04030996945096956, | |
| "learning_rate": 7.700411241901473e-06, | |
| "loss": 0.927, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.26211284601601637, | |
| "grad_norm": 0.04080969851872162, | |
| "learning_rate": 7.699088314850574e-06, | |
| "loss": 0.8448, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.2626979193330164, | |
| "grad_norm": 0.0378951860436084, | |
| "learning_rate": 7.697762600719002e-06, | |
| "loss": 0.8806, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.2632829926500165, | |
| "grad_norm": 0.03623506747860633, | |
| "learning_rate": 7.696434100626727e-06, | |
| "loss": 0.8899, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.2638680659670165, | |
| "grad_norm": 0.03949638769998007, | |
| "learning_rate": 7.695102815696068e-06, | |
| "loss": 0.896, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.26445313928401654, | |
| "grad_norm": 0.039703852605046996, | |
| "learning_rate": 7.6937687470517e-06, | |
| "loss": 0.9092, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.26503821260101657, | |
| "grad_norm": 0.04067815287205374, | |
| "learning_rate": 7.692431895820648e-06, | |
| "loss": 0.8948, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.2656232859180166, | |
| "grad_norm": 0.0498733499092084, | |
| "learning_rate": 7.691092263132289e-06, | |
| "loss": 0.8407, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.2662083592350166, | |
| "grad_norm": 0.06147495248603913, | |
| "learning_rate": 7.689749850118347e-06, | |
| "loss": 0.8343, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.26679343255201665, | |
| "grad_norm": 0.2184373267208327, | |
| "learning_rate": 7.6884046579129e-06, | |
| "loss": 0.909, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.26737850586901674, | |
| "grad_norm": 0.038431478494613806, | |
| "learning_rate": 7.68705668765237e-06, | |
| "loss": 0.8685, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.26796357918601676, | |
| "grad_norm": 0.15936476995911328, | |
| "learning_rate": 7.685705940475523e-06, | |
| "loss": 0.8176, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.2685486525030168, | |
| "grad_norm": 0.04683404608346142, | |
| "learning_rate": 7.68435241752348e-06, | |
| "loss": 0.8523, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.2691337258200168, | |
| "grad_norm": 0.04603467291785015, | |
| "learning_rate": 7.6829961199397e-06, | |
| "loss": 0.8134, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.26971879913701685, | |
| "grad_norm": 0.0502568206105835, | |
| "learning_rate": 7.681637048869985e-06, | |
| "loss": 0.9043, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.2703038724540169, | |
| "grad_norm": 0.03828249024856938, | |
| "learning_rate": 7.680275205462485e-06, | |
| "loss": 0.843, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.2708889457710169, | |
| "grad_norm": 0.043272538310479496, | |
| "learning_rate": 7.67891059086769e-06, | |
| "loss": 0.9456, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.271474019088017, | |
| "grad_norm": 0.04015211847803301, | |
| "learning_rate": 7.67754320623843e-06, | |
| "loss": 0.9414, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.272059092405017, | |
| "grad_norm": 0.04064423849996686, | |
| "learning_rate": 7.676173052729877e-06, | |
| "loss": 0.8705, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.27264416572201705, | |
| "grad_norm": 0.04969071039366313, | |
| "learning_rate": 7.67480013149954e-06, | |
| "loss": 0.8922, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.2732292390390171, | |
| "grad_norm": 0.05260125052428257, | |
| "learning_rate": 7.67342444370727e-06, | |
| "loss": 0.9733, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.2738143123560171, | |
| "grad_norm": 0.040268950739110694, | |
| "learning_rate": 7.672045990515248e-06, | |
| "loss": 0.8806, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.27439938567301714, | |
| "grad_norm": 0.042577022398831814, | |
| "learning_rate": 7.670664773088e-06, | |
| "loss": 0.9561, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.27498445899001717, | |
| "grad_norm": 0.04618878912895806, | |
| "learning_rate": 7.669280792592383e-06, | |
| "loss": 0.8403, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.2755695323070172, | |
| "grad_norm": 0.04343593914167416, | |
| "learning_rate": 7.667894050197583e-06, | |
| "loss": 0.8355, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.2761546056240173, | |
| "grad_norm": 0.040556959908718666, | |
| "learning_rate": 7.66650454707513e-06, | |
| "loss": 0.8608, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.2767396789410173, | |
| "grad_norm": 0.054697588596535354, | |
| "learning_rate": 7.665112284398881e-06, | |
| "loss": 0.839, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.27732475225801734, | |
| "grad_norm": 0.04393307328991595, | |
| "learning_rate": 7.66371726334502e-06, | |
| "loss": 0.8494, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.27790982557501737, | |
| "grad_norm": 0.04275309783082017, | |
| "learning_rate": 7.662319485092067e-06, | |
| "loss": 0.8259, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.2784948988920174, | |
| "grad_norm": 0.03953402161699839, | |
| "learning_rate": 7.66091895082087e-06, | |
| "loss": 0.7773, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.2790799722090174, | |
| "grad_norm": 0.03860846601474072, | |
| "learning_rate": 7.659515661714608e-06, | |
| "loss": 0.7962, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.27966504552601745, | |
| "grad_norm": 0.05366294387360709, | |
| "learning_rate": 7.658109618958779e-06, | |
| "loss": 0.9233, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.28025011884301754, | |
| "grad_norm": 0.04420018919513911, | |
| "learning_rate": 7.656700823741216e-06, | |
| "loss": 0.8156, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.28083519216001757, | |
| "grad_norm": 0.039049327120388326, | |
| "learning_rate": 7.655289277252074e-06, | |
| "loss": 0.8856, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.2814202654770176, | |
| "grad_norm": 0.26425632822736106, | |
| "learning_rate": 7.653874980683828e-06, | |
| "loss": 0.9503, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.2820053387940176, | |
| "grad_norm": 0.04289481203658443, | |
| "learning_rate": 7.652457935231285e-06, | |
| "loss": 0.8937, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.28259041211101765, | |
| "grad_norm": 0.04293700432214028, | |
| "learning_rate": 7.651038142091568e-06, | |
| "loss": 0.8571, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.2831754854280177, | |
| "grad_norm": 0.038244633724887706, | |
| "learning_rate": 7.649615602464123e-06, | |
| "loss": 0.7906, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.2837605587450177, | |
| "grad_norm": 0.03944291933937196, | |
| "learning_rate": 7.648190317550717e-06, | |
| "loss": 0.926, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.2843456320620178, | |
| "grad_norm": 0.10913228375108804, | |
| "learning_rate": 7.646762288555433e-06, | |
| "loss": 0.8592, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.2849307053790178, | |
| "grad_norm": 0.038286199243376444, | |
| "learning_rate": 7.645331516684676e-06, | |
| "loss": 0.8418, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.28551577869601785, | |
| "grad_norm": 0.043890441209433355, | |
| "learning_rate": 7.643898003147167e-06, | |
| "loss": 0.9614, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.2861008520130179, | |
| "grad_norm": 0.0382147078144481, | |
| "learning_rate": 7.642461749153943e-06, | |
| "loss": 0.83, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.2866859253300179, | |
| "grad_norm": 0.10202631306826099, | |
| "learning_rate": 7.641022755918357e-06, | |
| "loss": 0.9484, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.28727099864701794, | |
| "grad_norm": 0.04017584960145629, | |
| "learning_rate": 7.639581024656072e-06, | |
| "loss": 0.8278, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.28785607196401797, | |
| "grad_norm": 0.05571278346236126, | |
| "learning_rate": 7.638136556585071e-06, | |
| "loss": 0.9093, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.28844114528101805, | |
| "grad_norm": 0.04145030654143878, | |
| "learning_rate": 7.636689352925643e-06, | |
| "loss": 0.913, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.2890262185980181, | |
| "grad_norm": 0.05058778086388885, | |
| "learning_rate": 7.635239414900393e-06, | |
| "loss": 0.9366, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.2896112919150181, | |
| "grad_norm": 0.047476857954303966, | |
| "learning_rate": 7.63378674373423e-06, | |
| "loss": 0.8528, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.29019636523201814, | |
| "grad_norm": 0.043805250682478876, | |
| "learning_rate": 7.632331340654377e-06, | |
| "loss": 0.9953, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.29078143854901817, | |
| "grad_norm": 0.04691649904039108, | |
| "learning_rate": 7.630873206890365e-06, | |
| "loss": 0.8893, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.2913665118660182, | |
| "grad_norm": 0.061366723803107565, | |
| "learning_rate": 7.629412343674026e-06, | |
| "loss": 0.8895, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.2919515851830182, | |
| "grad_norm": 0.052047839285183604, | |
| "learning_rate": 7.627948752239508e-06, | |
| "loss": 0.8322, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.2925366585000183, | |
| "grad_norm": 0.04858401908225131, | |
| "learning_rate": 7.6264824338232515e-06, | |
| "loss": 0.7962, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.29312173181701834, | |
| "grad_norm": 0.05167192214353387, | |
| "learning_rate": 7.625013389664012e-06, | |
| "loss": 0.8178, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.29370680513401837, | |
| "grad_norm": 0.04571347435775933, | |
| "learning_rate": 7.623541621002841e-06, | |
| "loss": 0.9531, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.2942918784510184, | |
| "grad_norm": 0.05170428092011692, | |
| "learning_rate": 7.622067129083092e-06, | |
| "loss": 0.863, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.2948769517680184, | |
| "grad_norm": 0.04769020808960418, | |
| "learning_rate": 7.620589915150423e-06, | |
| "loss": 0.8693, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.29546202508501845, | |
| "grad_norm": 0.036447168096671895, | |
| "learning_rate": 7.619109980452789e-06, | |
| "loss": 0.8263, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.2960470984020185, | |
| "grad_norm": 0.04697422924818202, | |
| "learning_rate": 7.617627326240441e-06, | |
| "loss": 0.9127, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.29663217171901857, | |
| "grad_norm": 0.05026264821034648, | |
| "learning_rate": 7.6161419537659345e-06, | |
| "loss": 0.9163, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.2972172450360186, | |
| "grad_norm": 0.044605972655997896, | |
| "learning_rate": 7.614653864284114e-06, | |
| "loss": 0.8998, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.2978023183530186, | |
| "grad_norm": 0.06437245062396214, | |
| "learning_rate": 7.613163059052123e-06, | |
| "loss": 0.8039, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.29838739167001865, | |
| "grad_norm": 0.04554747767372419, | |
| "learning_rate": 7.611669539329398e-06, | |
| "loss": 0.8662, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.2989724649870187, | |
| "grad_norm": 0.0802330744693163, | |
| "learning_rate": 7.610173306377671e-06, | |
| "loss": 0.8343, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.2995575383040187, | |
| "grad_norm": 0.04192153945182111, | |
| "learning_rate": 7.608674361460963e-06, | |
| "loss": 0.8983, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.30014261162101874, | |
| "grad_norm": 0.04902593536059904, | |
| "learning_rate": 7.607172705845589e-06, | |
| "loss": 0.9242, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.30072768493801877, | |
| "grad_norm": 0.05436876049704265, | |
| "learning_rate": 7.605668340800153e-06, | |
| "loss": 0.834, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.30131275825501885, | |
| "grad_norm": 0.04047347680328035, | |
| "learning_rate": 7.604161267595545e-06, | |
| "loss": 0.8359, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.3018978315720189, | |
| "grad_norm": 0.03848909199643286, | |
| "learning_rate": 7.602651487504946e-06, | |
| "loss": 0.8126, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.3024829048890189, | |
| "grad_norm": 0.046207806514092946, | |
| "learning_rate": 7.601139001803825e-06, | |
| "loss": 0.869, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.30306797820601894, | |
| "grad_norm": 0.03852808749767389, | |
| "learning_rate": 7.5996238117699344e-06, | |
| "loss": 0.9808, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.30365305152301897, | |
| "grad_norm": 0.04005257123878126, | |
| "learning_rate": 7.5981059186833114e-06, | |
| "loss": 0.7767, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.304238124840019, | |
| "grad_norm": 0.05159848088030845, | |
| "learning_rate": 7.596585323826277e-06, | |
| "loss": 0.8932, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.304823198157019, | |
| "grad_norm": 0.040391024202221286, | |
| "learning_rate": 7.595062028483434e-06, | |
| "loss": 0.9093, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.3054082714740191, | |
| "grad_norm": 0.040428764785829324, | |
| "learning_rate": 7.593536033941669e-06, | |
| "loss": 0.9087, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.30599334479101914, | |
| "grad_norm": 0.09158364294178328, | |
| "learning_rate": 7.592007341490145e-06, | |
| "loss": 0.8601, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.30657841810801917, | |
| "grad_norm": 0.04136317840574456, | |
| "learning_rate": 7.590475952420309e-06, | |
| "loss": 0.8953, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.3071634914250192, | |
| "grad_norm": 0.06396409974499655, | |
| "learning_rate": 7.588941868025881e-06, | |
| "loss": 0.8297, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.3077485647420192, | |
| "grad_norm": 0.040333070246341814, | |
| "learning_rate": 7.587405089602862e-06, | |
| "loss": 0.7719, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.30833363805901925, | |
| "grad_norm": 0.04148918223122552, | |
| "learning_rate": 7.585865618449528e-06, | |
| "loss": 0.8007, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.3089187113760193, | |
| "grad_norm": 0.048132396423435676, | |
| "learning_rate": 7.584323455866427e-06, | |
| "loss": 0.8579, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.30950378469301937, | |
| "grad_norm": 0.04328208405834297, | |
| "learning_rate": 7.582778603156387e-06, | |
| "loss": 0.8071, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.3100888580100194, | |
| "grad_norm": 0.04318172969759895, | |
| "learning_rate": 7.5812310616245e-06, | |
| "loss": 0.83, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.3106739313270194, | |
| "grad_norm": 0.03768227605900526, | |
| "learning_rate": 7.579680832578137e-06, | |
| "loss": 0.8344, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.31125900464401945, | |
| "grad_norm": 0.04790982152353994, | |
| "learning_rate": 7.578127917326936e-06, | |
| "loss": 0.8974, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.3118440779610195, | |
| "grad_norm": 0.038002618502432514, | |
| "learning_rate": 7.576572317182805e-06, | |
| "loss": 0.792, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.3124291512780195, | |
| "grad_norm": 0.036571897121817694, | |
| "learning_rate": 7.575014033459921e-06, | |
| "loss": 0.8418, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.31301422459501954, | |
| "grad_norm": 0.03670017559323694, | |
| "learning_rate": 7.573453067474724e-06, | |
| "loss": 0.8834, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.3135992979120196, | |
| "grad_norm": 0.03621350431630049, | |
| "learning_rate": 7.5718894205459284e-06, | |
| "loss": 0.8549, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.31418437122901965, | |
| "grad_norm": 0.04296675197248163, | |
| "learning_rate": 7.570323093994503e-06, | |
| "loss": 0.7894, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.3147694445460197, | |
| "grad_norm": 0.044503765611357125, | |
| "learning_rate": 7.568754089143688e-06, | |
| "loss": 0.8655, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.3153545178630197, | |
| "grad_norm": 0.03715684392321602, | |
| "learning_rate": 7.5671824073189845e-06, | |
| "loss": 0.8697, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.31593959118001974, | |
| "grad_norm": 0.04056871817206291, | |
| "learning_rate": 7.5656080498481535e-06, | |
| "loss": 0.8803, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.31652466449701977, | |
| "grad_norm": 0.08089424036325778, | |
| "learning_rate": 7.564031018061219e-06, | |
| "loss": 0.8098, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.3171097378140198, | |
| "grad_norm": 0.04027917264460558, | |
| "learning_rate": 7.562451313290459e-06, | |
| "loss": 0.8939, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.3176948111310199, | |
| "grad_norm": 0.042852152415068484, | |
| "learning_rate": 7.560868936870418e-06, | |
| "loss": 0.8901, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.3182798844480199, | |
| "grad_norm": 0.05886634493495861, | |
| "learning_rate": 7.559283890137889e-06, | |
| "loss": 0.9286, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.31886495776501994, | |
| "grad_norm": 0.04216130498342499, | |
| "learning_rate": 7.557696174431927e-06, | |
| "loss": 0.8311, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.31945003108201997, | |
| "grad_norm": 0.053197981320438834, | |
| "learning_rate": 7.556105791093838e-06, | |
| "loss": 0.8952, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.32003510439902, | |
| "grad_norm": 0.038738558056749535, | |
| "learning_rate": 7.554512741467183e-06, | |
| "loss": 0.87, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.32062017771602, | |
| "grad_norm": 0.04255317774837396, | |
| "learning_rate": 7.552917026897778e-06, | |
| "loss": 0.8386, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.32120525103302006, | |
| "grad_norm": 0.06530091195329979, | |
| "learning_rate": 7.551318648733684e-06, | |
| "loss": 0.8862, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.3217903243500201, | |
| "grad_norm": 0.04640020878919504, | |
| "learning_rate": 7.549717608325219e-06, | |
| "loss": 0.8615, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.32237539766702017, | |
| "grad_norm": 0.038491606071055146, | |
| "learning_rate": 7.548113907024948e-06, | |
| "loss": 0.8581, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.3229604709840202, | |
| "grad_norm": 0.05470482648340031, | |
| "learning_rate": 7.54650754618768e-06, | |
| "loss": 0.8104, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.3235455443010202, | |
| "grad_norm": 0.04194329994008872, | |
| "learning_rate": 7.544898527170475e-06, | |
| "loss": 0.7725, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.32413061761802026, | |
| "grad_norm": 0.059518370147140835, | |
| "learning_rate": 7.543286851332641e-06, | |
| "loss": 0.8814, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.3247156909350203, | |
| "grad_norm": 0.041939649514720574, | |
| "learning_rate": 7.5416725200357215e-06, | |
| "loss": 0.8785, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.3253007642520203, | |
| "grad_norm": 0.038119584353072236, | |
| "learning_rate": 7.540055534643512e-06, | |
| "loss": 0.8787, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.32588583756902034, | |
| "grad_norm": 0.35043742170562575, | |
| "learning_rate": 7.538435896522048e-06, | |
| "loss": 0.9425, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.3264709108860204, | |
| "grad_norm": 0.037804212753556964, | |
| "learning_rate": 7.536813607039603e-06, | |
| "loss": 0.9275, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.32705598420302046, | |
| "grad_norm": 0.04797991190424691, | |
| "learning_rate": 7.535188667566693e-06, | |
| "loss": 0.8349, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.3276410575200205, | |
| "grad_norm": 0.07131490446157496, | |
| "learning_rate": 7.533561079476073e-06, | |
| "loss": 0.7922, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.3282261308370205, | |
| "grad_norm": 0.04273451205950798, | |
| "learning_rate": 7.531930844142734e-06, | |
| "loss": 0.9063, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.32881120415402054, | |
| "grad_norm": 0.04761650491169362, | |
| "learning_rate": 7.5302979629439044e-06, | |
| "loss": 0.874, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.32939627747102057, | |
| "grad_norm": 0.053621412028226095, | |
| "learning_rate": 7.528662437259048e-06, | |
| "loss": 0.8908, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.3299813507880206, | |
| "grad_norm": 0.08172735146274197, | |
| "learning_rate": 7.527024268469862e-06, | |
| "loss": 0.7873, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.3305664241050207, | |
| "grad_norm": 0.045936897060612714, | |
| "learning_rate": 7.525383457960277e-06, | |
| "loss": 0.8582, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.3311514974220207, | |
| "grad_norm": 0.043038357564032, | |
| "learning_rate": 7.523740007116453e-06, | |
| "loss": 0.8864, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.33173657073902074, | |
| "grad_norm": 0.0595338473222242, | |
| "learning_rate": 7.5220939173267855e-06, | |
| "loss": 0.8398, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.33232164405602077, | |
| "grad_norm": 0.03876298862367819, | |
| "learning_rate": 7.520445189981897e-06, | |
| "loss": 0.7608, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.3329067173730208, | |
| "grad_norm": 0.04137701061941132, | |
| "learning_rate": 7.518793826474636e-06, | |
| "loss": 0.9109, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.33349179069002083, | |
| "grad_norm": 0.04054950689452163, | |
| "learning_rate": 7.517139828200079e-06, | |
| "loss": 0.9109, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.33407686400702086, | |
| "grad_norm": 0.045942175432546556, | |
| "learning_rate": 7.5154831965555315e-06, | |
| "loss": 0.8569, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.33466193732402094, | |
| "grad_norm": 0.0412369088341243, | |
| "learning_rate": 7.51382393294052e-06, | |
| "loss": 0.837, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.33524701064102097, | |
| "grad_norm": 0.03984087759584128, | |
| "learning_rate": 7.5121620387567955e-06, | |
| "loss": 0.8173, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.335832083958021, | |
| "grad_norm": 0.03855595716614889, | |
| "learning_rate": 7.510497515408333e-06, | |
| "loss": 0.7942, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.33641715727502103, | |
| "grad_norm": 0.04123539453768172, | |
| "learning_rate": 7.508830364301327e-06, | |
| "loss": 0.9384, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.33700223059202106, | |
| "grad_norm": 0.047137253838708155, | |
| "learning_rate": 7.507160586844191e-06, | |
| "loss": 0.8449, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.3375873039090211, | |
| "grad_norm": 0.03987110874806033, | |
| "learning_rate": 7.50548818444756e-06, | |
| "loss": 0.8678, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.3381723772260211, | |
| "grad_norm": 0.05156972953667277, | |
| "learning_rate": 7.503813158524284e-06, | |
| "loss": 0.8369, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.3387574505430212, | |
| "grad_norm": 0.05429963825180294, | |
| "learning_rate": 7.502135510489432e-06, | |
| "loss": 0.9386, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.33934252386002123, | |
| "grad_norm": 0.04563390307441152, | |
| "learning_rate": 7.500455241760284e-06, | |
| "loss": 0.7892, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.33992759717702126, | |
| "grad_norm": 0.03772537720365855, | |
| "learning_rate": 7.4987723537563395e-06, | |
| "loss": 0.8171, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.3405126704940213, | |
| "grad_norm": 0.14591090698165343, | |
| "learning_rate": 7.497086847899305e-06, | |
| "loss": 0.9374, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.3410977438110213, | |
| "grad_norm": 0.04274276349409338, | |
| "learning_rate": 7.495398725613103e-06, | |
| "loss": 0.8886, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.34168281712802134, | |
| "grad_norm": 0.045419293797600416, | |
| "learning_rate": 7.4937079883238644e-06, | |
| "loss": 0.8874, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.34226789044502137, | |
| "grad_norm": 0.039369245868335276, | |
| "learning_rate": 7.4920146374599305e-06, | |
| "loss": 0.8515, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.3428529637620214, | |
| "grad_norm": 0.041038361870929824, | |
| "learning_rate": 7.490318674451848e-06, | |
| "loss": 0.8352, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.3434380370790215, | |
| "grad_norm": 0.03546313415290626, | |
| "learning_rate": 7.488620100732373e-06, | |
| "loss": 0.8126, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.3440231103960215, | |
| "grad_norm": 0.047686593783821035, | |
| "learning_rate": 7.486918917736467e-06, | |
| "loss": 0.8438, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.34460818371302154, | |
| "grad_norm": 0.03860874121220213, | |
| "learning_rate": 7.485215126901294e-06, | |
| "loss": 0.796, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.34519325703002157, | |
| "grad_norm": 0.04131878299287184, | |
| "learning_rate": 7.483508729666222e-06, | |
| "loss": 0.8787, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.3457783303470216, | |
| "grad_norm": 0.0626727371222094, | |
| "learning_rate": 7.481799727472821e-06, | |
| "loss": 0.8556, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.34636340366402163, | |
| "grad_norm": 0.046182038395419095, | |
| "learning_rate": 7.480088121764862e-06, | |
| "loss": 0.8362, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.34694847698102166, | |
| "grad_norm": 0.04057968397212824, | |
| "learning_rate": 7.478373913988314e-06, | |
| "loss": 0.8382, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.34753355029802174, | |
| "grad_norm": 0.04060993045831877, | |
| "learning_rate": 7.476657105591347e-06, | |
| "loss": 0.844, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.34811862361502177, | |
| "grad_norm": 0.042757904869809, | |
| "learning_rate": 7.474937698024326e-06, | |
| "loss": 0.8323, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.3487036969320218, | |
| "grad_norm": 0.042358138791525404, | |
| "learning_rate": 7.4732156927398134e-06, | |
| "loss": 0.8055, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.34928877024902183, | |
| "grad_norm": 0.04569798776629126, | |
| "learning_rate": 7.4714910911925614e-06, | |
| "loss": 0.7941, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.34987384356602186, | |
| "grad_norm": 0.0438874811170573, | |
| "learning_rate": 7.469763894839523e-06, | |
| "loss": 0.919, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.3504589168830219, | |
| "grad_norm": 0.06653725512945165, | |
| "learning_rate": 7.468034105139836e-06, | |
| "loss": 0.8574, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.3510439902000219, | |
| "grad_norm": 0.04995025638554342, | |
| "learning_rate": 7.466301723554835e-06, | |
| "loss": 0.8496, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.351629063517022, | |
| "grad_norm": 0.04283576163163911, | |
| "learning_rate": 7.46456675154804e-06, | |
| "loss": 0.8757, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.35221413683402203, | |
| "grad_norm": 0.039849657265063225, | |
| "learning_rate": 7.462829190585162e-06, | |
| "loss": 0.8945, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.35279921015102206, | |
| "grad_norm": 0.0440391834022003, | |
| "learning_rate": 7.461089042134098e-06, | |
| "loss": 0.8571, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.3533842834680221, | |
| "grad_norm": 0.043132549255425784, | |
| "learning_rate": 7.45934630766493e-06, | |
| "loss": 0.8668, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.3539693567850221, | |
| "grad_norm": 0.04249197374690922, | |
| "learning_rate": 7.4576009886499285e-06, | |
| "loss": 0.8797, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.35455443010202214, | |
| "grad_norm": 0.05324832438497396, | |
| "learning_rate": 7.455853086563542e-06, | |
| "loss": 0.9684, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.3551395034190222, | |
| "grad_norm": 0.04904764924574238, | |
| "learning_rate": 7.454102602882405e-06, | |
| "loss": 0.7753, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.35572457673602226, | |
| "grad_norm": 0.039014384352298336, | |
| "learning_rate": 7.452349539085334e-06, | |
| "loss": 0.8561, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.3563096500530223, | |
| "grad_norm": 0.03934058564150346, | |
| "learning_rate": 7.4505938966533175e-06, | |
| "loss": 0.8438, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.3568947233700223, | |
| "grad_norm": 0.05279799201688822, | |
| "learning_rate": 7.448835677069536e-06, | |
| "loss": 0.8912, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.35747979668702234, | |
| "grad_norm": 0.04001456755669222, | |
| "learning_rate": 7.447074881819332e-06, | |
| "loss": 0.8553, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.3580648700040224, | |
| "grad_norm": 0.042425970420024564, | |
| "learning_rate": 7.445311512390233e-06, | |
| "loss": 0.8327, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.3586499433210224, | |
| "grad_norm": 0.061113675404079935, | |
| "learning_rate": 7.443545570271942e-06, | |
| "loss": 0.8842, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.35923501663802243, | |
| "grad_norm": 0.046390573358408176, | |
| "learning_rate": 7.44177705695633e-06, | |
| "loss": 0.869, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.3598200899550225, | |
| "grad_norm": 0.04323809663682179, | |
| "learning_rate": 7.440005973937445e-06, | |
| "loss": 0.9184, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.36040516327202254, | |
| "grad_norm": 0.04915044460354856, | |
| "learning_rate": 7.4382323227115e-06, | |
| "loss": 0.8376, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.3609902365890226, | |
| "grad_norm": 0.0385546720018955, | |
| "learning_rate": 7.436456104776885e-06, | |
| "loss": 0.8515, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.3615753099060226, | |
| "grad_norm": 0.042848988320383044, | |
| "learning_rate": 7.4346773216341545e-06, | |
| "loss": 0.867, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.36216038322302263, | |
| "grad_norm": 0.04584623182025803, | |
| "learning_rate": 7.432895974786029e-06, | |
| "loss": 0.8974, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.36274545654002266, | |
| "grad_norm": 0.04282342241688822, | |
| "learning_rate": 7.431112065737397e-06, | |
| "loss": 0.8623, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.3633305298570227, | |
| "grad_norm": 0.04076113650735267, | |
| "learning_rate": 7.429325595995311e-06, | |
| "loss": 0.8682, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.3639156031740227, | |
| "grad_norm": 0.05126027130856797, | |
| "learning_rate": 7.427536567068985e-06, | |
| "loss": 0.906, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.3645006764910228, | |
| "grad_norm": 0.049318751888490414, | |
| "learning_rate": 7.4257449804697975e-06, | |
| "loss": 0.8397, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.36508574980802283, | |
| "grad_norm": 0.043254726523200684, | |
| "learning_rate": 7.423950837711287e-06, | |
| "loss": 0.8622, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.36567082312502286, | |
| "grad_norm": 0.05038943712513383, | |
| "learning_rate": 7.422154140309151e-06, | |
| "loss": 0.8775, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.3662558964420229, | |
| "grad_norm": 0.04134166981874639, | |
| "learning_rate": 7.420354889781245e-06, | |
| "loss": 0.8226, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.3668409697590229, | |
| "grad_norm": 0.03866147658091302, | |
| "learning_rate": 7.418553087647582e-06, | |
| "loss": 0.8664, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.36742604307602295, | |
| "grad_norm": 0.038894973852294755, | |
| "learning_rate": 7.416748735430332e-06, | |
| "loss": 0.8796, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.368011116393023, | |
| "grad_norm": 0.05664602547094577, | |
| "learning_rate": 7.4149418346538144e-06, | |
| "loss": 0.9451, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.36859618971002306, | |
| "grad_norm": 0.03682659768264277, | |
| "learning_rate": 7.413132386844507e-06, | |
| "loss": 0.7877, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.3691812630270231, | |
| "grad_norm": 0.04070102583713315, | |
| "learning_rate": 7.411320393531038e-06, | |
| "loss": 0.8647, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.3697663363440231, | |
| "grad_norm": 0.037583772238652974, | |
| "learning_rate": 7.4095058562441835e-06, | |
| "loss": 0.8984, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.37035140966102315, | |
| "grad_norm": 0.03745618837783022, | |
| "learning_rate": 7.407688776516873e-06, | |
| "loss": 0.8077, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.3709364829780232, | |
| "grad_norm": 0.04719274188930425, | |
| "learning_rate": 7.405869155884178e-06, | |
| "loss": 0.7846, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.3715215562950232, | |
| "grad_norm": 0.04726483533088948, | |
| "learning_rate": 7.404046995883322e-06, | |
| "loss": 0.9625, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.37210662961202323, | |
| "grad_norm": 0.03718098121534363, | |
| "learning_rate": 7.402222298053672e-06, | |
| "loss": 0.7673, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.3726917029290233, | |
| "grad_norm": 0.041699609984716224, | |
| "learning_rate": 7.400395063936738e-06, | |
| "loss": 0.8846, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.37327677624602335, | |
| "grad_norm": 0.03781340458428561, | |
| "learning_rate": 7.3985652950761734e-06, | |
| "loss": 0.8589, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.3738618495630234, | |
| "grad_norm": 0.05169204738245716, | |
| "learning_rate": 7.396732993017774e-06, | |
| "loss": 0.8737, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.3744469228800234, | |
| "grad_norm": 0.040199437093993116, | |
| "learning_rate": 7.394898159309474e-06, | |
| "loss": 0.8432, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.37503199619702343, | |
| "grad_norm": 0.04129820957000115, | |
| "learning_rate": 7.393060795501346e-06, | |
| "loss": 0.8363, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.37561706951402346, | |
| "grad_norm": 0.04346950641443829, | |
| "learning_rate": 7.391220903145602e-06, | |
| "loss": 0.8008, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.3762021428310235, | |
| "grad_norm": 0.043967535702394124, | |
| "learning_rate": 7.389378483796589e-06, | |
| "loss": 0.8695, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.3767872161480236, | |
| "grad_norm": 0.037870985542036356, | |
| "learning_rate": 7.387533539010789e-06, | |
| "loss": 0.7874, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.3773722894650236, | |
| "grad_norm": 0.0394925511025262, | |
| "learning_rate": 7.385686070346818e-06, | |
| "loss": 0.8427, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.37795736278202363, | |
| "grad_norm": 0.03932506659821688, | |
| "learning_rate": 7.383836079365423e-06, | |
| "loss": 0.8773, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.37854243609902366, | |
| "grad_norm": 0.04174103140864924, | |
| "learning_rate": 7.381983567629482e-06, | |
| "loss": 0.8532, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.3791275094160237, | |
| "grad_norm": 0.03777546260763163, | |
| "learning_rate": 7.380128536704003e-06, | |
| "loss": 0.842, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.3797125827330237, | |
| "grad_norm": 0.038903962254009807, | |
| "learning_rate": 7.378270988156122e-06, | |
| "loss": 0.9141, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.38029765605002375, | |
| "grad_norm": 0.03931172416456551, | |
| "learning_rate": 7.376410923555104e-06, | |
| "loss": 0.8382, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.38088272936702383, | |
| "grad_norm": 0.03830193944287262, | |
| "learning_rate": 7.374548344472336e-06, | |
| "loss": 0.913, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.38146780268402386, | |
| "grad_norm": 0.05689973145188307, | |
| "learning_rate": 7.372683252481333e-06, | |
| "loss": 0.9233, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.3820528760010239, | |
| "grad_norm": 0.11927191175958904, | |
| "learning_rate": 7.370815649157728e-06, | |
| "loss": 0.8497, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.3826379493180239, | |
| "grad_norm": 0.04879685015775887, | |
| "learning_rate": 7.36894553607928e-06, | |
| "loss": 0.8902, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.38322302263502395, | |
| "grad_norm": 0.038545163591213454, | |
| "learning_rate": 7.3670729148258655e-06, | |
| "loss": 0.8101, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.383808095952024, | |
| "grad_norm": 0.07018595700302209, | |
| "learning_rate": 7.365197786979483e-06, | |
| "loss": 0.8732, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.384393169269024, | |
| "grad_norm": 0.03883816516578689, | |
| "learning_rate": 7.3633201541242465e-06, | |
| "loss": 0.9438, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.3849782425860241, | |
| "grad_norm": 0.045681207579253334, | |
| "learning_rate": 7.3614400178463834e-06, | |
| "loss": 0.8083, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.3855633159030241, | |
| "grad_norm": 0.03536545477300677, | |
| "learning_rate": 7.359557379734242e-06, | |
| "loss": 0.7559, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.38614838922002415, | |
| "grad_norm": 0.03989482911665265, | |
| "learning_rate": 7.357672241378282e-06, | |
| "loss": 0.8969, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.3867334625370242, | |
| "grad_norm": 0.03858611847647411, | |
| "learning_rate": 7.355784604371071e-06, | |
| "loss": 0.835, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.3873185358540242, | |
| "grad_norm": 0.04523699088789738, | |
| "learning_rate": 7.353894470307294e-06, | |
| "loss": 0.8641, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.38790360917102423, | |
| "grad_norm": 0.041661369856111206, | |
| "learning_rate": 7.352001840783741e-06, | |
| "loss": 0.834, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.38848868248802426, | |
| "grad_norm": 0.041538283343954825, | |
| "learning_rate": 7.3501067173993115e-06, | |
| "loss": 0.9114, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.3890737558050243, | |
| "grad_norm": 0.039602491788351755, | |
| "learning_rate": 7.348209101755012e-06, | |
| "loss": 0.8479, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.3896588291220244, | |
| "grad_norm": 0.04077290004316095, | |
| "learning_rate": 7.346308995453956e-06, | |
| "loss": 0.8511, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.3902439024390244, | |
| "grad_norm": 0.044595088605505394, | |
| "learning_rate": 7.344406400101358e-06, | |
| "loss": 0.8121, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.39082897575602443, | |
| "grad_norm": 0.04520053665259006, | |
| "learning_rate": 7.342501317304538e-06, | |
| "loss": 0.8916, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.39141404907302446, | |
| "grad_norm": 0.04048302305644283, | |
| "learning_rate": 7.340593748672915e-06, | |
| "loss": 0.9092, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.3919991223900245, | |
| "grad_norm": 0.04037233961111267, | |
| "learning_rate": 7.33868369581801e-06, | |
| "loss": 0.881, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.3925841957070245, | |
| "grad_norm": 0.05051075945293654, | |
| "learning_rate": 7.336771160353441e-06, | |
| "loss": 0.8109, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.39316926902402455, | |
| "grad_norm": 0.049551953529107955, | |
| "learning_rate": 7.334856143894927e-06, | |
| "loss": 0.9017, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.39375434234102463, | |
| "grad_norm": 0.03993075169550292, | |
| "learning_rate": 7.332938648060276e-06, | |
| "loss": 0.8542, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.39433941565802466, | |
| "grad_norm": 0.038451189858571745, | |
| "learning_rate": 7.331018674469396e-06, | |
| "loss": 0.8112, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.3949244889750247, | |
| "grad_norm": 0.0505499896286193, | |
| "learning_rate": 7.329096224744291e-06, | |
| "loss": 0.8813, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.3955095622920247, | |
| "grad_norm": 0.071098260548373, | |
| "learning_rate": 7.3271713005090494e-06, | |
| "loss": 0.7823, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.39609463560902475, | |
| "grad_norm": 0.039554913835090445, | |
| "learning_rate": 7.325243903389853e-06, | |
| "loss": 0.8741, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.3966797089260248, | |
| "grad_norm": 0.03860100123135944, | |
| "learning_rate": 7.323314035014974e-06, | |
| "loss": 0.8042, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.3972647822430248, | |
| "grad_norm": 0.043534126452199624, | |
| "learning_rate": 7.321381697014771e-06, | |
| "loss": 0.8062, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.3978498555600249, | |
| "grad_norm": 0.04429511697906778, | |
| "learning_rate": 7.319446891021693e-06, | |
| "loss": 0.8726, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.3984349288770249, | |
| "grad_norm": 0.03812425577668118, | |
| "learning_rate": 7.317509618670267e-06, | |
| "loss": 0.8136, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.39902000219402495, | |
| "grad_norm": 0.07345134546444418, | |
| "learning_rate": 7.315569881597106e-06, | |
| "loss": 0.8483, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.399605075511025, | |
| "grad_norm": 0.038808207078641584, | |
| "learning_rate": 7.313627681440909e-06, | |
| "loss": 0.9122, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.400190148828025, | |
| "grad_norm": 0.03882381554371386, | |
| "learning_rate": 7.311683019842453e-06, | |
| "loss": 0.8767, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.40077522214502503, | |
| "grad_norm": 0.09532542796696841, | |
| "learning_rate": 7.309735898444593e-06, | |
| "loss": 0.817, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.40136029546202506, | |
| "grad_norm": 0.0401829342578114, | |
| "learning_rate": 7.307786318892265e-06, | |
| "loss": 0.9071, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.40194536877902515, | |
| "grad_norm": 0.03903463743661068, | |
| "learning_rate": 7.305834282832478e-06, | |
| "loss": 0.9161, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.4025304420960252, | |
| "grad_norm": 0.03813349508001005, | |
| "learning_rate": 7.303879791914321e-06, | |
| "loss": 0.7688, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.4031155154130252, | |
| "grad_norm": 0.03801357430065186, | |
| "learning_rate": 7.301922847788953e-06, | |
| "loss": 0.7473, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.40370058873002523, | |
| "grad_norm": 0.0391358430740653, | |
| "learning_rate": 7.299963452109607e-06, | |
| "loss": 0.84, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.40428566204702526, | |
| "grad_norm": 0.04237700699339236, | |
| "learning_rate": 7.298001606531588e-06, | |
| "loss": 0.7707, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.4048707353640253, | |
| "grad_norm": 0.04484671718594054, | |
| "learning_rate": 7.296037312712267e-06, | |
| "loss": 0.8456, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.4054558086810253, | |
| "grad_norm": 0.04025195813747376, | |
| "learning_rate": 7.2940705723110895e-06, | |
| "loss": 0.8882, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.4060408819980254, | |
| "grad_norm": 0.2399660293834239, | |
| "learning_rate": 7.292101386989561e-06, | |
| "loss": 0.8086, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.40662595531502543, | |
| "grad_norm": 0.03485270510298816, | |
| "learning_rate": 7.290129758411258e-06, | |
| "loss": 0.8997, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.40721102863202546, | |
| "grad_norm": 0.03819929313036679, | |
| "learning_rate": 7.288155688241819e-06, | |
| "loss": 0.9212, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.4077961019490255, | |
| "grad_norm": 0.03510738864060292, | |
| "learning_rate": 7.286179178148942e-06, | |
| "loss": 0.8754, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.4083811752660255, | |
| "grad_norm": 0.043632341643109356, | |
| "learning_rate": 7.284200229802391e-06, | |
| "loss": 0.7717, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.40896624858302555, | |
| "grad_norm": 0.04089651755929757, | |
| "learning_rate": 7.28221884487399e-06, | |
| "loss": 0.88, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.4095513219000256, | |
| "grad_norm": 0.037780685942199126, | |
| "learning_rate": 7.280235025037616e-06, | |
| "loss": 0.8197, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.4101363952170256, | |
| "grad_norm": 0.03655073966270369, | |
| "learning_rate": 7.27824877196921e-06, | |
| "loss": 0.8279, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.4107214685340257, | |
| "grad_norm": 0.05600910511927575, | |
| "learning_rate": 7.2762600873467624e-06, | |
| "loss": 0.8476, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.4113065418510257, | |
| "grad_norm": 0.034951743393564536, | |
| "learning_rate": 7.274268972850321e-06, | |
| "loss": 0.7609, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.41189161516802575, | |
| "grad_norm": 0.09106547558232417, | |
| "learning_rate": 7.272275430161988e-06, | |
| "loss": 0.8996, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.4124766884850258, | |
| "grad_norm": 0.051359026093647085, | |
| "learning_rate": 7.270279460965912e-06, | |
| "loss": 0.9052, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.4130617618020258, | |
| "grad_norm": 0.03905089647983441, | |
| "learning_rate": 7.268281066948296e-06, | |
| "loss": 0.918, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.41364683511902584, | |
| "grad_norm": 0.04206925775516373, | |
| "learning_rate": 7.2662802497973875e-06, | |
| "loss": 0.8053, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.41423190843602586, | |
| "grad_norm": 0.040474218310822804, | |
| "learning_rate": 7.264277011203488e-06, | |
| "loss": 0.891, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.41481698175302595, | |
| "grad_norm": 0.0442335071782456, | |
| "learning_rate": 7.262271352858936e-06, | |
| "loss": 0.8593, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.415402055070026, | |
| "grad_norm": 0.047122148951545366, | |
| "learning_rate": 7.26026327645812e-06, | |
| "loss": 0.844, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.415987128387026, | |
| "grad_norm": 0.03850054120717149, | |
| "learning_rate": 7.258252783697469e-06, | |
| "loss": 0.7795, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.41657220170402603, | |
| "grad_norm": 0.050566843036294336, | |
| "learning_rate": 7.2562398762754554e-06, | |
| "loss": 0.7794, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.41715727502102606, | |
| "grad_norm": 0.058106581796564256, | |
| "learning_rate": 7.254224555892587e-06, | |
| "loss": 0.9735, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.4177423483380261, | |
| "grad_norm": 0.03831983478639148, | |
| "learning_rate": 7.252206824251416e-06, | |
| "loss": 0.7832, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.4183274216550261, | |
| "grad_norm": 0.0431158884889512, | |
| "learning_rate": 7.250186683056527e-06, | |
| "loss": 0.7865, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.4189124949720262, | |
| "grad_norm": 0.046081855255802225, | |
| "learning_rate": 7.248164134014544e-06, | |
| "loss": 0.8512, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.41949756828902623, | |
| "grad_norm": 0.04502443182272067, | |
| "learning_rate": 7.246139178834119e-06, | |
| "loss": 0.853, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.42008264160602626, | |
| "grad_norm": 0.045461255479903595, | |
| "learning_rate": 7.244111819225946e-06, | |
| "loss": 0.8262, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.4206677149230263, | |
| "grad_norm": 0.038558457568846585, | |
| "learning_rate": 7.24208205690274e-06, | |
| "loss": 0.7975, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.4212527882400263, | |
| "grad_norm": 0.04654917549136812, | |
| "learning_rate": 7.240049893579256e-06, | |
| "loss": 0.8849, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.42183786155702635, | |
| "grad_norm": 0.04307894480353439, | |
| "learning_rate": 7.238015330972268e-06, | |
| "loss": 0.805, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.4224229348740264, | |
| "grad_norm": 0.04281636756195477, | |
| "learning_rate": 7.235978370800583e-06, | |
| "loss": 0.8471, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.42300800819102646, | |
| "grad_norm": 0.04507551309506065, | |
| "learning_rate": 7.233939014785032e-06, | |
| "loss": 0.8468, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.4235930815080265, | |
| "grad_norm": 0.06918610285412721, | |
| "learning_rate": 7.2318972646484685e-06, | |
| "loss": 0.8655, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.4241781548250265, | |
| "grad_norm": 0.04721791843997281, | |
| "learning_rate": 7.229853122115772e-06, | |
| "loss": 0.7927, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.42476322814202655, | |
| "grad_norm": 0.041834283551617495, | |
| "learning_rate": 7.227806588913838e-06, | |
| "loss": 0.8712, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.4253483014590266, | |
| "grad_norm": 0.05505118490346748, | |
| "learning_rate": 7.225757666771585e-06, | |
| "loss": 0.8584, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.4259333747760266, | |
| "grad_norm": 0.047773194756069906, | |
| "learning_rate": 7.223706357419951e-06, | |
| "loss": 0.7893, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.42651844809302664, | |
| "grad_norm": 0.04822106540139186, | |
| "learning_rate": 7.221652662591887e-06, | |
| "loss": 0.8277, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.4271035214100267, | |
| "grad_norm": 0.03969595772236796, | |
| "learning_rate": 7.219596584022363e-06, | |
| "loss": 0.8394, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.42768859472702675, | |
| "grad_norm": 0.0422480401855194, | |
| "learning_rate": 7.217538123448359e-06, | |
| "loss": 0.8094, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.4282736680440268, | |
| "grad_norm": 0.0394391284081235, | |
| "learning_rate": 7.215477282608871e-06, | |
| "loss": 0.8847, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.4288587413610268, | |
| "grad_norm": 0.05956332380443304, | |
| "learning_rate": 7.213414063244903e-06, | |
| "loss": 0.8427, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.42944381467802684, | |
| "grad_norm": 0.03974032496622027, | |
| "learning_rate": 7.21134846709947e-06, | |
| "loss": 0.7986, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.43002888799502687, | |
| "grad_norm": 0.061979287274211155, | |
| "learning_rate": 7.209280495917594e-06, | |
| "loss": 0.8178, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.4306139613120269, | |
| "grad_norm": 0.04099124105139921, | |
| "learning_rate": 7.2072101514463045e-06, | |
| "loss": 0.7936, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.4311990346290269, | |
| "grad_norm": 0.04101203671080088, | |
| "learning_rate": 7.205137435434634e-06, | |
| "loss": 0.8607, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.431784107946027, | |
| "grad_norm": 0.04063679329152861, | |
| "learning_rate": 7.203062349633622e-06, | |
| "loss": 0.8066, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.43236918126302704, | |
| "grad_norm": 0.045567608816787585, | |
| "learning_rate": 7.200984895796305e-06, | |
| "loss": 0.8558, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.43295425458002706, | |
| "grad_norm": 0.0514210080543997, | |
| "learning_rate": 7.198905075677726e-06, | |
| "loss": 0.7855, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.4335393278970271, | |
| "grad_norm": 0.0404180379498117, | |
| "learning_rate": 7.196822891034922e-06, | |
| "loss": 0.9028, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.4341244012140271, | |
| "grad_norm": 0.03964126441343688, | |
| "learning_rate": 7.1947383436269295e-06, | |
| "loss": 0.872, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.43470947453102715, | |
| "grad_norm": 0.04863222203139086, | |
| "learning_rate": 7.192651435214781e-06, | |
| "loss": 0.9288, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.4352945478480272, | |
| "grad_norm": 0.04412474170590896, | |
| "learning_rate": 7.190562167561505e-06, | |
| "loss": 0.836, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.43587962116502726, | |
| "grad_norm": 0.16048273545352948, | |
| "learning_rate": 7.188470542432119e-06, | |
| "loss": 0.7639, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.4364646944820273, | |
| "grad_norm": 0.04767484164607536, | |
| "learning_rate": 7.1863765615936375e-06, | |
| "loss": 0.8481, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.4370497677990273, | |
| "grad_norm": 0.06989126022408862, | |
| "learning_rate": 7.184280226815061e-06, | |
| "loss": 0.8569, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.43763484111602735, | |
| "grad_norm": 0.041900678395983416, | |
| "learning_rate": 7.18218153986738e-06, | |
| "loss": 0.931, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.4382199144330274, | |
| "grad_norm": 0.04216846873986442, | |
| "learning_rate": 7.180080502523572e-06, | |
| "loss": 0.8646, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.4388049877500274, | |
| "grad_norm": 0.050264349955050885, | |
| "learning_rate": 7.177977116558601e-06, | |
| "loss": 0.8199, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.43939006106702744, | |
| "grad_norm": 0.03843848162959958, | |
| "learning_rate": 7.175871383749415e-06, | |
| "loss": 0.8097, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.4399751343840275, | |
| "grad_norm": 0.05545552486681674, | |
| "learning_rate": 7.173763305874942e-06, | |
| "loss": 0.9036, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.44056020770102755, | |
| "grad_norm": 0.040372444436672136, | |
| "learning_rate": 7.1716528847160944e-06, | |
| "loss": 0.7861, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.4411452810180276, | |
| "grad_norm": 0.03823936143241982, | |
| "learning_rate": 7.169540122055764e-06, | |
| "loss": 0.7976, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.4417303543350276, | |
| "grad_norm": 0.040595268808395715, | |
| "learning_rate": 7.167425019678817e-06, | |
| "loss": 0.8007, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.44231542765202764, | |
| "grad_norm": 0.04580612084615541, | |
| "learning_rate": 7.1653075793721e-06, | |
| "loss": 0.741, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.44290050096902767, | |
| "grad_norm": 0.044516769340694984, | |
| "learning_rate": 7.163187802924435e-06, | |
| "loss": 0.911, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.4434855742860277, | |
| "grad_norm": 0.04502988747564978, | |
| "learning_rate": 7.161065692126614e-06, | |
| "loss": 0.8775, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.4440706476030278, | |
| "grad_norm": 0.041288645539491076, | |
| "learning_rate": 7.1589412487714055e-06, | |
| "loss": 0.8393, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.4446557209200278, | |
| "grad_norm": 0.04219916867709284, | |
| "learning_rate": 7.156814474653542e-06, | |
| "loss": 0.8203, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.44524079423702784, | |
| "grad_norm": 0.04510176879579754, | |
| "learning_rate": 7.154685371569736e-06, | |
| "loss": 0.7882, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.44582586755402787, | |
| "grad_norm": 0.03956297558604167, | |
| "learning_rate": 7.152553941318655e-06, | |
| "loss": 0.8313, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.4464109408710279, | |
| "grad_norm": 0.03781666670962381, | |
| "learning_rate": 7.15042018570094e-06, | |
| "loss": 0.9057, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.4469960141880279, | |
| "grad_norm": 0.04278185971588543, | |
| "learning_rate": 7.148284106519195e-06, | |
| "loss": 0.842, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.44758108750502795, | |
| "grad_norm": 0.03726277158845805, | |
| "learning_rate": 7.1461457055779875e-06, | |
| "loss": 0.8003, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.44816616082202804, | |
| "grad_norm": 0.044396475499465786, | |
| "learning_rate": 7.144004984683844e-06, | |
| "loss": 0.8393, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.44875123413902807, | |
| "grad_norm": 0.03484609580213948, | |
| "learning_rate": 7.141861945645254e-06, | |
| "loss": 0.8255, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.4493363074560281, | |
| "grad_norm": 0.04524092857108677, | |
| "learning_rate": 7.139716590272663e-06, | |
| "loss": 0.7811, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.4499213807730281, | |
| "grad_norm": 0.03734702819768394, | |
| "learning_rate": 7.1375689203784755e-06, | |
| "loss": 0.8644, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.45050645409002815, | |
| "grad_norm": 0.05019144085295421, | |
| "learning_rate": 7.135418937777049e-06, | |
| "loss": 0.9044, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.4510915274070282, | |
| "grad_norm": 0.04029746203545264, | |
| "learning_rate": 7.133266644284696e-06, | |
| "loss": 0.7769, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.4516766007240282, | |
| "grad_norm": 0.04262838461849833, | |
| "learning_rate": 7.131112041719681e-06, | |
| "loss": 0.9143, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.45226167404102824, | |
| "grad_norm": 0.043876149904780545, | |
| "learning_rate": 7.1289551319022195e-06, | |
| "loss": 0.8828, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.4528467473580283, | |
| "grad_norm": 0.04711071987260168, | |
| "learning_rate": 7.126795916654477e-06, | |
| "loss": 0.7762, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.45343182067502835, | |
| "grad_norm": 0.04116549252293238, | |
| "learning_rate": 7.124634397800565e-06, | |
| "loss": 0.7778, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.4540168939920284, | |
| "grad_norm": 0.0393006386390472, | |
| "learning_rate": 7.1224705771665405e-06, | |
| "loss": 0.8465, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.4546019673090284, | |
| "grad_norm": 0.03826784736833335, | |
| "learning_rate": 7.120304456580408e-06, | |
| "loss": 0.8359, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.45518704062602844, | |
| "grad_norm": 0.04985454473324124, | |
| "learning_rate": 7.118136037872112e-06, | |
| "loss": 0.8552, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.45577211394302847, | |
| "grad_norm": 0.04012812213905606, | |
| "learning_rate": 7.115965322873541e-06, | |
| "loss": 0.8249, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.4563571872600285, | |
| "grad_norm": 0.03736935441616661, | |
| "learning_rate": 7.113792313418522e-06, | |
| "loss": 0.8399, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.4569422605770286, | |
| "grad_norm": 0.036669437710784104, | |
| "learning_rate": 7.1116170113428194e-06, | |
| "loss": 0.79, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 0.4575273338940286, | |
| "grad_norm": 0.03882415136861196, | |
| "learning_rate": 7.109439418484137e-06, | |
| "loss": 0.8016, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.45811240721102864, | |
| "grad_norm": 0.0452764493144211, | |
| "learning_rate": 7.107259536682111e-06, | |
| "loss": 0.8138, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.45869748052802867, | |
| "grad_norm": 0.04729394522582173, | |
| "learning_rate": 7.105077367778313e-06, | |
| "loss": 0.7596, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.4592825538450287, | |
| "grad_norm": 0.05019146434388651, | |
| "learning_rate": 7.102892913616248e-06, | |
| "loss": 0.8015, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.4598676271620287, | |
| "grad_norm": 0.039099244411788574, | |
| "learning_rate": 7.100706176041348e-06, | |
| "loss": 0.8098, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.46045270047902875, | |
| "grad_norm": 0.03895433813179543, | |
| "learning_rate": 7.098517156900978e-06, | |
| "loss": 0.8851, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.46103777379602884, | |
| "grad_norm": 0.03731969942911145, | |
| "learning_rate": 7.096325858044427e-06, | |
| "loss": 0.7721, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.46162284711302887, | |
| "grad_norm": 0.04097182225913861, | |
| "learning_rate": 7.094132281322912e-06, | |
| "loss": 0.8223, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.4622079204300289, | |
| "grad_norm": 0.03696891399238777, | |
| "learning_rate": 7.091936428589576e-06, | |
| "loss": 0.8938, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.4627929937470289, | |
| "grad_norm": 0.03792585935917287, | |
| "learning_rate": 7.089738301699479e-06, | |
| "loss": 0.8393, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.46337806706402895, | |
| "grad_norm": 0.10218168425909542, | |
| "learning_rate": 7.087537902509607e-06, | |
| "loss": 0.8016, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.463963140381029, | |
| "grad_norm": 0.03989943757222258, | |
| "learning_rate": 7.085335232878865e-06, | |
| "loss": 0.7431, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 0.464548213698029, | |
| "grad_norm": 0.03900627784304605, | |
| "learning_rate": 7.083130294668076e-06, | |
| "loss": 0.8431, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.4651332870150291, | |
| "grad_norm": 0.05897148270799823, | |
| "learning_rate": 7.080923089739978e-06, | |
| "loss": 0.83, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.4657183603320291, | |
| "grad_norm": 0.03888063452127548, | |
| "learning_rate": 7.078713619959228e-06, | |
| "loss": 0.7915, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.46630343364902915, | |
| "grad_norm": 0.04604225990759911, | |
| "learning_rate": 7.076501887192387e-06, | |
| "loss": 0.8419, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.4668885069660292, | |
| "grad_norm": 0.040295564919243125, | |
| "learning_rate": 7.074287893307941e-06, | |
| "loss": 0.8764, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.4674735802830292, | |
| "grad_norm": 0.03769433658091701, | |
| "learning_rate": 7.072071640176274e-06, | |
| "loss": 0.8566, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.46805865360002924, | |
| "grad_norm": 0.03728814454795478, | |
| "learning_rate": 7.069853129669688e-06, | |
| "loss": 0.771, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.46864372691702927, | |
| "grad_norm": 0.03547834831253728, | |
| "learning_rate": 7.067632363662386e-06, | |
| "loss": 0.7874, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 0.46922880023402935, | |
| "grad_norm": 0.04116629646593246, | |
| "learning_rate": 7.065409344030479e-06, | |
| "loss": 0.8579, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.4698138735510294, | |
| "grad_norm": 0.04284181743182457, | |
| "learning_rate": 7.063184072651981e-06, | |
| "loss": 0.874, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 0.4703989468680294, | |
| "grad_norm": 0.5453076047222728, | |
| "learning_rate": 7.060956551406807e-06, | |
| "loss": 0.8275, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.47098402018502944, | |
| "grad_norm": 0.04285257240888046, | |
| "learning_rate": 7.058726782176778e-06, | |
| "loss": 0.8748, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.47156909350202947, | |
| "grad_norm": 0.0426593418584649, | |
| "learning_rate": 7.056494766845606e-06, | |
| "loss": 0.805, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.4721541668190295, | |
| "grad_norm": 0.03749564999202563, | |
| "learning_rate": 7.05426050729891e-06, | |
| "loss": 0.8289, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 0.4727392401360295, | |
| "grad_norm": 0.044556182570763894, | |
| "learning_rate": 7.052024005424194e-06, | |
| "loss": 0.8757, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.4733243134530296, | |
| "grad_norm": 0.04281051498943111, | |
| "learning_rate": 7.049785263110867e-06, | |
| "loss": 0.8838, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 0.47390938677002964, | |
| "grad_norm": 0.04188599336409731, | |
| "learning_rate": 7.047544282250223e-06, | |
| "loss": 0.8371, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.47449446008702967, | |
| "grad_norm": 0.04079367612711138, | |
| "learning_rate": 7.045301064735451e-06, | |
| "loss": 0.8383, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 0.4750795334040297, | |
| "grad_norm": 0.037411483434431944, | |
| "learning_rate": 7.0430556124616294e-06, | |
| "loss": 0.7866, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.4756646067210297, | |
| "grad_norm": 0.0497395887833391, | |
| "learning_rate": 7.040807927325723e-06, | |
| "loss": 0.836, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 0.47624968003802975, | |
| "grad_norm": 0.039340018189295584, | |
| "learning_rate": 7.038558011226583e-06, | |
| "loss": 0.7925, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.4768347533550298, | |
| "grad_norm": 0.04067266509967937, | |
| "learning_rate": 7.036305866064947e-06, | |
| "loss": 0.8246, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.4774198266720298, | |
| "grad_norm": 0.03756080114515082, | |
| "learning_rate": 7.0340514937434334e-06, | |
| "loss": 0.8091, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.4780048999890299, | |
| "grad_norm": 0.04009793142877897, | |
| "learning_rate": 7.031794896166544e-06, | |
| "loss": 0.8367, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 0.4785899733060299, | |
| "grad_norm": 0.045719600380512315, | |
| "learning_rate": 7.029536075240659e-06, | |
| "loss": 0.8698, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.47917504662302995, | |
| "grad_norm": 0.06146708297478866, | |
| "learning_rate": 7.0272750328740394e-06, | |
| "loss": 0.7769, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 0.47976011994003, | |
| "grad_norm": 0.0379549936030894, | |
| "learning_rate": 7.025011770976821e-06, | |
| "loss": 0.8307, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.48034519325703, | |
| "grad_norm": 0.046629898254568806, | |
| "learning_rate": 7.022746291461013e-06, | |
| "loss": 0.8296, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 0.48093026657403004, | |
| "grad_norm": 0.04903726722961758, | |
| "learning_rate": 7.020478596240503e-06, | |
| "loss": 0.8578, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.48151533989103007, | |
| "grad_norm": 0.044482172504589565, | |
| "learning_rate": 7.018208687231045e-06, | |
| "loss": 0.8339, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 0.48210041320803015, | |
| "grad_norm": 0.04652606756149258, | |
| "learning_rate": 7.015936566350267e-06, | |
| "loss": 0.8629, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.4826854865250302, | |
| "grad_norm": 0.03952788329765694, | |
| "learning_rate": 7.013662235517661e-06, | |
| "loss": 0.8851, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.4832705598420302, | |
| "grad_norm": 0.04737073432575187, | |
| "learning_rate": 7.011385696654594e-06, | |
| "loss": 0.8662, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.48385563315903024, | |
| "grad_norm": 0.03751881453871839, | |
| "learning_rate": 7.0091069516842915e-06, | |
| "loss": 0.8559, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 0.48444070647603027, | |
| "grad_norm": 0.03716116535695275, | |
| "learning_rate": 7.006826002531843e-06, | |
| "loss": 0.7718, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.4850257797930303, | |
| "grad_norm": 0.056266031939443406, | |
| "learning_rate": 7.004542851124203e-06, | |
| "loss": 0.7714, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 0.4856108531100303, | |
| "grad_norm": 0.05535556673447491, | |
| "learning_rate": 7.0022574993901865e-06, | |
| "loss": 0.8676, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.4861959264270304, | |
| "grad_norm": 0.03845486724104416, | |
| "learning_rate": 6.999969949260464e-06, | |
| "loss": 0.7885, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 0.48678099974403044, | |
| "grad_norm": 0.04062056962499875, | |
| "learning_rate": 6.99768020266757e-06, | |
| "loss": 0.8322, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.48736607306103047, | |
| "grad_norm": 0.06424079693979534, | |
| "learning_rate": 6.995388261545884e-06, | |
| "loss": 0.905, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 0.4879511463780305, | |
| "grad_norm": 0.04236693639090112, | |
| "learning_rate": 6.993094127831649e-06, | |
| "loss": 0.8726, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.4885362196950305, | |
| "grad_norm": 0.038818701632739046, | |
| "learning_rate": 6.990797803462955e-06, | |
| "loss": 0.7753, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.48912129301203056, | |
| "grad_norm": 0.05513690717963214, | |
| "learning_rate": 6.988499290379746e-06, | |
| "loss": 0.888, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.4897063663290306, | |
| "grad_norm": 0.040849030445044794, | |
| "learning_rate": 6.986198590523812e-06, | |
| "loss": 0.8466, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 0.49029143964603067, | |
| "grad_norm": 0.040919168417802985, | |
| "learning_rate": 6.983895705838793e-06, | |
| "loss": 0.8589, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.4908765129630307, | |
| "grad_norm": 0.038496795188997325, | |
| "learning_rate": 6.9815906382701725e-06, | |
| "loss": 0.862, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 0.4914615862800307, | |
| "grad_norm": 0.043613843698726576, | |
| "learning_rate": 6.97928338976528e-06, | |
| "loss": 0.8702, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.49204665959703076, | |
| "grad_norm": 0.03941143092937186, | |
| "learning_rate": 6.9769739622732855e-06, | |
| "loss": 0.7831, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 0.4926317329140308, | |
| "grad_norm": 0.06878143755782574, | |
| "learning_rate": 6.974662357745203e-06, | |
| "loss": 0.9225, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.4932168062310308, | |
| "grad_norm": 0.05850718491585764, | |
| "learning_rate": 6.972348578133881e-06, | |
| "loss": 0.8781, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 0.49380187954803084, | |
| "grad_norm": 0.039118395131995005, | |
| "learning_rate": 6.9700326253940095e-06, | |
| "loss": 0.7985, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.4943869528650309, | |
| "grad_norm": 0.057835827757031166, | |
| "learning_rate": 6.967714501482114e-06, | |
| "loss": 0.781, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.49497202618203096, | |
| "grad_norm": 0.05588852958063585, | |
| "learning_rate": 6.965394208356551e-06, | |
| "loss": 0.8423, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.495557099499031, | |
| "grad_norm": 0.039374530415438855, | |
| "learning_rate": 6.9630717479775145e-06, | |
| "loss": 0.8456, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 0.496142172816031, | |
| "grad_norm": 0.0358193083222334, | |
| "learning_rate": 6.960747122307025e-06, | |
| "loss": 0.7992, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.49672724613303104, | |
| "grad_norm": 0.03788661691723569, | |
| "learning_rate": 6.9584203333089325e-06, | |
| "loss": 0.8037, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 0.49731231945003107, | |
| "grad_norm": 0.04201649774003793, | |
| "learning_rate": 6.956091382948918e-06, | |
| "loss": 0.8882, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.4978973927670311, | |
| "grad_norm": 0.04434150316820075, | |
| "learning_rate": 6.953760273194487e-06, | |
| "loss": 0.8166, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 0.49848246608403113, | |
| "grad_norm": 0.07426388002691353, | |
| "learning_rate": 6.951427006014967e-06, | |
| "loss": 0.8424, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.4990675394010312, | |
| "grad_norm": 0.037544290713932796, | |
| "learning_rate": 6.949091583381511e-06, | |
| "loss": 0.7957, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 0.49965261271803124, | |
| "grad_norm": 0.05489041840699949, | |
| "learning_rate": 6.946754007267091e-06, | |
| "loss": 0.9257, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.5002376860350313, | |
| "grad_norm": 0.042595883415080985, | |
| "learning_rate": 6.944414279646499e-06, | |
| "loss": 0.8277, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.5008227593520314, | |
| "grad_norm": 0.08019312917038988, | |
| "learning_rate": 6.942072402496345e-06, | |
| "loss": 0.9115, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.5014078326690313, | |
| "grad_norm": 0.05086333614690917, | |
| "learning_rate": 6.9397283777950545e-06, | |
| "loss": 0.9041, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 0.5019929059860314, | |
| "grad_norm": 0.04214362484163065, | |
| "learning_rate": 6.937382207522867e-06, | |
| "loss": 0.8299, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.5025779793030314, | |
| "grad_norm": 0.04083088848742396, | |
| "learning_rate": 6.935033893661835e-06, | |
| "loss": 0.8356, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 0.5031630526200315, | |
| "grad_norm": 0.04687863044141472, | |
| "learning_rate": 6.932683438195821e-06, | |
| "loss": 0.865, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.5037481259370314, | |
| "grad_norm": 0.04280142790393131, | |
| "learning_rate": 6.9303308431105e-06, | |
| "loss": 0.8862, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 0.5043331992540315, | |
| "grad_norm": 0.04180226845557804, | |
| "learning_rate": 6.92797611039335e-06, | |
| "loss": 0.8046, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.5049182725710316, | |
| "grad_norm": 0.035262278854158474, | |
| "learning_rate": 6.925619242033656e-06, | |
| "loss": 0.8197, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 0.5055033458880316, | |
| "grad_norm": 0.03754079468139451, | |
| "learning_rate": 6.92326024002251e-06, | |
| "loss": 0.8101, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.5060884192050317, | |
| "grad_norm": 0.039581024469053656, | |
| "learning_rate": 6.9208991063528045e-06, | |
| "loss": 0.8607, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.5066734925220316, | |
| "grad_norm": 0.0371798589671245, | |
| "learning_rate": 6.918535843019233e-06, | |
| "loss": 0.8102, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.5072585658390317, | |
| "grad_norm": 0.037768636271352095, | |
| "learning_rate": 6.916170452018288e-06, | |
| "loss": 0.8418, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 0.5078436391560317, | |
| "grad_norm": 0.10093759593186688, | |
| "learning_rate": 6.913802935348258e-06, | |
| "loss": 0.8629, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.5084287124730318, | |
| "grad_norm": 0.03663052824269648, | |
| "learning_rate": 6.911433295009232e-06, | |
| "loss": 0.8162, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 0.5090137857900318, | |
| "grad_norm": 0.04855772810380649, | |
| "learning_rate": 6.909061533003088e-06, | |
| "loss": 0.8616, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.5095988591070318, | |
| "grad_norm": 0.043219673612599416, | |
| "learning_rate": 6.906687651333498e-06, | |
| "loss": 0.7216, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 0.5101839324240319, | |
| "grad_norm": 0.042711861232268646, | |
| "learning_rate": 6.904311652005925e-06, | |
| "loss": 0.7547, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.5107690057410319, | |
| "grad_norm": 0.05574403716352006, | |
| "learning_rate": 6.9019335370276225e-06, | |
| "loss": 0.795, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 0.511354079058032, | |
| "grad_norm": 0.06264056674635722, | |
| "learning_rate": 6.899553308407629e-06, | |
| "loss": 0.85, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.511939152375032, | |
| "grad_norm": 0.04875354726056701, | |
| "learning_rate": 6.89717096815677e-06, | |
| "loss": 0.8407, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.512524225692032, | |
| "grad_norm": 0.038870649597744084, | |
| "learning_rate": 6.894786518287653e-06, | |
| "loss": 0.8076, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.513109299009032, | |
| "grad_norm": 0.05118848362688543, | |
| "learning_rate": 6.8923999608146705e-06, | |
| "loss": 0.8363, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 0.5136943723260321, | |
| "grad_norm": 0.0376112846207315, | |
| "learning_rate": 6.890011297753994e-06, | |
| "loss": 0.7743, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.5142794456430322, | |
| "grad_norm": 0.06896727299920731, | |
| "learning_rate": 6.887620531123574e-06, | |
| "loss": 0.8359, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 0.5148645189600322, | |
| "grad_norm": 0.03963337869499397, | |
| "learning_rate": 6.885227662943136e-06, | |
| "loss": 0.8206, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.5154495922770322, | |
| "grad_norm": 0.04870453554962347, | |
| "learning_rate": 6.882832695234186e-06, | |
| "loss": 0.8189, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 0.5160346655940322, | |
| "grad_norm": 0.03835351064840454, | |
| "learning_rate": 6.880435630019998e-06, | |
| "loss": 0.8973, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.5166197389110323, | |
| "grad_norm": 0.058777196577975345, | |
| "learning_rate": 6.8780364693256224e-06, | |
| "loss": 0.8733, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 0.5172048122280323, | |
| "grad_norm": 0.04023477251259275, | |
| "learning_rate": 6.875635215177878e-06, | |
| "loss": 0.7674, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.5177898855450324, | |
| "grad_norm": 0.10180465827282513, | |
| "learning_rate": 6.873231869605351e-06, | |
| "loss": 0.8238, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.5183749588620324, | |
| "grad_norm": 0.03697008145978301, | |
| "learning_rate": 6.870826434638396e-06, | |
| "loss": 0.8113, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.5189600321790324, | |
| "grad_norm": 0.03765250989596081, | |
| "learning_rate": 6.868418912309133e-06, | |
| "loss": 0.8409, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 0.5195451054960325, | |
| "grad_norm": 0.03863967736287724, | |
| "learning_rate": 6.866009304651444e-06, | |
| "loss": 0.7622, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.5201301788130325, | |
| "grad_norm": 0.04809769182496589, | |
| "learning_rate": 6.8635976137009735e-06, | |
| "loss": 0.838, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 0.5207152521300326, | |
| "grad_norm": 0.04589536972341525, | |
| "learning_rate": 6.861183841495127e-06, | |
| "loss": 0.9291, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.5213003254470325, | |
| "grad_norm": 0.04495994452664241, | |
| "learning_rate": 6.858767990073066e-06, | |
| "loss": 0.9015, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 0.5218853987640326, | |
| "grad_norm": 0.03736417738835852, | |
| "learning_rate": 6.856350061475712e-06, | |
| "loss": 0.7575, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.5224704720810327, | |
| "grad_norm": 0.039650470820273634, | |
| "learning_rate": 6.853930057745735e-06, | |
| "loss": 0.7939, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 0.5230555453980327, | |
| "grad_norm": 0.04072420592259055, | |
| "learning_rate": 6.8515079809275656e-06, | |
| "loss": 0.8026, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.5236406187150328, | |
| "grad_norm": 0.04022954757166665, | |
| "learning_rate": 6.849083833067381e-06, | |
| "loss": 0.7891, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.5242256920320327, | |
| "grad_norm": 0.04135409431135059, | |
| "learning_rate": 6.846657616213109e-06, | |
| "loss": 0.758, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.5248107653490328, | |
| "grad_norm": 0.03960521359539724, | |
| "learning_rate": 6.844229332414427e-06, | |
| "loss": 0.8038, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 0.5253958386660328, | |
| "grad_norm": 0.03765896158372491, | |
| "learning_rate": 6.841798983722755e-06, | |
| "loss": 0.7877, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.5259809119830329, | |
| "grad_norm": 0.058839179066597226, | |
| "learning_rate": 6.839366572191262e-06, | |
| "loss": 0.7523, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 0.526565985300033, | |
| "grad_norm": 0.040054239033310936, | |
| "learning_rate": 6.836932099874856e-06, | |
| "loss": 0.8321, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.5271510586170329, | |
| "grad_norm": 0.0627577307413516, | |
| "learning_rate": 6.834495568830187e-06, | |
| "loss": 0.8389, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 0.527736131934033, | |
| "grad_norm": 0.036542541933322875, | |
| "learning_rate": 6.832056981115644e-06, | |
| "loss": 0.8108, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.528321205251033, | |
| "grad_norm": 0.039969851332150676, | |
| "learning_rate": 6.8296163387913545e-06, | |
| "loss": 0.8144, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 0.5289062785680331, | |
| "grad_norm": 0.04091122066442238, | |
| "learning_rate": 6.827173643919181e-06, | |
| "loss": 0.8865, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.529491351885033, | |
| "grad_norm": 0.04077179519179353, | |
| "learning_rate": 6.824728898562721e-06, | |
| "loss": 0.8353, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.5300764252020331, | |
| "grad_norm": 0.045472282387591885, | |
| "learning_rate": 6.822282104787305e-06, | |
| "loss": 0.8175, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.5306614985190332, | |
| "grad_norm": 0.03919660004429457, | |
| "learning_rate": 6.819833264659988e-06, | |
| "loss": 0.7968, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 0.5312465718360332, | |
| "grad_norm": 0.0550475630156648, | |
| "learning_rate": 6.81738238024956e-06, | |
| "loss": 0.8005, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.5318316451530333, | |
| "grad_norm": 0.04044516211789697, | |
| "learning_rate": 6.814929453626538e-06, | |
| "loss": 0.8056, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 0.5324167184700332, | |
| "grad_norm": 0.04512920702041598, | |
| "learning_rate": 6.81247448686316e-06, | |
| "loss": 0.8061, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.5330017917870333, | |
| "grad_norm": 0.06440782132528662, | |
| "learning_rate": 6.810017482033392e-06, | |
| "loss": 0.8471, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 0.5335868651040333, | |
| "grad_norm": 0.03695317579916004, | |
| "learning_rate": 6.8075584412129205e-06, | |
| "loss": 0.8222, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.5341719384210334, | |
| "grad_norm": 0.03666660940321042, | |
| "learning_rate": 6.805097366479148e-06, | |
| "loss": 0.7822, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 0.5347570117380335, | |
| "grad_norm": 0.055445109178676184, | |
| "learning_rate": 6.802634259911201e-06, | |
| "loss": 0.844, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.5353420850550334, | |
| "grad_norm": 0.05279543181613761, | |
| "learning_rate": 6.800169123589919e-06, | |
| "loss": 0.7463, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.5359271583720335, | |
| "grad_norm": 0.041859624286024466, | |
| "learning_rate": 6.797701959597859e-06, | |
| "loss": 0.8604, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.5365122316890335, | |
| "grad_norm": 0.0383422427552055, | |
| "learning_rate": 6.795232770019286e-06, | |
| "loss": 0.7703, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 0.5370973050060336, | |
| "grad_norm": 0.043028959290976825, | |
| "learning_rate": 6.7927615569401815e-06, | |
| "loss": 0.8212, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.5376823783230336, | |
| "grad_norm": 0.03804962859282499, | |
| "learning_rate": 6.790288322448235e-06, | |
| "loss": 0.8366, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 0.5382674516400336, | |
| "grad_norm": 0.039087833943377275, | |
| "learning_rate": 6.787813068632843e-06, | |
| "loss": 0.7831, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.5388525249570337, | |
| "grad_norm": 0.04141823524439823, | |
| "learning_rate": 6.785335797585107e-06, | |
| "loss": 0.8828, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 0.5394375982740337, | |
| "grad_norm": 0.05007681264530146, | |
| "learning_rate": 6.782856511397835e-06, | |
| "loss": 0.8005, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.5400226715910338, | |
| "grad_norm": 0.0358413269090128, | |
| "learning_rate": 6.780375212165535e-06, | |
| "loss": 0.8488, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 0.5406077449080338, | |
| "grad_norm": 0.039662947137159635, | |
| "learning_rate": 6.777891901984417e-06, | |
| "loss": 0.8269, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.5411928182250338, | |
| "grad_norm": 0.03708918191821332, | |
| "learning_rate": 6.775406582952389e-06, | |
| "loss": 0.8561, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.5417778915420338, | |
| "grad_norm": 0.0512337126087228, | |
| "learning_rate": 6.772919257169059e-06, | |
| "loss": 0.8225, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.5423629648590339, | |
| "grad_norm": 0.15259566476683822, | |
| "learning_rate": 6.770429926735727e-06, | |
| "loss": 0.7892, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 0.542948038176034, | |
| "grad_norm": 0.036454692983525445, | |
| "learning_rate": 6.767938593755386e-06, | |
| "loss": 0.87, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.543533111493034, | |
| "grad_norm": 0.08503045559657693, | |
| "learning_rate": 6.765445260332723e-06, | |
| "loss": 0.878, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 0.544118184810034, | |
| "grad_norm": 0.04291403088850566, | |
| "learning_rate": 6.7629499285741155e-06, | |
| "loss": 0.8633, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.544703258127034, | |
| "grad_norm": 0.04417233325326183, | |
| "learning_rate": 6.7604526005876265e-06, | |
| "loss": 0.7777, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 0.5452883314440341, | |
| "grad_norm": 0.04685514307801011, | |
| "learning_rate": 6.7579532784830075e-06, | |
| "loss": 0.8233, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.5458734047610341, | |
| "grad_norm": 0.04216821721923777, | |
| "learning_rate": 6.755451964371696e-06, | |
| "loss": 0.8055, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 0.5464584780780342, | |
| "grad_norm": 0.05899439548668003, | |
| "learning_rate": 6.752948660366807e-06, | |
| "loss": 0.7423, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.5470435513950342, | |
| "grad_norm": 0.039547439339394544, | |
| "learning_rate": 6.750443368583141e-06, | |
| "loss": 0.7959, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.5476286247120342, | |
| "grad_norm": 0.046082967838638036, | |
| "learning_rate": 6.747936091137179e-06, | |
| "loss": 0.7691, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.5482136980290343, | |
| "grad_norm": 0.04724571857592231, | |
| "learning_rate": 6.745426830147074e-06, | |
| "loss": 0.7716, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 0.5487987713460343, | |
| "grad_norm": 0.049126540248396626, | |
| "learning_rate": 6.74291558773266e-06, | |
| "loss": 0.8051, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.5493838446630344, | |
| "grad_norm": 0.038162530207834874, | |
| "learning_rate": 6.740402366015442e-06, | |
| "loss": 0.8182, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 0.5499689179800343, | |
| "grad_norm": 0.07362395009585133, | |
| "learning_rate": 6.737887167118597e-06, | |
| "loss": 0.8025, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.5505539912970344, | |
| "grad_norm": 0.03855880618683095, | |
| "learning_rate": 6.735369993166977e-06, | |
| "loss": 0.8257, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 0.5511390646140344, | |
| "grad_norm": 0.04403565508981498, | |
| "learning_rate": 6.732850846287096e-06, | |
| "loss": 0.7377, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.5517241379310345, | |
| "grad_norm": 0.039173991395047666, | |
| "learning_rate": 6.730329728607137e-06, | |
| "loss": 0.8063, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 0.5523092112480346, | |
| "grad_norm": 0.042531239336748856, | |
| "learning_rate": 6.72780664225695e-06, | |
| "loss": 0.8237, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.5528942845650345, | |
| "grad_norm": 0.039715480086954504, | |
| "learning_rate": 6.725281589368046e-06, | |
| "loss": 0.8229, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.5534793578820346, | |
| "grad_norm": 0.04910369460983757, | |
| "learning_rate": 6.722754572073599e-06, | |
| "loss": 0.8503, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.5540644311990346, | |
| "grad_norm": 0.03827200257320983, | |
| "learning_rate": 6.720225592508439e-06, | |
| "loss": 0.9069, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 0.5546495045160347, | |
| "grad_norm": 0.06511818499208381, | |
| "learning_rate": 6.7176946528090585e-06, | |
| "loss": 0.9065, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 0.5552345778330346, | |
| "grad_norm": 0.03694464014581456, | |
| "learning_rate": 6.715161755113604e-06, | |
| "loss": 0.8588, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 0.5558196511500347, | |
| "grad_norm": 0.03684150463907197, | |
| "learning_rate": 6.712626901561876e-06, | |
| "loss": 0.8272, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.5564047244670348, | |
| "grad_norm": 0.04096608730346328, | |
| "learning_rate": 6.710090094295323e-06, | |
| "loss": 0.9031, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 0.5569897977840348, | |
| "grad_norm": 0.04008860398814125, | |
| "learning_rate": 6.707551335457054e-06, | |
| "loss": 0.8452, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.5575748711010349, | |
| "grad_norm": 0.06233285275932101, | |
| "learning_rate": 6.705010627191816e-06, | |
| "loss": 0.8813, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 0.5581599444180348, | |
| "grad_norm": 0.04077134247799943, | |
| "learning_rate": 6.7024679716460114e-06, | |
| "loss": 0.8493, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.5587450177350349, | |
| "grad_norm": 0.046352641350363474, | |
| "learning_rate": 6.699923370967682e-06, | |
| "loss": 0.8309, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.5593300910520349, | |
| "grad_norm": 0.041407416461248854, | |
| "learning_rate": 6.6973768273065145e-06, | |
| "loss": 0.8149, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.559915164369035, | |
| "grad_norm": 0.03903915190219945, | |
| "learning_rate": 6.694828342813839e-06, | |
| "loss": 0.8669, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 0.5605002376860351, | |
| "grad_norm": 0.05659630453049191, | |
| "learning_rate": 6.692277919642623e-06, | |
| "loss": 0.8291, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 0.561085311003035, | |
| "grad_norm": 0.048419920107990906, | |
| "learning_rate": 6.6897255599474705e-06, | |
| "loss": 0.7891, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 0.5616703843200351, | |
| "grad_norm": 0.04526622141241147, | |
| "learning_rate": 6.6871712658846255e-06, | |
| "loss": 0.82, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.5622554576370351, | |
| "grad_norm": 0.05616992844910552, | |
| "learning_rate": 6.684615039611963e-06, | |
| "loss": 0.819, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 0.5628405309540352, | |
| "grad_norm": 0.05533671965109322, | |
| "learning_rate": 6.682056883288993e-06, | |
| "loss": 0.8278, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 0.5634256042710352, | |
| "grad_norm": 0.04723246972770138, | |
| "learning_rate": 6.679496799076853e-06, | |
| "loss": 0.8255, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 0.5640106775880352, | |
| "grad_norm": 0.04762545006180037, | |
| "learning_rate": 6.67693478913831e-06, | |
| "loss": 0.8867, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.5645957509050353, | |
| "grad_norm": 0.04368900157724721, | |
| "learning_rate": 6.674370855637759e-06, | |
| "loss": 0.8527, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.5651808242220353, | |
| "grad_norm": 0.04639910967539116, | |
| "learning_rate": 6.671805000741221e-06, | |
| "loss": 0.8147, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 0.5657658975390354, | |
| "grad_norm": 0.04122171566933528, | |
| "learning_rate": 6.6692372266163365e-06, | |
| "loss": 0.8176, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 0.5663509708560354, | |
| "grad_norm": 0.04160364438101482, | |
| "learning_rate": 6.666667535432371e-06, | |
| "loss": 0.8588, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 0.5669360441730354, | |
| "grad_norm": 0.04172707626334754, | |
| "learning_rate": 6.664095929360207e-06, | |
| "loss": 0.8315, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 0.5675211174900354, | |
| "grad_norm": 0.04646561250778188, | |
| "learning_rate": 6.661522410572346e-06, | |
| "loss": 0.7923, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.5681061908070355, | |
| "grad_norm": 0.04188298952966753, | |
| "learning_rate": 6.658946981242906e-06, | |
| "loss": 0.7711, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 0.5686912641240356, | |
| "grad_norm": 0.03908796787695422, | |
| "learning_rate": 6.656369643547617e-06, | |
| "loss": 0.8856, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 0.5692763374410356, | |
| "grad_norm": 0.042788844509774104, | |
| "learning_rate": 6.653790399663823e-06, | |
| "loss": 0.7808, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 0.5698614107580356, | |
| "grad_norm": 0.05201703823511508, | |
| "learning_rate": 6.651209251770478e-06, | |
| "loss": 0.8618, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 0.5704464840750356, | |
| "grad_norm": 0.04585801993937681, | |
| "learning_rate": 6.648626202048144e-06, | |
| "loss": 0.8373, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.5710315573920357, | |
| "grad_norm": 0.040661578495326846, | |
| "learning_rate": 6.646041252678989e-06, | |
| "loss": 0.7641, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.5716166307090357, | |
| "grad_norm": 0.035144726376477184, | |
| "learning_rate": 6.643454405846788e-06, | |
| "loss": 0.7272, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 0.5722017040260358, | |
| "grad_norm": 0.04251759434748344, | |
| "learning_rate": 6.640865663736917e-06, | |
| "loss": 0.8063, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 0.5727867773430358, | |
| "grad_norm": 0.037521500861554015, | |
| "learning_rate": 6.638275028536356e-06, | |
| "loss": 0.7797, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 0.5733718506600358, | |
| "grad_norm": 0.041622067012318506, | |
| "learning_rate": 6.6356825024336784e-06, | |
| "loss": 0.8679, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.5739569239770359, | |
| "grad_norm": 0.04290995619424604, | |
| "learning_rate": 6.63308808761906e-06, | |
| "loss": 0.8477, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 0.5745419972940359, | |
| "grad_norm": 0.0429619795472441, | |
| "learning_rate": 6.630491786284273e-06, | |
| "loss": 0.8145, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 0.575127070611036, | |
| "grad_norm": 0.03771407315818989, | |
| "learning_rate": 6.6278936006226795e-06, | |
| "loss": 0.796, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 0.5757121439280359, | |
| "grad_norm": 0.0720666677058357, | |
| "learning_rate": 6.625293532829236e-06, | |
| "loss": 0.7856, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 0.576297217245036, | |
| "grad_norm": 0.05035693118475441, | |
| "learning_rate": 6.622691585100488e-06, | |
| "loss": 0.8212, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.5768822905620361, | |
| "grad_norm": 0.0661002129965687, | |
| "learning_rate": 6.620087759634569e-06, | |
| "loss": 0.8248, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 0.5774673638790361, | |
| "grad_norm": 0.043048564438655845, | |
| "learning_rate": 6.617482058631201e-06, | |
| "loss": 0.7865, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 0.5780524371960362, | |
| "grad_norm": 0.04716626817210766, | |
| "learning_rate": 6.614874484291688e-06, | |
| "loss": 0.7806, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 0.5786375105130361, | |
| "grad_norm": 0.043537127489980836, | |
| "learning_rate": 6.612265038818915e-06, | |
| "loss": 0.8248, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 0.5792225838300362, | |
| "grad_norm": 0.036528767071679374, | |
| "learning_rate": 6.609653724417354e-06, | |
| "loss": 0.8464, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.5798076571470362, | |
| "grad_norm": 0.041985098940740775, | |
| "learning_rate": 6.6070405432930495e-06, | |
| "loss": 0.8371, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 0.5803927304640363, | |
| "grad_norm": 0.037661629039364965, | |
| "learning_rate": 6.604425497653627e-06, | |
| "loss": 0.8133, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.5809778037810364, | |
| "grad_norm": 0.04709785126617865, | |
| "learning_rate": 6.6018085897082845e-06, | |
| "loss": 0.7926, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 0.5815628770980363, | |
| "grad_norm": 0.04920382998315035, | |
| "learning_rate": 6.5991898216677945e-06, | |
| "loss": 0.8511, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 0.5821479504150364, | |
| "grad_norm": 0.0384231269239162, | |
| "learning_rate": 6.596569195744502e-06, | |
| "loss": 0.8767, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.5827330237320364, | |
| "grad_norm": 0.0469334210563224, | |
| "learning_rate": 6.59394671415232e-06, | |
| "loss": 0.8445, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 0.5833180970490365, | |
| "grad_norm": 0.07961405813390293, | |
| "learning_rate": 6.591322379106728e-06, | |
| "loss": 0.7951, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 0.5839031703660365, | |
| "grad_norm": 0.039593707752364676, | |
| "learning_rate": 6.588696192824775e-06, | |
| "loss": 0.8325, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 0.5844882436830365, | |
| "grad_norm": 0.04184294201712494, | |
| "learning_rate": 6.5860681575250706e-06, | |
| "loss": 0.8599, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 0.5850733170000366, | |
| "grad_norm": 0.10237775503964221, | |
| "learning_rate": 6.5834382754277885e-06, | |
| "loss": 0.8159, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.5856583903170366, | |
| "grad_norm": 0.03881481319706578, | |
| "learning_rate": 6.580806548754661e-06, | |
| "loss": 0.7969, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 0.5862434636340367, | |
| "grad_norm": 0.043457660890875695, | |
| "learning_rate": 6.578172979728979e-06, | |
| "loss": 0.8012, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 0.5868285369510367, | |
| "grad_norm": 0.037934756068100706, | |
| "learning_rate": 6.5755375705755924e-06, | |
| "loss": 0.8248, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 0.5874136102680367, | |
| "grad_norm": 0.12610177824785532, | |
| "learning_rate": 6.572900323520901e-06, | |
| "loss": 0.7342, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 0.5879986835850367, | |
| "grad_norm": 0.03661390969286209, | |
| "learning_rate": 6.570261240792861e-06, | |
| "loss": 0.7682, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.5885837569020368, | |
| "grad_norm": 0.06798106192365762, | |
| "learning_rate": 6.5676203246209785e-06, | |
| "loss": 0.793, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 0.5891688302190369, | |
| "grad_norm": 0.05106297642369706, | |
| "learning_rate": 6.564977577236309e-06, | |
| "loss": 0.881, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 0.5897539035360368, | |
| "grad_norm": 0.04384540012407079, | |
| "learning_rate": 6.5623330008714505e-06, | |
| "loss": 0.7453, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 0.5903389768530369, | |
| "grad_norm": 0.10758927483483961, | |
| "learning_rate": 6.559686597760555e-06, | |
| "loss": 0.8367, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 0.5909240501700369, | |
| "grad_norm": 0.04079443571028819, | |
| "learning_rate": 6.557038370139307e-06, | |
| "loss": 0.8972, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.591509123487037, | |
| "grad_norm": 0.04314093124710063, | |
| "learning_rate": 6.554388320244943e-06, | |
| "loss": 0.8316, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 0.592094196804037, | |
| "grad_norm": 0.037939172015033606, | |
| "learning_rate": 6.5517364503162315e-06, | |
| "loss": 0.8587, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 0.592679270121037, | |
| "grad_norm": 0.08510073848850286, | |
| "learning_rate": 6.549082762593481e-06, | |
| "loss": 0.8269, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 0.5932643434380371, | |
| "grad_norm": 0.04708547345385738, | |
| "learning_rate": 6.546427259318535e-06, | |
| "loss": 0.8102, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 0.5938494167550371, | |
| "grad_norm": 0.053444431590032856, | |
| "learning_rate": 6.543769942734772e-06, | |
| "loss": 0.7774, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 0.5944344900720372, | |
| "grad_norm": 0.04005959184610443, | |
| "learning_rate": 6.541110815087104e-06, | |
| "loss": 0.7808, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 0.5950195633890372, | |
| "grad_norm": 0.05077795167298267, | |
| "learning_rate": 6.538449878621966e-06, | |
| "loss": 0.8758, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 0.5956046367060372, | |
| "grad_norm": 0.03833355241397989, | |
| "learning_rate": 6.535787135587331e-06, | |
| "loss": 0.8331, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 0.5961897100230372, | |
| "grad_norm": 0.053233926527690635, | |
| "learning_rate": 6.533122588232689e-06, | |
| "loss": 0.8339, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 0.5967747833400373, | |
| "grad_norm": 0.044734695814882965, | |
| "learning_rate": 6.530456238809062e-06, | |
| "loss": 0.7773, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.5973598566570373, | |
| "grad_norm": 0.05575094862913427, | |
| "learning_rate": 6.527788089568987e-06, | |
| "loss": 0.8303, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 0.5979449299740374, | |
| "grad_norm": 0.04961953370443646, | |
| "learning_rate": 6.525118142766527e-06, | |
| "loss": 0.7392, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 0.5985300032910374, | |
| "grad_norm": 0.09625684788536548, | |
| "learning_rate": 6.522446400657264e-06, | |
| "loss": 0.9054, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 0.5991150766080374, | |
| "grad_norm": 0.05229897404990849, | |
| "learning_rate": 6.519772865498291e-06, | |
| "loss": 0.7961, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 0.5997001499250375, | |
| "grad_norm": 0.043742861050743945, | |
| "learning_rate": 6.51709753954822e-06, | |
| "loss": 0.8543, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.6002852232420375, | |
| "grad_norm": 0.04421257742872319, | |
| "learning_rate": 6.514420425067179e-06, | |
| "loss": 0.9387, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 0.6008702965590376, | |
| "grad_norm": 0.05074707736420169, | |
| "learning_rate": 6.511741524316798e-06, | |
| "loss": 0.6896, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 0.6014553698760375, | |
| "grad_norm": 0.038800133730654246, | |
| "learning_rate": 6.509060839560223e-06, | |
| "loss": 0.7429, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 0.6020404431930376, | |
| "grad_norm": 0.0672245152896617, | |
| "learning_rate": 6.506378373062107e-06, | |
| "loss": 0.8246, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 0.6026255165100377, | |
| "grad_norm": 0.04853749164209948, | |
| "learning_rate": 6.503694127088604e-06, | |
| "loss": 0.8006, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.6032105898270377, | |
| "grad_norm": 0.0423510296147158, | |
| "learning_rate": 6.501008103907376e-06, | |
| "loss": 0.8068, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 0.6037956631440378, | |
| "grad_norm": 0.04168402528442862, | |
| "learning_rate": 6.498320305787583e-06, | |
| "loss": 0.747, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 0.6043807364610377, | |
| "grad_norm": 0.05536933839665471, | |
| "learning_rate": 6.495630734999885e-06, | |
| "loss": 0.8214, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 0.6049658097780378, | |
| "grad_norm": 0.03706453349529763, | |
| "learning_rate": 6.4929393938164425e-06, | |
| "loss": 0.7507, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 0.6055508830950378, | |
| "grad_norm": 0.03569665816245634, | |
| "learning_rate": 6.490246284510907e-06, | |
| "loss": 0.8255, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.6061359564120379, | |
| "grad_norm": 0.04630173604608656, | |
| "learning_rate": 6.487551409358428e-06, | |
| "loss": 0.8046, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 0.606721029729038, | |
| "grad_norm": 0.04723153392312322, | |
| "learning_rate": 6.4848547706356444e-06, | |
| "loss": 0.8256, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 0.6073061030460379, | |
| "grad_norm": 0.03827183486478635, | |
| "learning_rate": 6.482156370620683e-06, | |
| "loss": 0.7563, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 0.607891176363038, | |
| "grad_norm": 0.0388015622476578, | |
| "learning_rate": 6.479456211593165e-06, | |
| "loss": 0.7176, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 0.608476249680038, | |
| "grad_norm": 0.03999779366433317, | |
| "learning_rate": 6.476754295834191e-06, | |
| "loss": 0.8224, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.6090613229970381, | |
| "grad_norm": 0.039892311683218856, | |
| "learning_rate": 6.47405062562635e-06, | |
| "loss": 0.8236, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 0.609646396314038, | |
| "grad_norm": 0.03973227428998665, | |
| "learning_rate": 6.471345203253711e-06, | |
| "loss": 0.8184, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 0.6102314696310381, | |
| "grad_norm": 0.04150106217436074, | |
| "learning_rate": 6.468638031001823e-06, | |
| "loss": 0.8804, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 0.6108165429480382, | |
| "grad_norm": 0.04239329779716534, | |
| "learning_rate": 6.465929111157714e-06, | |
| "loss": 0.7935, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 0.6114016162650382, | |
| "grad_norm": 0.05192716135637802, | |
| "learning_rate": 6.463218446009888e-06, | |
| "loss": 0.8526, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 0.6119866895820383, | |
| "grad_norm": 0.04095824415870296, | |
| "learning_rate": 6.4605060378483255e-06, | |
| "loss": 0.901, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 0.6125717628990383, | |
| "grad_norm": 0.03546640975061806, | |
| "learning_rate": 6.457791888964478e-06, | |
| "loss": 0.7811, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 0.6131568362160383, | |
| "grad_norm": 0.040296634693903335, | |
| "learning_rate": 6.455076001651265e-06, | |
| "loss": 0.7403, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 0.6137419095330383, | |
| "grad_norm": 0.039361319712739894, | |
| "learning_rate": 6.452358378203079e-06, | |
| "loss": 0.8359, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 0.6143269828500384, | |
| "grad_norm": 0.04449717698536887, | |
| "learning_rate": 6.449639020915777e-06, | |
| "loss": 0.7877, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.6149120561670385, | |
| "grad_norm": 0.05075898809213348, | |
| "learning_rate": 6.446917932086681e-06, | |
| "loss": 0.7867, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 0.6154971294840385, | |
| "grad_norm": 0.05427508854998823, | |
| "learning_rate": 6.444195114014573e-06, | |
| "loss": 0.9079, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 0.6160822028010385, | |
| "grad_norm": 0.039038596627602784, | |
| "learning_rate": 6.441470568999704e-06, | |
| "loss": 0.8449, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 0.6166672761180385, | |
| "grad_norm": 0.05495139920274083, | |
| "learning_rate": 6.438744299343774e-06, | |
| "loss": 0.8195, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 0.6172523494350386, | |
| "grad_norm": 0.04036928882191201, | |
| "learning_rate": 6.436016307349947e-06, | |
| "loss": 0.9269, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 0.6178374227520386, | |
| "grad_norm": 0.03982823893586514, | |
| "learning_rate": 6.4332865953228395e-06, | |
| "loss": 0.7358, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 0.6184224960690387, | |
| "grad_norm": 0.06481474497909782, | |
| "learning_rate": 6.430555165568521e-06, | |
| "loss": 0.7267, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 0.6190075693860387, | |
| "grad_norm": 0.05875784906821891, | |
| "learning_rate": 6.427822020394512e-06, | |
| "loss": 0.8439, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 0.6195926427030387, | |
| "grad_norm": 0.040087218682744265, | |
| "learning_rate": 6.425087162109781e-06, | |
| "loss": 0.7822, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 0.6201777160200388, | |
| "grad_norm": 0.04225947062502592, | |
| "learning_rate": 6.422350593024747e-06, | |
| "loss": 0.8365, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.6207627893370388, | |
| "grad_norm": 0.03505701993198131, | |
| "learning_rate": 6.419612315451275e-06, | |
| "loss": 0.7465, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 0.6213478626540389, | |
| "grad_norm": 0.03617054781781428, | |
| "learning_rate": 6.4168723317026655e-06, | |
| "loss": 0.7628, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 0.6219329359710388, | |
| "grad_norm": 0.04149790973744168, | |
| "learning_rate": 6.414130644093669e-06, | |
| "loss": 0.7846, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 0.6225180092880389, | |
| "grad_norm": 0.08318622079767567, | |
| "learning_rate": 6.411387254940473e-06, | |
| "loss": 0.8865, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 0.623103082605039, | |
| "grad_norm": 0.04061744209355914, | |
| "learning_rate": 6.4086421665607e-06, | |
| "loss": 0.7673, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 0.623688155922039, | |
| "grad_norm": 0.04460594650043864, | |
| "learning_rate": 6.405895381273411e-06, | |
| "loss": 0.7735, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 0.624273229239039, | |
| "grad_norm": 0.05550280064698044, | |
| "learning_rate": 6.403146901399098e-06, | |
| "loss": 0.8569, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 0.624858302556039, | |
| "grad_norm": 0.04163827309254167, | |
| "learning_rate": 6.400396729259685e-06, | |
| "loss": 0.8951, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 0.6254433758730391, | |
| "grad_norm": 0.1509844863070891, | |
| "learning_rate": 6.39764486717853e-06, | |
| "loss": 0.8082, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 0.6260284491900391, | |
| "grad_norm": 0.04492131767454413, | |
| "learning_rate": 6.394891317480412e-06, | |
| "loss": 0.7518, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.6266135225070392, | |
| "grad_norm": 0.0471963162082604, | |
| "learning_rate": 6.39213608249154e-06, | |
| "loss": 0.8127, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 0.6271985958240393, | |
| "grad_norm": 0.038627758596647896, | |
| "learning_rate": 6.389379164539545e-06, | |
| "loss": 0.7781, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 0.6277836691410392, | |
| "grad_norm": 0.04265598384978139, | |
| "learning_rate": 6.386620565953482e-06, | |
| "loss": 0.7698, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 0.6283687424580393, | |
| "grad_norm": 0.040866964196317926, | |
| "learning_rate": 6.383860289063821e-06, | |
| "loss": 0.741, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 0.6289538157750393, | |
| "grad_norm": 0.04488242542194653, | |
| "learning_rate": 6.3810983362024575e-06, | |
| "loss": 0.8618, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.6295388890920394, | |
| "grad_norm": 0.04181264245748804, | |
| "learning_rate": 6.3783347097026935e-06, | |
| "loss": 0.8318, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 0.6301239624090393, | |
| "grad_norm": 0.057019285327405245, | |
| "learning_rate": 6.375569411899253e-06, | |
| "loss": 0.822, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 0.6307090357260394, | |
| "grad_norm": 0.04075452383662032, | |
| "learning_rate": 6.3728024451282675e-06, | |
| "loss": 0.7854, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 0.6312941090430395, | |
| "grad_norm": 0.04369789370291459, | |
| "learning_rate": 6.37003381172728e-06, | |
| "loss": 0.7976, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 0.6318791823600395, | |
| "grad_norm": 0.05849968444823343, | |
| "learning_rate": 6.367263514035242e-06, | |
| "loss": 0.8309, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.6324642556770396, | |
| "grad_norm": 0.07398315736379607, | |
| "learning_rate": 6.364491554392508e-06, | |
| "loss": 0.8695, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 0.6330493289940395, | |
| "grad_norm": 0.14149329637182187, | |
| "learning_rate": 6.36171793514084e-06, | |
| "loss": 0.9035, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 0.6336344023110396, | |
| "grad_norm": 0.09431782156363094, | |
| "learning_rate": 6.358942658623402e-06, | |
| "loss": 0.7978, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 0.6342194756280396, | |
| "grad_norm": 0.11444686544474032, | |
| "learning_rate": 6.356165727184753e-06, | |
| "loss": 0.8075, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 0.6348045489450397, | |
| "grad_norm": 0.07683296171985984, | |
| "learning_rate": 6.353387143170856e-06, | |
| "loss": 0.7619, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 0.6353896222620398, | |
| "grad_norm": 0.03611601798693106, | |
| "learning_rate": 6.3506069089290705e-06, | |
| "loss": 0.7886, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 0.6359746955790397, | |
| "grad_norm": 0.055915745635355715, | |
| "learning_rate": 6.3478250268081435e-06, | |
| "loss": 0.8511, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 0.6365597688960398, | |
| "grad_norm": 0.048663447635497695, | |
| "learning_rate": 6.34504149915822e-06, | |
| "loss": 0.9001, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 0.6371448422130398, | |
| "grad_norm": 0.04226309960545497, | |
| "learning_rate": 6.342256328330833e-06, | |
| "loss": 0.7945, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 0.6377299155300399, | |
| "grad_norm": 0.0425747978871558, | |
| "learning_rate": 6.339469516678903e-06, | |
| "loss": 0.7945, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.6383149888470399, | |
| "grad_norm": 0.049540193290765706, | |
| "learning_rate": 6.33668106655674e-06, | |
| "loss": 0.8424, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 0.6389000621640399, | |
| "grad_norm": 0.04361089778932384, | |
| "learning_rate": 6.333890980320033e-06, | |
| "loss": 0.8775, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 0.6394851354810399, | |
| "grad_norm": 0.04952042923998371, | |
| "learning_rate": 6.331099260325858e-06, | |
| "loss": 0.7921, | |
| "step": 1093 | |
| }, | |
| { | |
| "epoch": 0.64007020879804, | |
| "grad_norm": 0.08510241121137069, | |
| "learning_rate": 6.32830590893267e-06, | |
| "loss": 0.8449, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 0.6406552821150401, | |
| "grad_norm": 0.052581634750892337, | |
| "learning_rate": 6.325510928500298e-06, | |
| "loss": 0.8504, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 0.64124035543204, | |
| "grad_norm": 0.06971844661189186, | |
| "learning_rate": 6.322714321389955e-06, | |
| "loss": 0.8049, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 0.6418254287490401, | |
| "grad_norm": 0.0425225993758099, | |
| "learning_rate": 6.319916089964221e-06, | |
| "loss": 0.7374, | |
| "step": 1097 | |
| }, | |
| { | |
| "epoch": 0.6424105020660401, | |
| "grad_norm": 0.04070882005149654, | |
| "learning_rate": 6.317116236587052e-06, | |
| "loss": 0.747, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 0.6429955753830402, | |
| "grad_norm": 0.04748840753374928, | |
| "learning_rate": 6.314314763623775e-06, | |
| "loss": 0.7728, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 0.6435806487000402, | |
| "grad_norm": 0.08783073493232298, | |
| "learning_rate": 6.31151167344108e-06, | |
| "loss": 0.8031, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.6441657220170403, | |
| "grad_norm": 0.12757918242470023, | |
| "learning_rate": 6.308706968407029e-06, | |
| "loss": 0.8275, | |
| "step": 1101 | |
| }, | |
| { | |
| "epoch": 0.6447507953340403, | |
| "grad_norm": 0.04265066246559783, | |
| "learning_rate": 6.305900650891045e-06, | |
| "loss": 0.8143, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 0.6453358686510403, | |
| "grad_norm": 0.05532213512913742, | |
| "learning_rate": 6.303092723263917e-06, | |
| "loss": 0.7623, | |
| "step": 1103 | |
| }, | |
| { | |
| "epoch": 0.6459209419680404, | |
| "grad_norm": 0.05575276568946241, | |
| "learning_rate": 6.300283187897788e-06, | |
| "loss": 0.7578, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 0.6465060152850404, | |
| "grad_norm": 0.04004488402472709, | |
| "learning_rate": 6.297472047166164e-06, | |
| "loss": 0.8287, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 0.6470910886020405, | |
| "grad_norm": 0.07040462606702534, | |
| "learning_rate": 6.294659303443907e-06, | |
| "loss": 0.802, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 0.6476761619190404, | |
| "grad_norm": 0.04991605035702622, | |
| "learning_rate": 6.291844959107231e-06, | |
| "loss": 0.78, | |
| "step": 1107 | |
| }, | |
| { | |
| "epoch": 0.6482612352360405, | |
| "grad_norm": 0.04810590450360042, | |
| "learning_rate": 6.289029016533705e-06, | |
| "loss": 0.7531, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 0.6488463085530406, | |
| "grad_norm": 0.04521072775635856, | |
| "learning_rate": 6.286211478102243e-06, | |
| "loss": 0.773, | |
| "step": 1109 | |
| }, | |
| { | |
| "epoch": 0.6494313818700406, | |
| "grad_norm": 0.04141710815776832, | |
| "learning_rate": 6.283392346193114e-06, | |
| "loss": 0.813, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.6500164551870407, | |
| "grad_norm": 0.043835473627139195, | |
| "learning_rate": 6.280571623187929e-06, | |
| "loss": 0.793, | |
| "step": 1111 | |
| }, | |
| { | |
| "epoch": 0.6506015285040406, | |
| "grad_norm": 0.05669397811371989, | |
| "learning_rate": 6.277749311469643e-06, | |
| "loss": 0.7317, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 0.6511866018210407, | |
| "grad_norm": 0.08646975004658393, | |
| "learning_rate": 6.274925413422558e-06, | |
| "loss": 0.8447, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 0.6517716751380407, | |
| "grad_norm": 0.039267068048581634, | |
| "learning_rate": 6.272099931432308e-06, | |
| "loss": 0.8171, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 0.6523567484550408, | |
| "grad_norm": 0.04626551882560527, | |
| "learning_rate": 6.2692728678858705e-06, | |
| "loss": 0.7719, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 0.6529418217720409, | |
| "grad_norm": 0.08786584886983898, | |
| "learning_rate": 6.26644422517156e-06, | |
| "loss": 0.7863, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 0.6535268950890408, | |
| "grad_norm": 0.07658703901409003, | |
| "learning_rate": 6.26361400567902e-06, | |
| "loss": 0.8893, | |
| "step": 1117 | |
| }, | |
| { | |
| "epoch": 0.6541119684060409, | |
| "grad_norm": 0.03799609243525315, | |
| "learning_rate": 6.2607822117992326e-06, | |
| "loss": 0.7575, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 0.6546970417230409, | |
| "grad_norm": 0.04335037636508533, | |
| "learning_rate": 6.257948845924505e-06, | |
| "loss": 0.7564, | |
| "step": 1119 | |
| }, | |
| { | |
| "epoch": 0.655282115040041, | |
| "grad_norm": 0.04285395891669155, | |
| "learning_rate": 6.2551139104484755e-06, | |
| "loss": 0.8482, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.6558671883570409, | |
| "grad_norm": 0.03461985415221269, | |
| "learning_rate": 6.252277407766103e-06, | |
| "loss": 0.7411, | |
| "step": 1121 | |
| }, | |
| { | |
| "epoch": 0.656452261674041, | |
| "grad_norm": 0.06081410937727396, | |
| "learning_rate": 6.249439340273679e-06, | |
| "loss": 0.8753, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 0.6570373349910411, | |
| "grad_norm": 0.08982630298329526, | |
| "learning_rate": 6.246599710368809e-06, | |
| "loss": 0.7514, | |
| "step": 1123 | |
| }, | |
| { | |
| "epoch": 0.6576224083080411, | |
| "grad_norm": 0.04564349868186195, | |
| "learning_rate": 6.243758520450423e-06, | |
| "loss": 0.7989, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 0.6582074816250412, | |
| "grad_norm": 0.043054553052619716, | |
| "learning_rate": 6.240915772918768e-06, | |
| "loss": 0.7454, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.6587925549420411, | |
| "grad_norm": 0.03677134636684232, | |
| "learning_rate": 6.238071470175405e-06, | |
| "loss": 0.8528, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 0.6593776282590412, | |
| "grad_norm": 0.04172483135818754, | |
| "learning_rate": 6.235225614623212e-06, | |
| "loss": 0.812, | |
| "step": 1127 | |
| }, | |
| { | |
| "epoch": 0.6599627015760412, | |
| "grad_norm": 0.05277877562040755, | |
| "learning_rate": 6.232378208666376e-06, | |
| "loss": 0.8283, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 0.6605477748930413, | |
| "grad_norm": 0.04454598931616706, | |
| "learning_rate": 6.229529254710396e-06, | |
| "loss": 0.8537, | |
| "step": 1129 | |
| }, | |
| { | |
| "epoch": 0.6611328482100414, | |
| "grad_norm": 0.047900592019452154, | |
| "learning_rate": 6.226678755162076e-06, | |
| "loss": 0.825, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.6617179215270413, | |
| "grad_norm": 0.05963183612092722, | |
| "learning_rate": 6.223826712429529e-06, | |
| "loss": 0.8042, | |
| "step": 1131 | |
| }, | |
| { | |
| "epoch": 0.6623029948440414, | |
| "grad_norm": 0.046383510242271296, | |
| "learning_rate": 6.220973128922168e-06, | |
| "loss": 0.865, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 0.6628880681610414, | |
| "grad_norm": 0.04417415078522508, | |
| "learning_rate": 6.218118007050713e-06, | |
| "loss": 0.8235, | |
| "step": 1133 | |
| }, | |
| { | |
| "epoch": 0.6634731414780415, | |
| "grad_norm": 0.04207639503924024, | |
| "learning_rate": 6.215261349227178e-06, | |
| "loss": 0.7858, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 0.6640582147950415, | |
| "grad_norm": 0.04649797867374506, | |
| "learning_rate": 6.212403157864878e-06, | |
| "loss": 0.868, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 0.6646432881120415, | |
| "grad_norm": 0.09407948533657494, | |
| "learning_rate": 6.209543435378422e-06, | |
| "loss": 0.8818, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 0.6652283614290416, | |
| "grad_norm": 0.037478109364168094, | |
| "learning_rate": 6.206682184183712e-06, | |
| "loss": 0.812, | |
| "step": 1137 | |
| }, | |
| { | |
| "epoch": 0.6658134347460416, | |
| "grad_norm": 0.05288676232169846, | |
| "learning_rate": 6.203819406697945e-06, | |
| "loss": 0.7548, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 0.6663985080630417, | |
| "grad_norm": 0.04535893610318677, | |
| "learning_rate": 6.200955105339603e-06, | |
| "loss": 0.8772, | |
| "step": 1139 | |
| }, | |
| { | |
| "epoch": 0.6669835813800417, | |
| "grad_norm": 0.04773693536693857, | |
| "learning_rate": 6.198089282528456e-06, | |
| "loss": 0.7763, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.6675686546970417, | |
| "grad_norm": 0.0518520434245037, | |
| "learning_rate": 6.195221940685563e-06, | |
| "loss": 0.7668, | |
| "step": 1141 | |
| }, | |
| { | |
| "epoch": 0.6681537280140417, | |
| "grad_norm": 0.05462892968699057, | |
| "learning_rate": 6.192353082233263e-06, | |
| "loss": 0.7096, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 0.6687388013310418, | |
| "grad_norm": 0.04122645056649732, | |
| "learning_rate": 6.189482709595177e-06, | |
| "loss": 0.7839, | |
| "step": 1143 | |
| }, | |
| { | |
| "epoch": 0.6693238746480419, | |
| "grad_norm": 0.058891294040750164, | |
| "learning_rate": 6.186610825196204e-06, | |
| "loss": 0.7504, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 0.6699089479650419, | |
| "grad_norm": 0.05296752593762354, | |
| "learning_rate": 6.183737431462524e-06, | |
| "loss": 0.7591, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 0.6704940212820419, | |
| "grad_norm": 0.04766421657837364, | |
| "learning_rate": 6.180862530821588e-06, | |
| "loss": 0.742, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 0.6710790945990419, | |
| "grad_norm": 0.0915137633830507, | |
| "learning_rate": 6.177986125702121e-06, | |
| "loss": 0.8167, | |
| "step": 1147 | |
| }, | |
| { | |
| "epoch": 0.671664167916042, | |
| "grad_norm": 0.04064619759463224, | |
| "learning_rate": 6.17510821853412e-06, | |
| "loss": 0.755, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 0.672249241233042, | |
| "grad_norm": 0.03962465637676519, | |
| "learning_rate": 6.17222881174885e-06, | |
| "loss": 0.7952, | |
| "step": 1149 | |
| }, | |
| { | |
| "epoch": 0.6728343145500421, | |
| "grad_norm": 0.0475379390885668, | |
| "learning_rate": 6.169347907778846e-06, | |
| "loss": 0.7889, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.6734193878670421, | |
| "grad_norm": 0.04260205050833479, | |
| "learning_rate": 6.166465509057902e-06, | |
| "loss": 0.7094, | |
| "step": 1151 | |
| }, | |
| { | |
| "epoch": 0.6740044611840421, | |
| "grad_norm": 0.0452503260334743, | |
| "learning_rate": 6.163581618021079e-06, | |
| "loss": 0.8137, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 0.6745895345010422, | |
| "grad_norm": 0.049045892950316486, | |
| "learning_rate": 6.1606962371046975e-06, | |
| "loss": 0.7476, | |
| "step": 1153 | |
| }, | |
| { | |
| "epoch": 0.6751746078180422, | |
| "grad_norm": 0.1839546472224546, | |
| "learning_rate": 6.157809368746337e-06, | |
| "loss": 0.7341, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 0.6757596811350423, | |
| "grad_norm": 0.04737636667405579, | |
| "learning_rate": 6.154921015384833e-06, | |
| "loss": 0.8772, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 0.6763447544520422, | |
| "grad_norm": 0.039827421277940374, | |
| "learning_rate": 6.152031179460276e-06, | |
| "loss": 0.8184, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 0.6769298277690423, | |
| "grad_norm": 0.03980204662807788, | |
| "learning_rate": 6.14913986341401e-06, | |
| "loss": 0.8504, | |
| "step": 1157 | |
| }, | |
| { | |
| "epoch": 0.6775149010860424, | |
| "grad_norm": 0.06377067252825656, | |
| "learning_rate": 6.146247069688627e-06, | |
| "loss": 0.8496, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 0.6780999744030424, | |
| "grad_norm": 0.5555285996614837, | |
| "learning_rate": 6.14335280072797e-06, | |
| "loss": 0.8131, | |
| "step": 1159 | |
| }, | |
| { | |
| "epoch": 0.6786850477200425, | |
| "grad_norm": 0.03909895393006111, | |
| "learning_rate": 6.140457058977125e-06, | |
| "loss": 0.7756, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.6792701210370424, | |
| "grad_norm": 0.03969052355186135, | |
| "learning_rate": 6.137559846882426e-06, | |
| "loss": 0.8209, | |
| "step": 1161 | |
| }, | |
| { | |
| "epoch": 0.6798551943540425, | |
| "grad_norm": 0.04103151559227765, | |
| "learning_rate": 6.134661166891445e-06, | |
| "loss": 0.8015, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 0.6804402676710425, | |
| "grad_norm": 0.05136493470713414, | |
| "learning_rate": 6.131761021453e-06, | |
| "loss": 0.8234, | |
| "step": 1163 | |
| }, | |
| { | |
| "epoch": 0.6810253409880426, | |
| "grad_norm": 0.04189728013078323, | |
| "learning_rate": 6.128859413017141e-06, | |
| "loss": 0.7964, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 0.6816104143050427, | |
| "grad_norm": 0.04337964675631273, | |
| "learning_rate": 6.1259563440351564e-06, | |
| "loss": 0.8434, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 0.6821954876220426, | |
| "grad_norm": 0.09451825900326687, | |
| "learning_rate": 6.123051816959569e-06, | |
| "loss": 0.8254, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 0.6827805609390427, | |
| "grad_norm": 0.045248262569469115, | |
| "learning_rate": 6.120145834244133e-06, | |
| "loss": 0.7613, | |
| "step": 1167 | |
| }, | |
| { | |
| "epoch": 0.6833656342560427, | |
| "grad_norm": 0.03837960459677211, | |
| "learning_rate": 6.117238398343831e-06, | |
| "loss": 0.7958, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 0.6839507075730428, | |
| "grad_norm": 0.05242659218061473, | |
| "learning_rate": 6.114329511714876e-06, | |
| "loss": 0.8356, | |
| "step": 1169 | |
| }, | |
| { | |
| "epoch": 0.6845357808900427, | |
| "grad_norm": 0.06860429447590187, | |
| "learning_rate": 6.111419176814704e-06, | |
| "loss": 0.802, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.6851208542070428, | |
| "grad_norm": 0.04508456559483142, | |
| "learning_rate": 6.108507396101975e-06, | |
| "loss": 0.8444, | |
| "step": 1171 | |
| }, | |
| { | |
| "epoch": 0.6857059275240428, | |
| "grad_norm": 0.06393026794280061, | |
| "learning_rate": 6.105594172036572e-06, | |
| "loss": 0.7585, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 0.6862910008410429, | |
| "grad_norm": 0.05435654372858688, | |
| "learning_rate": 6.102679507079597e-06, | |
| "loss": 0.9379, | |
| "step": 1173 | |
| }, | |
| { | |
| "epoch": 0.686876074158043, | |
| "grad_norm": 0.057231278163684694, | |
| "learning_rate": 6.099763403693366e-06, | |
| "loss": 0.8431, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 0.6874611474750429, | |
| "grad_norm": 0.0414940103750233, | |
| "learning_rate": 6.096845864341415e-06, | |
| "loss": 0.8247, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.688046220792043, | |
| "grad_norm": 0.03753965623304419, | |
| "learning_rate": 6.09392689148849e-06, | |
| "loss": 0.7384, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 0.688631294109043, | |
| "grad_norm": 0.0445855037853274, | |
| "learning_rate": 6.09100648760055e-06, | |
| "loss": 0.8913, | |
| "step": 1177 | |
| }, | |
| { | |
| "epoch": 0.6892163674260431, | |
| "grad_norm": 0.03616019636034817, | |
| "learning_rate": 6.08808465514476e-06, | |
| "loss": 0.7741, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 0.6898014407430431, | |
| "grad_norm": 0.03958351633767278, | |
| "learning_rate": 6.085161396589493e-06, | |
| "loss": 0.6991, | |
| "step": 1179 | |
| }, | |
| { | |
| "epoch": 0.6903865140600431, | |
| "grad_norm": 0.03958973007993701, | |
| "learning_rate": 6.082236714404331e-06, | |
| "loss": 0.8455, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.6909715873770432, | |
| "grad_norm": 0.04165937145938639, | |
| "learning_rate": 6.079310611060052e-06, | |
| "loss": 0.8072, | |
| "step": 1181 | |
| }, | |
| { | |
| "epoch": 0.6915566606940432, | |
| "grad_norm": 0.045951396419511376, | |
| "learning_rate": 6.07638308902864e-06, | |
| "loss": 0.8313, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 0.6921417340110433, | |
| "grad_norm": 0.04752344885301804, | |
| "learning_rate": 6.073454150783274e-06, | |
| "loss": 0.8802, | |
| "step": 1183 | |
| }, | |
| { | |
| "epoch": 0.6927268073280433, | |
| "grad_norm": 0.05117696052721477, | |
| "learning_rate": 6.070523798798329e-06, | |
| "loss": 0.9472, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 0.6933118806450433, | |
| "grad_norm": 0.040955259256951086, | |
| "learning_rate": 6.06759203554938e-06, | |
| "loss": 0.8126, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 0.6938969539620433, | |
| "grad_norm": 0.034916342462867116, | |
| "learning_rate": 6.064658863513186e-06, | |
| "loss": 0.7091, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 0.6944820272790434, | |
| "grad_norm": 0.042140933363091035, | |
| "learning_rate": 6.061724285167704e-06, | |
| "loss": 0.8323, | |
| "step": 1187 | |
| }, | |
| { | |
| "epoch": 0.6950671005960435, | |
| "grad_norm": 0.04413794065710716, | |
| "learning_rate": 6.058788302992072e-06, | |
| "loss": 0.8419, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 0.6956521739130435, | |
| "grad_norm": 0.042514536823805356, | |
| "learning_rate": 6.055850919466621e-06, | |
| "loss": 0.8863, | |
| "step": 1189 | |
| }, | |
| { | |
| "epoch": 0.6962372472300435, | |
| "grad_norm": 0.04885967786149231, | |
| "learning_rate": 6.05291213707286e-06, | |
| "loss": 0.7658, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.6968223205470435, | |
| "grad_norm": 0.04978541890148103, | |
| "learning_rate": 6.0499719582934815e-06, | |
| "loss": 0.7496, | |
| "step": 1191 | |
| }, | |
| { | |
| "epoch": 0.6974073938640436, | |
| "grad_norm": 0.05231245753746865, | |
| "learning_rate": 6.047030385612362e-06, | |
| "loss": 0.784, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 0.6979924671810436, | |
| "grad_norm": 0.04028385347084005, | |
| "learning_rate": 6.0440874215145465e-06, | |
| "loss": 0.6913, | |
| "step": 1193 | |
| }, | |
| { | |
| "epoch": 0.6985775404980437, | |
| "grad_norm": 0.062297108276536735, | |
| "learning_rate": 6.041143068486264e-06, | |
| "loss": 0.7337, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 0.6991626138150437, | |
| "grad_norm": 0.03928043528877265, | |
| "learning_rate": 6.038197329014914e-06, | |
| "loss": 0.8415, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 0.6997476871320437, | |
| "grad_norm": 0.046537841954999465, | |
| "learning_rate": 6.035250205589064e-06, | |
| "loss": 0.7873, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 0.7003327604490438, | |
| "grad_norm": 0.042222747639775425, | |
| "learning_rate": 6.032301700698458e-06, | |
| "loss": 0.7629, | |
| "step": 1197 | |
| }, | |
| { | |
| "epoch": 0.7009178337660438, | |
| "grad_norm": 0.05930943404435324, | |
| "learning_rate": 6.029351816833998e-06, | |
| "loss": 0.8273, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 0.7015029070830439, | |
| "grad_norm": 0.05874167359504905, | |
| "learning_rate": 6.026400556487758e-06, | |
| "loss": 0.7159, | |
| "step": 1199 | |
| }, | |
| { | |
| "epoch": 0.7020879804000438, | |
| "grad_norm": 0.052410757851244154, | |
| "learning_rate": 6.023447922152972e-06, | |
| "loss": 0.7887, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.7026730537170439, | |
| "grad_norm": 0.03785761302792468, | |
| "learning_rate": 6.020493916324037e-06, | |
| "loss": 0.7942, | |
| "step": 1201 | |
| }, | |
| { | |
| "epoch": 0.703258127034044, | |
| "grad_norm": 0.0392431216621593, | |
| "learning_rate": 6.017538541496503e-06, | |
| "loss": 0.7885, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 0.703843200351044, | |
| "grad_norm": 0.08892449717880838, | |
| "learning_rate": 6.014581800167085e-06, | |
| "loss": 0.8001, | |
| "step": 1203 | |
| }, | |
| { | |
| "epoch": 0.7044282736680441, | |
| "grad_norm": 0.06747014549686459, | |
| "learning_rate": 6.011623694833644e-06, | |
| "loss": 0.727, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 0.705013346985044, | |
| "grad_norm": 0.04543757611973289, | |
| "learning_rate": 6.008664227995198e-06, | |
| "loss": 0.8129, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 0.7055984203020441, | |
| "grad_norm": 0.039104300131473785, | |
| "learning_rate": 6.005703402151916e-06, | |
| "loss": 0.8098, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 0.7061834936190441, | |
| "grad_norm": 0.038265510416551914, | |
| "learning_rate": 6.0027412198051114e-06, | |
| "loss": 0.8279, | |
| "step": 1207 | |
| }, | |
| { | |
| "epoch": 0.7067685669360442, | |
| "grad_norm": 0.03588239116909633, | |
| "learning_rate": 5.999777683457247e-06, | |
| "loss": 0.8096, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 0.7073536402530443, | |
| "grad_norm": 0.03971284808692058, | |
| "learning_rate": 5.996812795611928e-06, | |
| "loss": 0.8331, | |
| "step": 1209 | |
| }, | |
| { | |
| "epoch": 0.7079387135700442, | |
| "grad_norm": 0.03963917599976568, | |
| "learning_rate": 5.9938465587739e-06, | |
| "loss": 0.7908, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.7085237868870443, | |
| "grad_norm": 0.045273875457725724, | |
| "learning_rate": 5.990878975449051e-06, | |
| "loss": 0.8111, | |
| "step": 1211 | |
| }, | |
| { | |
| "epoch": 0.7091088602040443, | |
| "grad_norm": 0.04337467569823838, | |
| "learning_rate": 5.9879100481444055e-06, | |
| "loss": 0.8222, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 0.7096939335210444, | |
| "grad_norm": 0.039603085337812066, | |
| "learning_rate": 5.984939779368122e-06, | |
| "loss": 0.7721, | |
| "step": 1213 | |
| }, | |
| { | |
| "epoch": 0.7102790068380443, | |
| "grad_norm": 0.04156772105291293, | |
| "learning_rate": 5.981968171629494e-06, | |
| "loss": 0.8197, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 0.7108640801550444, | |
| "grad_norm": 0.04856521644044076, | |
| "learning_rate": 5.978995227438944e-06, | |
| "loss": 0.8677, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 0.7114491534720445, | |
| "grad_norm": 0.03956445076057253, | |
| "learning_rate": 5.976020949308027e-06, | |
| "loss": 0.7877, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 0.7120342267890445, | |
| "grad_norm": 0.04862552856496891, | |
| "learning_rate": 5.973045339749422e-06, | |
| "loss": 0.8109, | |
| "step": 1217 | |
| }, | |
| { | |
| "epoch": 0.7126193001060446, | |
| "grad_norm": 0.045295501112575506, | |
| "learning_rate": 5.970068401276935e-06, | |
| "loss": 0.791, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 0.7132043734230445, | |
| "grad_norm": 0.03916519269752596, | |
| "learning_rate": 5.967090136405491e-06, | |
| "loss": 0.8131, | |
| "step": 1219 | |
| }, | |
| { | |
| "epoch": 0.7137894467400446, | |
| "grad_norm": 0.038383406339616045, | |
| "learning_rate": 5.96411054765114e-06, | |
| "loss": 0.8102, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.7143745200570446, | |
| "grad_norm": 0.06824384503876316, | |
| "learning_rate": 5.961129637531047e-06, | |
| "loss": 0.8104, | |
| "step": 1221 | |
| }, | |
| { | |
| "epoch": 0.7149595933740447, | |
| "grad_norm": 0.07316382930858244, | |
| "learning_rate": 5.958147408563497e-06, | |
| "loss": 0.7862, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 0.7155446666910448, | |
| "grad_norm": 0.04737080928686243, | |
| "learning_rate": 5.9551638632678835e-06, | |
| "loss": 0.8513, | |
| "step": 1223 | |
| }, | |
| { | |
| "epoch": 0.7161297400080447, | |
| "grad_norm": 0.0374937969952467, | |
| "learning_rate": 5.952179004164718e-06, | |
| "loss": 0.8002, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 0.7167148133250448, | |
| "grad_norm": 0.04053689173262939, | |
| "learning_rate": 5.949192833775618e-06, | |
| "loss": 0.7657, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.7172998866420448, | |
| "grad_norm": 0.0398778756693423, | |
| "learning_rate": 5.946205354623312e-06, | |
| "loss": 0.7725, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 0.7178849599590449, | |
| "grad_norm": 0.04506339056501446, | |
| "learning_rate": 5.943216569231629e-06, | |
| "loss": 0.7931, | |
| "step": 1227 | |
| }, | |
| { | |
| "epoch": 0.7184700332760449, | |
| "grad_norm": 0.04982830890931998, | |
| "learning_rate": 5.940226480125508e-06, | |
| "loss": 0.8265, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 0.719055106593045, | |
| "grad_norm": 0.0397312539469508, | |
| "learning_rate": 5.937235089830984e-06, | |
| "loss": 0.769, | |
| "step": 1229 | |
| }, | |
| { | |
| "epoch": 0.719640179910045, | |
| "grad_norm": 0.03837231212563197, | |
| "learning_rate": 5.934242400875195e-06, | |
| "loss": 0.8259, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.720225253227045, | |
| "grad_norm": 0.03612472247568365, | |
| "learning_rate": 5.931248415786371e-06, | |
| "loss": 0.7637, | |
| "step": 1231 | |
| }, | |
| { | |
| "epoch": 0.7208103265440451, | |
| "grad_norm": 0.03537723098243959, | |
| "learning_rate": 5.928253137093844e-06, | |
| "loss": 0.738, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 0.7213953998610451, | |
| "grad_norm": 0.0345658269866166, | |
| "learning_rate": 5.925256567328036e-06, | |
| "loss": 0.7422, | |
| "step": 1233 | |
| }, | |
| { | |
| "epoch": 0.7219804731780451, | |
| "grad_norm": 0.041550218114552985, | |
| "learning_rate": 5.922258709020456e-06, | |
| "loss": 0.8158, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 0.7225655464950451, | |
| "grad_norm": 0.04738915180347003, | |
| "learning_rate": 5.919259564703705e-06, | |
| "loss": 0.7693, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 0.7231506198120452, | |
| "grad_norm": 0.22798373109574022, | |
| "learning_rate": 5.916259136911472e-06, | |
| "loss": 0.7964, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 0.7237356931290453, | |
| "grad_norm": 0.03960961306940275, | |
| "learning_rate": 5.913257428178526e-06, | |
| "loss": 0.8529, | |
| "step": 1237 | |
| }, | |
| { | |
| "epoch": 0.7243207664460453, | |
| "grad_norm": 0.03762982041843176, | |
| "learning_rate": 5.910254441040723e-06, | |
| "loss": 0.7978, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 0.7249058397630453, | |
| "grad_norm": 0.03967585434913458, | |
| "learning_rate": 5.907250178034994e-06, | |
| "loss": 0.7715, | |
| "step": 1239 | |
| }, | |
| { | |
| "epoch": 0.7254909130800453, | |
| "grad_norm": 0.035499686476307656, | |
| "learning_rate": 5.904244641699352e-06, | |
| "loss": 0.7821, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.7260759863970454, | |
| "grad_norm": 0.04146813756606358, | |
| "learning_rate": 5.9012378345728824e-06, | |
| "loss": 0.7832, | |
| "step": 1241 | |
| }, | |
| { | |
| "epoch": 0.7266610597140454, | |
| "grad_norm": 0.035835229174829814, | |
| "learning_rate": 5.8982297591957465e-06, | |
| "loss": 0.7951, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 0.7272461330310455, | |
| "grad_norm": 0.040752453651612, | |
| "learning_rate": 5.8952204181091775e-06, | |
| "loss": 0.7898, | |
| "step": 1243 | |
| }, | |
| { | |
| "epoch": 0.7278312063480454, | |
| "grad_norm": 0.04254122433979048, | |
| "learning_rate": 5.8922098138554745e-06, | |
| "loss": 0.8384, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 0.7284162796650455, | |
| "grad_norm": 0.048324438293500575, | |
| "learning_rate": 5.889197948978008e-06, | |
| "loss": 0.7273, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 0.7290013529820456, | |
| "grad_norm": 0.04876106635437417, | |
| "learning_rate": 5.886184826021208e-06, | |
| "loss": 0.7078, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 0.7295864262990456, | |
| "grad_norm": 0.047909628199116906, | |
| "learning_rate": 5.883170447530575e-06, | |
| "loss": 0.7293, | |
| "step": 1247 | |
| }, | |
| { | |
| "epoch": 0.7301714996160457, | |
| "grad_norm": 0.04759781973779755, | |
| "learning_rate": 5.880154816052666e-06, | |
| "loss": 0.8114, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 0.7307565729330456, | |
| "grad_norm": 0.03579815989872979, | |
| "learning_rate": 5.8771379341350905e-06, | |
| "loss": 0.7843, | |
| "step": 1249 | |
| }, | |
| { | |
| "epoch": 0.7313416462500457, | |
| "grad_norm": 0.048181696474966534, | |
| "learning_rate": 5.874119804326525e-06, | |
| "loss": 0.85, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.7319267195670457, | |
| "grad_norm": 0.040629567336692485, | |
| "learning_rate": 5.871100429176694e-06, | |
| "loss": 0.8121, | |
| "step": 1251 | |
| }, | |
| { | |
| "epoch": 0.7325117928840458, | |
| "grad_norm": 0.039483264479736525, | |
| "learning_rate": 5.8680798112363784e-06, | |
| "loss": 0.8365, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 0.7330968662010459, | |
| "grad_norm": 0.05089919878146447, | |
| "learning_rate": 5.865057953057401e-06, | |
| "loss": 0.8076, | |
| "step": 1253 | |
| }, | |
| { | |
| "epoch": 0.7336819395180458, | |
| "grad_norm": 0.035397442355980586, | |
| "learning_rate": 5.862034857192642e-06, | |
| "loss": 0.7798, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 0.7342670128350459, | |
| "grad_norm": 0.03687328400855783, | |
| "learning_rate": 5.859010526196021e-06, | |
| "loss": 0.7859, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 0.7348520861520459, | |
| "grad_norm": 0.06502490332457671, | |
| "learning_rate": 5.855984962622504e-06, | |
| "loss": 0.7927, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 0.735437159469046, | |
| "grad_norm": 0.04157588146277028, | |
| "learning_rate": 5.852958169028094e-06, | |
| "loss": 0.7907, | |
| "step": 1257 | |
| }, | |
| { | |
| "epoch": 0.736022232786046, | |
| "grad_norm": 0.04254658384010035, | |
| "learning_rate": 5.849930147969839e-06, | |
| "loss": 0.7903, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 0.736607306103046, | |
| "grad_norm": 0.04207256800472794, | |
| "learning_rate": 5.846900902005822e-06, | |
| "loss": 0.7673, | |
| "step": 1259 | |
| }, | |
| { | |
| "epoch": 0.7371923794200461, | |
| "grad_norm": 0.03928632402273387, | |
| "learning_rate": 5.843870433695156e-06, | |
| "loss": 0.7548, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.7377774527370461, | |
| "grad_norm": 0.039954700127276935, | |
| "learning_rate": 5.8408387455979946e-06, | |
| "loss": 0.7986, | |
| "step": 1261 | |
| }, | |
| { | |
| "epoch": 0.7383625260540462, | |
| "grad_norm": 0.03783285103775797, | |
| "learning_rate": 5.837805840275515e-06, | |
| "loss": 0.8107, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 0.7389475993710461, | |
| "grad_norm": 0.040841957568584206, | |
| "learning_rate": 5.834771720289929e-06, | |
| "loss": 0.8005, | |
| "step": 1263 | |
| }, | |
| { | |
| "epoch": 0.7395326726880462, | |
| "grad_norm": 0.044981013127008924, | |
| "learning_rate": 5.831736388204467e-06, | |
| "loss": 0.7682, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 0.7401177460050462, | |
| "grad_norm": 0.038332859477558344, | |
| "learning_rate": 5.828699846583389e-06, | |
| "loss": 0.7548, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 0.7407028193220463, | |
| "grad_norm": 0.04385966631432292, | |
| "learning_rate": 5.825662097991978e-06, | |
| "loss": 0.8131, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 0.7412878926390464, | |
| "grad_norm": 0.038909792748055394, | |
| "learning_rate": 5.82262314499653e-06, | |
| "loss": 0.8741, | |
| "step": 1267 | |
| }, | |
| { | |
| "epoch": 0.7418729659560463, | |
| "grad_norm": 0.05612505051259805, | |
| "learning_rate": 5.8195829901643655e-06, | |
| "loss": 0.8022, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 0.7424580392730464, | |
| "grad_norm": 0.041230289913411614, | |
| "learning_rate": 5.816541636063816e-06, | |
| "loss": 0.7588, | |
| "step": 1269 | |
| }, | |
| { | |
| "epoch": 0.7430431125900464, | |
| "grad_norm": 0.053472936644260796, | |
| "learning_rate": 5.813499085264229e-06, | |
| "loss": 0.733, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.7436281859070465, | |
| "grad_norm": 0.05949960419609776, | |
| "learning_rate": 5.8104553403359586e-06, | |
| "loss": 0.8247, | |
| "step": 1271 | |
| }, | |
| { | |
| "epoch": 0.7442132592240465, | |
| "grad_norm": 0.04317467554051099, | |
| "learning_rate": 5.807410403850371e-06, | |
| "loss": 0.8015, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 0.7447983325410465, | |
| "grad_norm": 0.040008771003069384, | |
| "learning_rate": 5.804364278379842e-06, | |
| "loss": 0.7975, | |
| "step": 1273 | |
| }, | |
| { | |
| "epoch": 0.7453834058580466, | |
| "grad_norm": 0.039926332581406196, | |
| "learning_rate": 5.801316966497744e-06, | |
| "loss": 0.7404, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 0.7459684791750466, | |
| "grad_norm": 0.03922271741776474, | |
| "learning_rate": 5.798268470778461e-06, | |
| "loss": 0.8157, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 0.7465535524920467, | |
| "grad_norm": 0.03804854554685112, | |
| "learning_rate": 5.795218793797367e-06, | |
| "loss": 0.6987, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 0.7471386258090467, | |
| "grad_norm": 0.03876336199264868, | |
| "learning_rate": 5.792167938130842e-06, | |
| "loss": 0.8456, | |
| "step": 1277 | |
| }, | |
| { | |
| "epoch": 0.7477236991260467, | |
| "grad_norm": 0.03771896812051095, | |
| "learning_rate": 5.78911590635626e-06, | |
| "loss": 0.726, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 0.7483087724430467, | |
| "grad_norm": 0.036735705010982965, | |
| "learning_rate": 5.786062701051983e-06, | |
| "loss": 0.9274, | |
| "step": 1279 | |
| }, | |
| { | |
| "epoch": 0.7488938457600468, | |
| "grad_norm": 0.03943361612412777, | |
| "learning_rate": 5.783008324797375e-06, | |
| "loss": 0.7146, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.7494789190770469, | |
| "grad_norm": 0.03647462654976768, | |
| "learning_rate": 5.779952780172777e-06, | |
| "loss": 0.7589, | |
| "step": 1281 | |
| }, | |
| { | |
| "epoch": 0.7500639923940469, | |
| "grad_norm": 0.03462458239120716, | |
| "learning_rate": 5.776896069759528e-06, | |
| "loss": 0.7397, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 0.750649065711047, | |
| "grad_norm": 0.03495722370436699, | |
| "learning_rate": 5.773838196139946e-06, | |
| "loss": 0.6993, | |
| "step": 1283 | |
| }, | |
| { | |
| "epoch": 0.7512341390280469, | |
| "grad_norm": 0.05321435758446983, | |
| "learning_rate": 5.770779161897329e-06, | |
| "loss": 0.8397, | |
| "step": 1284 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 3418, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 428, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 5326977607139328.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |