| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.987241531016278, | |
| "eval_steps": 500, | |
| "global_step": 567, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.005279366476022877, | |
| "grad_norm": 9.06258192697587, | |
| "learning_rate": 0.0, | |
| "loss": 1.678, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.010558732952045754, | |
| "grad_norm": 9.487470045686269, | |
| "learning_rate": 1.7543859649122808e-07, | |
| "loss": 1.8487, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.01583809942806863, | |
| "grad_norm": 8.792837550815417, | |
| "learning_rate": 3.5087719298245616e-07, | |
| "loss": 1.6855, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.02111746590409151, | |
| "grad_norm": 9.323941973098307, | |
| "learning_rate": 5.263157894736843e-07, | |
| "loss": 1.79, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.026396832380114386, | |
| "grad_norm": 8.5661762778806, | |
| "learning_rate": 7.017543859649123e-07, | |
| "loss": 1.6796, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.03167619885613726, | |
| "grad_norm": 8.780182073885886, | |
| "learning_rate": 8.771929824561404e-07, | |
| "loss": 1.8326, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.03695556533216014, | |
| "grad_norm": 8.658784025795836, | |
| "learning_rate": 1.0526315789473685e-06, | |
| "loss": 1.7229, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.04223493180818302, | |
| "grad_norm": 8.678284010844528, | |
| "learning_rate": 1.2280701754385965e-06, | |
| "loss": 1.782, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.0475142982842059, | |
| "grad_norm": 7.4972200445720425, | |
| "learning_rate": 1.4035087719298246e-06, | |
| "loss": 1.6596, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.05279366476022877, | |
| "grad_norm": 8.204566912821951, | |
| "learning_rate": 1.5789473684210526e-06, | |
| "loss": 1.6707, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.05807303123625165, | |
| "grad_norm": 7.461093116720939, | |
| "learning_rate": 1.7543859649122807e-06, | |
| "loss": 1.718, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.06335239771227452, | |
| "grad_norm": 6.113172095963636, | |
| "learning_rate": 1.929824561403509e-06, | |
| "loss": 1.5939, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.0686317641882974, | |
| "grad_norm": 5.4039227506301835, | |
| "learning_rate": 2.105263157894737e-06, | |
| "loss": 1.497, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.07391113066432028, | |
| "grad_norm": 5.072244053671971, | |
| "learning_rate": 2.280701754385965e-06, | |
| "loss": 1.4584, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.07919049714034315, | |
| "grad_norm": 4.290896240507921, | |
| "learning_rate": 2.456140350877193e-06, | |
| "loss": 1.4996, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.08446986361636603, | |
| "grad_norm": 3.563114688671191, | |
| "learning_rate": 2.631578947368421e-06, | |
| "loss": 1.4042, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.08974923009238892, | |
| "grad_norm": 3.598254571007457, | |
| "learning_rate": 2.8070175438596493e-06, | |
| "loss": 1.4125, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.0950285965684118, | |
| "grad_norm": 3.6215074072359865, | |
| "learning_rate": 2.9824561403508774e-06, | |
| "loss": 1.4723, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.10030796304443466, | |
| "grad_norm": 2.915485138222026, | |
| "learning_rate": 3.157894736842105e-06, | |
| "loss": 1.2987, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.10558732952045755, | |
| "grad_norm": 2.4634215162994595, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 1.2651, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.11086669599648043, | |
| "grad_norm": 2.3107374113003103, | |
| "learning_rate": 3.5087719298245615e-06, | |
| "loss": 1.2512, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.1161460624725033, | |
| "grad_norm": 2.7621133107185707, | |
| "learning_rate": 3.6842105263157896e-06, | |
| "loss": 1.3191, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.12142542894852618, | |
| "grad_norm": 2.877480148788483, | |
| "learning_rate": 3.859649122807018e-06, | |
| "loss": 1.2354, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.12670479542454904, | |
| "grad_norm": 2.329645780694447, | |
| "learning_rate": 4.035087719298246e-06, | |
| "loss": 1.174, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.13198416190057194, | |
| "grad_norm": 2.692865294147214, | |
| "learning_rate": 4.210526315789474e-06, | |
| "loss": 1.2824, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.1372635283765948, | |
| "grad_norm": 2.2247599668267615, | |
| "learning_rate": 4.385964912280702e-06, | |
| "loss": 1.2023, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.14254289485261767, | |
| "grad_norm": 2.038589631126849, | |
| "learning_rate": 4.56140350877193e-06, | |
| "loss": 1.1412, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.14782226132864057, | |
| "grad_norm": 2.0910541565756002, | |
| "learning_rate": 4.736842105263158e-06, | |
| "loss": 1.1989, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.15310162780466344, | |
| "grad_norm": 1.7517601173871857, | |
| "learning_rate": 4.912280701754386e-06, | |
| "loss": 1.0738, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.1583809942806863, | |
| "grad_norm": 1.6413195717279263, | |
| "learning_rate": 5.087719298245615e-06, | |
| "loss": 1.0853, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.1636603607567092, | |
| "grad_norm": 1.688876231888243, | |
| "learning_rate": 5.263157894736842e-06, | |
| "loss": 1.061, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.16893972723273207, | |
| "grad_norm": 1.5670006557231517, | |
| "learning_rate": 5.438596491228071e-06, | |
| "loss": 1.1496, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.17421909370875496, | |
| "grad_norm": 1.7072674079415744, | |
| "learning_rate": 5.6140350877192985e-06, | |
| "loss": 1.1424, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.17949846018477783, | |
| "grad_norm": 1.6307968487984736, | |
| "learning_rate": 5.789473684210527e-06, | |
| "loss": 1.0655, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.1847778266608007, | |
| "grad_norm": 1.6358139036334913, | |
| "learning_rate": 5.964912280701755e-06, | |
| "loss": 1.0241, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.1900571931368236, | |
| "grad_norm": 1.4688899759958145, | |
| "learning_rate": 6.140350877192983e-06, | |
| "loss": 1.0076, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.19533655961284646, | |
| "grad_norm": 1.5254490097810096, | |
| "learning_rate": 6.31578947368421e-06, | |
| "loss": 1.0991, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.20061592608886933, | |
| "grad_norm": 1.5005708253602146, | |
| "learning_rate": 6.491228070175439e-06, | |
| "loss": 1.0824, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.20589529256489222, | |
| "grad_norm": 1.420109858670133, | |
| "learning_rate": 6.666666666666667e-06, | |
| "loss": 0.994, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.2111746590409151, | |
| "grad_norm": 1.4491053784050478, | |
| "learning_rate": 6.842105263157896e-06, | |
| "loss": 1.0583, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.21645402551693796, | |
| "grad_norm": 1.448588096978498, | |
| "learning_rate": 7.017543859649123e-06, | |
| "loss": 0.9892, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.22173339199296085, | |
| "grad_norm": 1.4526199178661001, | |
| "learning_rate": 7.192982456140352e-06, | |
| "loss": 1.0051, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.22701275846898372, | |
| "grad_norm": 1.4693112027438093, | |
| "learning_rate": 7.368421052631579e-06, | |
| "loss": 0.9833, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.2322921249450066, | |
| "grad_norm": 1.3739429688061344, | |
| "learning_rate": 7.5438596491228074e-06, | |
| "loss": 0.9793, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.23757149142102948, | |
| "grad_norm": 1.4247570212621312, | |
| "learning_rate": 7.719298245614036e-06, | |
| "loss": 0.9366, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.24285085789705235, | |
| "grad_norm": 1.2987600676752833, | |
| "learning_rate": 7.894736842105265e-06, | |
| "loss": 0.9006, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.24813022437307522, | |
| "grad_norm": 1.5006141932812773, | |
| "learning_rate": 8.070175438596492e-06, | |
| "loss": 0.9785, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.2534095908490981, | |
| "grad_norm": 1.468806773441352, | |
| "learning_rate": 8.24561403508772e-06, | |
| "loss": 0.9941, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.258688957325121, | |
| "grad_norm": 1.3084024864816892, | |
| "learning_rate": 8.421052631578948e-06, | |
| "loss": 0.938, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.2639683238011439, | |
| "grad_norm": 1.4542464523472705, | |
| "learning_rate": 8.596491228070176e-06, | |
| "loss": 1.009, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.2692476902771667, | |
| "grad_norm": 1.3097725384782457, | |
| "learning_rate": 8.771929824561405e-06, | |
| "loss": 0.9557, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.2745270567531896, | |
| "grad_norm": 1.3424191425040415, | |
| "learning_rate": 8.947368421052632e-06, | |
| "loss": 0.9773, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.2798064232292125, | |
| "grad_norm": 1.3120126450980685, | |
| "learning_rate": 9.12280701754386e-06, | |
| "loss": 0.979, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.28508578970523535, | |
| "grad_norm": 1.4326002194117426, | |
| "learning_rate": 9.298245614035088e-06, | |
| "loss": 0.9909, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.29036515618125824, | |
| "grad_norm": 1.3411895985410853, | |
| "learning_rate": 9.473684210526315e-06, | |
| "loss": 0.9343, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.29564452265728114, | |
| "grad_norm": 1.4092777694950307, | |
| "learning_rate": 9.649122807017545e-06, | |
| "loss": 0.9711, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.300923889133304, | |
| "grad_norm": 1.3183008246582104, | |
| "learning_rate": 9.824561403508772e-06, | |
| "loss": 1.0189, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.3062032556093269, | |
| "grad_norm": 1.4351454680178788, | |
| "learning_rate": 1e-05, | |
| "loss": 0.9581, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.31148262208534977, | |
| "grad_norm": 1.4327154249987661, | |
| "learning_rate": 9.999905136743635e-06, | |
| "loss": 1.0108, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.3167619885613726, | |
| "grad_norm": 1.2783140310829089, | |
| "learning_rate": 9.999620550574155e-06, | |
| "loss": 0.8755, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.3220413550373955, | |
| "grad_norm": 1.3286747156393754, | |
| "learning_rate": 9.999146252290264e-06, | |
| "loss": 0.9038, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.3273207215134184, | |
| "grad_norm": 1.332121876828825, | |
| "learning_rate": 9.99848225988936e-06, | |
| "loss": 0.9326, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.33260008798944124, | |
| "grad_norm": 1.279293629368409, | |
| "learning_rate": 9.99762859856683e-06, | |
| "loss": 0.8677, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.33787945446546414, | |
| "grad_norm": 1.273872943436948, | |
| "learning_rate": 9.996585300715117e-06, | |
| "loss": 0.9299, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.34315882094148703, | |
| "grad_norm": 1.2562758551505886, | |
| "learning_rate": 9.995352405922467e-06, | |
| "loss": 0.9017, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.3484381874175099, | |
| "grad_norm": 1.4032802781936289, | |
| "learning_rate": 9.99392996097145e-06, | |
| "loss": 0.9452, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.35371755389353277, | |
| "grad_norm": 1.4699498993510196, | |
| "learning_rate": 9.992318019837171e-06, | |
| "loss": 1.0884, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.35899692036955566, | |
| "grad_norm": 1.2352131609899204, | |
| "learning_rate": 9.990516643685222e-06, | |
| "loss": 0.9171, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.36427628684557856, | |
| "grad_norm": 1.3192425912595864, | |
| "learning_rate": 9.988525900869366e-06, | |
| "loss": 0.9075, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.3695556533216014, | |
| "grad_norm": 1.2967022211363584, | |
| "learning_rate": 9.98634586692894e-06, | |
| "loss": 0.885, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.3748350197976243, | |
| "grad_norm": 1.3926263384486055, | |
| "learning_rate": 9.983976624585996e-06, | |
| "loss": 0.9215, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.3801143862736472, | |
| "grad_norm": 1.2279876199917352, | |
| "learning_rate": 9.981418263742148e-06, | |
| "loss": 0.9121, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.38539375274967, | |
| "grad_norm": 1.3296649614901273, | |
| "learning_rate": 9.978670881475173e-06, | |
| "loss": 0.8719, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.3906731192256929, | |
| "grad_norm": 1.4366980284008448, | |
| "learning_rate": 9.975734582035323e-06, | |
| "loss": 0.9158, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.3959524857017158, | |
| "grad_norm": 1.4161336324128757, | |
| "learning_rate": 9.972609476841368e-06, | |
| "loss": 0.9519, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.40123185217773866, | |
| "grad_norm": 1.2765268199027533, | |
| "learning_rate": 9.96929568447637e-06, | |
| "loss": 0.8892, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.40651121865376155, | |
| "grad_norm": 1.2903936251174384, | |
| "learning_rate": 9.965793330683182e-06, | |
| "loss": 0.8749, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.41179058512978445, | |
| "grad_norm": 1.328926357616079, | |
| "learning_rate": 9.96210254835968e-06, | |
| "loss": 0.8218, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.4170699516058073, | |
| "grad_norm": 1.192377478319799, | |
| "learning_rate": 9.958223477553715e-06, | |
| "loss": 0.8215, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.4223493180818302, | |
| "grad_norm": 1.2953681293753618, | |
| "learning_rate": 9.954156265457801e-06, | |
| "loss": 0.8103, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.4276286845578531, | |
| "grad_norm": 1.3072992620223463, | |
| "learning_rate": 9.949901066403536e-06, | |
| "loss": 0.8992, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.4329080510338759, | |
| "grad_norm": 1.2374067876062618, | |
| "learning_rate": 9.945458041855732e-06, | |
| "loss": 0.8831, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.4381874175098988, | |
| "grad_norm": 1.3974745936753212, | |
| "learning_rate": 9.940827360406297e-06, | |
| "loss": 0.8843, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.4434667839859217, | |
| "grad_norm": 1.2232655465327642, | |
| "learning_rate": 9.936009197767847e-06, | |
| "loss": 0.8714, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.44874615046194455, | |
| "grad_norm": 1.3329454900801005, | |
| "learning_rate": 9.931003736767013e-06, | |
| "loss": 0.9267, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.45402551693796744, | |
| "grad_norm": 1.2551849569644793, | |
| "learning_rate": 9.925811167337533e-06, | |
| "loss": 0.8765, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.45930488341399034, | |
| "grad_norm": 1.324099190921721, | |
| "learning_rate": 9.920431686513023e-06, | |
| "loss": 0.8835, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.4645842498900132, | |
| "grad_norm": 1.2343457957658, | |
| "learning_rate": 9.91486549841951e-06, | |
| "loss": 0.8557, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.4698636163660361, | |
| "grad_norm": 1.357007540882203, | |
| "learning_rate": 9.909112814267686e-06, | |
| "loss": 0.93, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.47514298284205897, | |
| "grad_norm": 1.2685007078756392, | |
| "learning_rate": 9.903173852344889e-06, | |
| "loss": 0.8493, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.4804223493180818, | |
| "grad_norm": 1.3198835038669643, | |
| "learning_rate": 9.89704883800683e-06, | |
| "loss": 0.9375, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.4857017157941047, | |
| "grad_norm": 1.2921827299558808, | |
| "learning_rate": 9.890738003669029e-06, | |
| "loss": 0.8502, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.4909810822701276, | |
| "grad_norm": 1.3792411082761877, | |
| "learning_rate": 9.884241588798004e-06, | |
| "loss": 0.8722, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.49626044874615044, | |
| "grad_norm": 1.3853454707822626, | |
| "learning_rate": 9.877559839902185e-06, | |
| "loss": 0.9781, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.5015398152221734, | |
| "grad_norm": 1.4954228020473375, | |
| "learning_rate": 9.870693010522552e-06, | |
| "loss": 0.9494, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.5068191816981962, | |
| "grad_norm": 1.4537142950730755, | |
| "learning_rate": 9.863641361223025e-06, | |
| "loss": 0.8948, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.5120985481742191, | |
| "grad_norm": 1.3353911864277976, | |
| "learning_rate": 9.85640515958057e-06, | |
| "loss": 0.9305, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.517377914650242, | |
| "grad_norm": 1.3069189595420478, | |
| "learning_rate": 9.848984680175049e-06, | |
| "loss": 1.0241, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.5226572811262649, | |
| "grad_norm": 1.4160480924828072, | |
| "learning_rate": 9.841380204578795e-06, | |
| "loss": 0.8869, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.5279366476022878, | |
| "grad_norm": 1.6379023507410095, | |
| "learning_rate": 9.833592021345938e-06, | |
| "loss": 0.8854, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.5332160140783107, | |
| "grad_norm": 1.4984036893204795, | |
| "learning_rate": 9.825620426001446e-06, | |
| "loss": 0.9243, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.5384953805543334, | |
| "grad_norm": 1.3234999163374, | |
| "learning_rate": 9.817465721029916e-06, | |
| "loss": 0.8645, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.5437747470303563, | |
| "grad_norm": 1.3367013313050653, | |
| "learning_rate": 9.809128215864096e-06, | |
| "loss": 0.9064, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.5490541135063792, | |
| "grad_norm": 1.2888959675698541, | |
| "learning_rate": 9.800608226873143e-06, | |
| "loss": 0.8828, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.5543334799824021, | |
| "grad_norm": 1.3417129361301388, | |
| "learning_rate": 9.791906077350613e-06, | |
| "loss": 0.8687, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.559612846458425, | |
| "grad_norm": 1.387011742779051, | |
| "learning_rate": 9.783022097502204e-06, | |
| "loss": 1.0081, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.5648922129344479, | |
| "grad_norm": 1.2844930485660748, | |
| "learning_rate": 9.773956624433224e-06, | |
| "loss": 0.8988, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.5701715794104707, | |
| "grad_norm": 1.362695293650949, | |
| "learning_rate": 9.764710002135784e-06, | |
| "loss": 0.8097, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.5754509458864936, | |
| "grad_norm": 1.311448593659273, | |
| "learning_rate": 9.755282581475769e-06, | |
| "loss": 0.8707, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.5807303123625165, | |
| "grad_norm": 1.4708655509314155, | |
| "learning_rate": 9.745674720179507e-06, | |
| "loss": 0.913, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.5860096788385394, | |
| "grad_norm": 1.3142546545330323, | |
| "learning_rate": 9.735886782820202e-06, | |
| "loss": 0.8495, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.5912890453145623, | |
| "grad_norm": 1.2448969618431651, | |
| "learning_rate": 9.7259191408041e-06, | |
| "loss": 0.7694, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.5965684117905852, | |
| "grad_norm": 1.2817154139311548, | |
| "learning_rate": 9.715772172356388e-06, | |
| "loss": 0.8874, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.601847778266608, | |
| "grad_norm": 1.303527565876824, | |
| "learning_rate": 9.705446262506858e-06, | |
| "loss": 0.9887, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.6071271447426309, | |
| "grad_norm": 1.3162139512016877, | |
| "learning_rate": 9.694941803075285e-06, | |
| "loss": 0.8731, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.6124065112186537, | |
| "grad_norm": 1.3631926446929201, | |
| "learning_rate": 9.684259192656554e-06, | |
| "loss": 0.8773, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.6176858776946766, | |
| "grad_norm": 1.354454946440946, | |
| "learning_rate": 9.673398836605554e-06, | |
| "loss": 0.9415, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.6229652441706995, | |
| "grad_norm": 1.2806247492822374, | |
| "learning_rate": 9.66236114702178e-06, | |
| "loss": 0.8214, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.6282446106467224, | |
| "grad_norm": 1.3067756329426423, | |
| "learning_rate": 9.651146542733702e-06, | |
| "loss": 0.9561, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.6335239771227452, | |
| "grad_norm": 1.3982461215765278, | |
| "learning_rate": 9.639755449282874e-06, | |
| "loss": 0.8812, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.6388033435987681, | |
| "grad_norm": 1.2734112863554599, | |
| "learning_rate": 9.628188298907782e-06, | |
| "loss": 0.803, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.644082710074791, | |
| "grad_norm": 1.4042153797470949, | |
| "learning_rate": 9.616445530527448e-06, | |
| "loss": 0.8159, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.6493620765508139, | |
| "grad_norm": 1.230725944316371, | |
| "learning_rate": 9.60452758972477e-06, | |
| "loss": 0.8846, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.6546414430268368, | |
| "grad_norm": 1.242349305278167, | |
| "learning_rate": 9.592434928729617e-06, | |
| "loss": 0.7621, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.6599208095028597, | |
| "grad_norm": 1.4468948851039833, | |
| "learning_rate": 9.58016800640167e-06, | |
| "loss": 0.9327, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.6652001759788825, | |
| "grad_norm": 1.3595406863398718, | |
| "learning_rate": 9.567727288213005e-06, | |
| "loss": 0.8629, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.6704795424549054, | |
| "grad_norm": 1.3391481209651412, | |
| "learning_rate": 9.555113246230443e-06, | |
| "loss": 0.943, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.6757589089309283, | |
| "grad_norm": 1.3816914835623122, | |
| "learning_rate": 9.542326359097619e-06, | |
| "loss": 0.8841, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.6810382754069512, | |
| "grad_norm": 1.2580871478999183, | |
| "learning_rate": 9.529367112016836e-06, | |
| "loss": 0.8039, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.6863176418829741, | |
| "grad_norm": 1.3901966106075496, | |
| "learning_rate": 9.516235996730645e-06, | |
| "loss": 0.8409, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.691597008358997, | |
| "grad_norm": 1.2698547636051096, | |
| "learning_rate": 9.502933511503187e-06, | |
| "loss": 0.8499, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.6968763748350199, | |
| "grad_norm": 1.390987513649665, | |
| "learning_rate": 9.489460161101291e-06, | |
| "loss": 0.8549, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.7021557413110426, | |
| "grad_norm": 1.3670503603402828, | |
| "learning_rate": 9.475816456775313e-06, | |
| "loss": 0.8436, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.7074351077870655, | |
| "grad_norm": 1.2624531025923733, | |
| "learning_rate": 9.46200291623974e-06, | |
| "loss": 0.853, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.7127144742630884, | |
| "grad_norm": 1.2250644357723437, | |
| "learning_rate": 9.44802006365355e-06, | |
| "loss": 0.8283, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.7179938407391113, | |
| "grad_norm": 1.2734440256518054, | |
| "learning_rate": 9.43386842960031e-06, | |
| "loss": 0.8474, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.7232732072151342, | |
| "grad_norm": 1.3509174047115824, | |
| "learning_rate": 9.419548551068061e-06, | |
| "loss": 0.8657, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.7285525736911571, | |
| "grad_norm": 1.370913524291918, | |
| "learning_rate": 9.405060971428924e-06, | |
| "loss": 0.8915, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.7338319401671799, | |
| "grad_norm": 1.2433881056858749, | |
| "learning_rate": 9.39040624041849e-06, | |
| "loss": 1.024, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.7391113066432028, | |
| "grad_norm": 1.1648887790445894, | |
| "learning_rate": 9.375584914114963e-06, | |
| "loss": 0.7808, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.7443906731192257, | |
| "grad_norm": 1.3435997997706464, | |
| "learning_rate": 9.360597554918055e-06, | |
| "loss": 0.9497, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.7496700395952486, | |
| "grad_norm": 1.401286458253954, | |
| "learning_rate": 9.345444731527642e-06, | |
| "loss": 0.927, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.7549494060712715, | |
| "grad_norm": 1.2883022833187427, | |
| "learning_rate": 9.330127018922195e-06, | |
| "loss": 0.9708, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.7602287725472944, | |
| "grad_norm": 1.2683832198503506, | |
| "learning_rate": 9.31464499833695e-06, | |
| "loss": 0.8359, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.7655081390233172, | |
| "grad_norm": 1.2280565258082867, | |
| "learning_rate": 9.298999257241862e-06, | |
| "loss": 0.8085, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.77078750549934, | |
| "grad_norm": 1.3268019274693263, | |
| "learning_rate": 9.283190389319315e-06, | |
| "loss": 0.8619, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.776066871975363, | |
| "grad_norm": 1.3405013825655892, | |
| "learning_rate": 9.26721899444158e-06, | |
| "loss": 0.8757, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.7813462384513858, | |
| "grad_norm": 1.3113438616437634, | |
| "learning_rate": 9.251085678648072e-06, | |
| "loss": 0.8586, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.7866256049274087, | |
| "grad_norm": 1.2692947439879936, | |
| "learning_rate": 9.234791054122336e-06, | |
| "loss": 0.8315, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.7919049714034316, | |
| "grad_norm": 1.288784803692807, | |
| "learning_rate": 9.218335739168833e-06, | |
| "loss": 0.8138, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.7971843378794544, | |
| "grad_norm": 1.2853502663214944, | |
| "learning_rate": 9.201720358189464e-06, | |
| "loss": 0.8953, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.8024637043554773, | |
| "grad_norm": 1.3844299853873043, | |
| "learning_rate": 9.18494554165989e-06, | |
| "loss": 0.8527, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.8077430708315002, | |
| "grad_norm": 1.2348194951175424, | |
| "learning_rate": 9.168011926105598e-06, | |
| "loss": 0.7873, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.8130224373075231, | |
| "grad_norm": 1.4444636219923, | |
| "learning_rate": 9.150920154077753e-06, | |
| "loss": 0.9273, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.818301803783546, | |
| "grad_norm": 1.3415281982191147, | |
| "learning_rate": 9.133670874128818e-06, | |
| "loss": 0.8165, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.8235811702595689, | |
| "grad_norm": 1.4171257480887072, | |
| "learning_rate": 9.116264740787937e-06, | |
| "loss": 0.903, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.8288605367355917, | |
| "grad_norm": 1.4009278155261338, | |
| "learning_rate": 9.098702414536107e-06, | |
| "loss": 0.8654, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.8341399032116146, | |
| "grad_norm": 1.3353961972609718, | |
| "learning_rate": 9.08098456178111e-06, | |
| "loss": 0.8821, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.8394192696876375, | |
| "grad_norm": 1.465378569130035, | |
| "learning_rate": 9.06311185483223e-06, | |
| "loss": 0.8585, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.8446986361636604, | |
| "grad_norm": 1.3247392983018136, | |
| "learning_rate": 9.045084971874738e-06, | |
| "loss": 0.8409, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.8499780026396833, | |
| "grad_norm": 1.3094980178064088, | |
| "learning_rate": 9.026904596944163e-06, | |
| "loss": 0.8423, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.8552573691157062, | |
| "grad_norm": 1.3167256777304588, | |
| "learning_rate": 9.008571419900334e-06, | |
| "loss": 0.8123, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.8605367355917289, | |
| "grad_norm": 1.3199382134511854, | |
| "learning_rate": 8.990086136401199e-06, | |
| "loss": 0.8171, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.8658161020677518, | |
| "grad_norm": 1.2759439255391014, | |
| "learning_rate": 8.97144944787643e-06, | |
| "loss": 0.7351, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.8710954685437747, | |
| "grad_norm": 1.3371401649586945, | |
| "learning_rate": 8.952662061500817e-06, | |
| "loss": 0.9156, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.8763748350197976, | |
| "grad_norm": 1.2566756220149857, | |
| "learning_rate": 8.933724690167417e-06, | |
| "loss": 0.9278, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.8816542014958205, | |
| "grad_norm": 1.2918012041625928, | |
| "learning_rate": 8.914638052460515e-06, | |
| "loss": 0.796, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.8869335679718434, | |
| "grad_norm": 1.3420464341954295, | |
| "learning_rate": 8.895402872628352e-06, | |
| "loss": 0.8289, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.8922129344478662, | |
| "grad_norm": 1.386536525968897, | |
| "learning_rate": 8.87601988055565e-06, | |
| "loss": 0.9055, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.8974923009238891, | |
| "grad_norm": 1.2764097483742913, | |
| "learning_rate": 8.856489811735904e-06, | |
| "loss": 0.8221, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.902771667399912, | |
| "grad_norm": 1.3242925889713713, | |
| "learning_rate": 8.836813407243485e-06, | |
| "loss": 0.8052, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.9080510338759349, | |
| "grad_norm": 1.135434673976009, | |
| "learning_rate": 8.816991413705515e-06, | |
| "loss": 0.8048, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.9133304003519578, | |
| "grad_norm": 1.3633873651508777, | |
| "learning_rate": 8.797024583273536e-06, | |
| "loss": 0.8403, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.9186097668279807, | |
| "grad_norm": 1.4254861868193114, | |
| "learning_rate": 8.776913673594968e-06, | |
| "loss": 0.8558, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.9238891333040036, | |
| "grad_norm": 1.3944884567839855, | |
| "learning_rate": 8.756659447784367e-06, | |
| "loss": 0.8265, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.9291684997800264, | |
| "grad_norm": 1.2473776372568752, | |
| "learning_rate": 8.736262674394455e-06, | |
| "loss": 0.8558, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.9344478662560493, | |
| "grad_norm": 1.1199152507629353, | |
| "learning_rate": 8.715724127386971e-06, | |
| "loss": 0.7684, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.9397272327320721, | |
| "grad_norm": 1.4769772323796146, | |
| "learning_rate": 8.695044586103297e-06, | |
| "loss": 0.8404, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.945006599208095, | |
| "grad_norm": 1.2812768021421608, | |
| "learning_rate": 8.674224835234879e-06, | |
| "loss": 0.855, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.9502859656841179, | |
| "grad_norm": 1.4074704240057607, | |
| "learning_rate": 8.653265664793466e-06, | |
| "loss": 0.8966, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.9555653321601408, | |
| "grad_norm": 1.3552977566183917, | |
| "learning_rate": 8.632167870081122e-06, | |
| "loss": 0.8983, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.9608446986361636, | |
| "grad_norm": 1.2662415913666043, | |
| "learning_rate": 8.610932251660046e-06, | |
| "loss": 0.7676, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.9661240651121865, | |
| "grad_norm": 1.3122048439005143, | |
| "learning_rate": 8.58955961532221e-06, | |
| "loss": 0.8486, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.9714034315882094, | |
| "grad_norm": 1.2880133358543706, | |
| "learning_rate": 8.568050772058763e-06, | |
| "loss": 1.0695, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.9766827980642323, | |
| "grad_norm": 1.2876646312084281, | |
| "learning_rate": 8.546406538029268e-06, | |
| "loss": 0.8744, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.9819621645402552, | |
| "grad_norm": 1.3907216667545839, | |
| "learning_rate": 8.524627734530738e-06, | |
| "loss": 0.8009, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.9872415310162781, | |
| "grad_norm": 1.298714668518304, | |
| "learning_rate": 8.502715187966455e-06, | |
| "loss": 0.8211, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.9925208974923009, | |
| "grad_norm": 1.4112133312678243, | |
| "learning_rate": 8.480669729814635e-06, | |
| "loss": 0.8909, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.9978002639683238, | |
| "grad_norm": 1.2429484880228319, | |
| "learning_rate": 8.458492196596852e-06, | |
| "loss": 0.7842, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 1.2429484880228319, | |
| "learning_rate": 8.436183429846314e-06, | |
| "loss": 0.8917, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 1.005279366476023, | |
| "grad_norm": 2.3017818904176828, | |
| "learning_rate": 8.413744276075928e-06, | |
| "loss": 0.7453, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 1.0105587329520458, | |
| "grad_norm": 1.223195818545867, | |
| "learning_rate": 8.39117558674617e-06, | |
| "loss": 0.6252, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 1.0158380994280687, | |
| "grad_norm": 1.2015392058187855, | |
| "learning_rate": 8.368478218232787e-06, | |
| "loss": 0.6357, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 1.0211174659040916, | |
| "grad_norm": 1.2194373310662718, | |
| "learning_rate": 8.345653031794292e-06, | |
| "loss": 0.6568, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 1.0263968323801145, | |
| "grad_norm": 1.251074042866813, | |
| "learning_rate": 8.32270089353929e-06, | |
| "loss": 0.6674, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 1.0316761988561374, | |
| "grad_norm": 1.3553593206962593, | |
| "learning_rate": 8.299622674393615e-06, | |
| "loss": 0.7704, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 1.03695556533216, | |
| "grad_norm": 1.3610141965233205, | |
| "learning_rate": 8.27641925006727e-06, | |
| "loss": 0.6893, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 1.042234931808183, | |
| "grad_norm": 1.4430956370832788, | |
| "learning_rate": 8.25309150102121e-06, | |
| "loss": 0.6783, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 1.0475142982842058, | |
| "grad_norm": 1.3237087640173875, | |
| "learning_rate": 8.229640312433938e-06, | |
| "loss": 0.6328, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 1.0527936647602287, | |
| "grad_norm": 1.4354224675777918, | |
| "learning_rate": 8.206066574167893e-06, | |
| "loss": 0.7054, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.0580730312362516, | |
| "grad_norm": 1.3820711783982724, | |
| "learning_rate": 8.182371180735708e-06, | |
| "loss": 0.6596, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 1.0633523977122745, | |
| "grad_norm": 1.5892680249474918, | |
| "learning_rate": 8.158555031266255e-06, | |
| "loss": 0.7119, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 1.0686317641882974, | |
| "grad_norm": 1.3831340041775368, | |
| "learning_rate": 8.134619029470535e-06, | |
| "loss": 0.6956, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 1.0739111306643203, | |
| "grad_norm": 1.4618391544645484, | |
| "learning_rate": 8.110564083607371e-06, | |
| "loss": 0.6927, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 1.0791904971403432, | |
| "grad_norm": 1.399225289114619, | |
| "learning_rate": 8.086391106448965e-06, | |
| "loss": 0.6719, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 1.084469863616366, | |
| "grad_norm": 1.39102454168437, | |
| "learning_rate": 8.06210101524625e-06, | |
| "loss": 0.6677, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 1.089749230092389, | |
| "grad_norm": 1.4013045019864605, | |
| "learning_rate": 8.037694731694085e-06, | |
| "loss": 0.6807, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 1.095028596568412, | |
| "grad_norm": 1.3292251495775314, | |
| "learning_rate": 8.013173181896283e-06, | |
| "loss": 0.685, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 1.1003079630444346, | |
| "grad_norm": 1.468780970931853, | |
| "learning_rate": 7.988537296330468e-06, | |
| "loss": 0.6559, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 1.1055873295204575, | |
| "grad_norm": 1.2560486698645472, | |
| "learning_rate": 7.963788009812775e-06, | |
| "loss": 0.5966, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 1.1108666959964804, | |
| "grad_norm": 1.3263436068368955, | |
| "learning_rate": 7.938926261462366e-06, | |
| "loss": 0.6426, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 1.1161460624725033, | |
| "grad_norm": 1.4262360139572436, | |
| "learning_rate": 7.913952994665805e-06, | |
| "loss": 0.7044, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 1.1214254289485261, | |
| "grad_norm": 1.413724127688339, | |
| "learning_rate": 7.888869157041257e-06, | |
| "loss": 0.6892, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 1.126704795424549, | |
| "grad_norm": 1.3691991114364659, | |
| "learning_rate": 7.863675700402527e-06, | |
| "loss": 0.6913, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 1.131984161900572, | |
| "grad_norm": 1.5050181189304115, | |
| "learning_rate": 7.838373580722952e-06, | |
| "loss": 0.7563, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 1.1372635283765948, | |
| "grad_norm": 1.280216474266895, | |
| "learning_rate": 7.812963758099118e-06, | |
| "loss": 0.6034, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 1.1425428948526177, | |
| "grad_norm": 1.3442258228040502, | |
| "learning_rate": 7.787447196714428e-06, | |
| "loss": 0.703, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 1.1478222613286406, | |
| "grad_norm": 1.374879774404637, | |
| "learning_rate": 7.76182486480253e-06, | |
| "loss": 0.6622, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 1.1531016278046635, | |
| "grad_norm": 1.1079594025474235, | |
| "learning_rate": 7.736097734610557e-06, | |
| "loss": 0.6343, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 1.1583809942806864, | |
| "grad_norm": 1.4099196984187832, | |
| "learning_rate": 7.710266782362248e-06, | |
| "loss": 0.7379, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 1.163660360756709, | |
| "grad_norm": 1.3722807994126047, | |
| "learning_rate": 7.684332988220901e-06, | |
| "loss": 0.7447, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 1.168939727232732, | |
| "grad_norm": 1.495776876658676, | |
| "learning_rate": 7.658297336252181e-06, | |
| "loss": 0.6477, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 1.1742190937087549, | |
| "grad_norm": 1.3604596279976626, | |
| "learning_rate": 7.63216081438678e-06, | |
| "loss": 0.7295, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 1.1794984601847778, | |
| "grad_norm": 1.3235758656247603, | |
| "learning_rate": 7.605924414382926e-06, | |
| "loss": 0.6585, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 1.1847778266608007, | |
| "grad_norm": 1.4440449502234758, | |
| "learning_rate": 7.579589131788756e-06, | |
| "loss": 0.6244, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 1.1900571931368236, | |
| "grad_norm": 1.3928149968149692, | |
| "learning_rate": 7.553155965904535e-06, | |
| "loss": 0.637, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 1.1953365596128465, | |
| "grad_norm": 1.4032725114348137, | |
| "learning_rate": 7.526625919744741e-06, | |
| "loss": 0.6644, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 1.2006159260888694, | |
| "grad_norm": 1.3266237278115651, | |
| "learning_rate": 7.500000000000001e-06, | |
| "loss": 0.6354, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 1.2058952925648923, | |
| "grad_norm": 1.5117455894482101, | |
| "learning_rate": 7.473279216998896e-06, | |
| "loss": 0.634, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 1.2111746590409151, | |
| "grad_norm": 1.5645913721329012, | |
| "learning_rate": 7.4464645846696186e-06, | |
| "loss": 0.8021, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 1.216454025516938, | |
| "grad_norm": 1.595585311092696, | |
| "learning_rate": 7.419557120501508e-06, | |
| "loss": 0.6831, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 1.221733391992961, | |
| "grad_norm": 1.3739554991120078, | |
| "learning_rate": 7.392557845506433e-06, | |
| "loss": 0.6571, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 1.2270127584689838, | |
| "grad_norm": 1.383758484574002, | |
| "learning_rate": 7.365467784180051e-06, | |
| "loss": 0.6015, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 1.2322921249450065, | |
| "grad_norm": 1.2935886046335063, | |
| "learning_rate": 7.3382879644629345e-06, | |
| "loss": 0.684, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 1.2375714914210294, | |
| "grad_norm": 1.4930967440370626, | |
| "learning_rate": 7.311019417701567e-06, | |
| "loss": 0.618, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 1.2428508578970523, | |
| "grad_norm": 1.4340994519601895, | |
| "learning_rate": 7.283663178609204e-06, | |
| "loss": 0.6676, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 1.2481302243730752, | |
| "grad_norm": 1.332079262932709, | |
| "learning_rate": 7.256220285226615e-06, | |
| "loss": 0.6518, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 1.253409590849098, | |
| "grad_norm": 1.4124012184704442, | |
| "learning_rate": 7.2286917788826926e-06, | |
| "loss": 0.7255, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 1.258688957325121, | |
| "grad_norm": 1.435352374027868, | |
| "learning_rate": 7.201078704154938e-06, | |
| "loss": 0.6427, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 1.2639683238011439, | |
| "grad_norm": 1.3129409573708608, | |
| "learning_rate": 7.173382108829826e-06, | |
| "loss": 0.6435, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.2692476902771668, | |
| "grad_norm": 1.3029674291538322, | |
| "learning_rate": 7.145603043863045e-06, | |
| "loss": 0.6018, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 1.2745270567531897, | |
| "grad_norm": 1.4384137050989114, | |
| "learning_rate": 7.117742563339622e-06, | |
| "loss": 0.6399, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 1.2798064232292126, | |
| "grad_norm": 1.404893956226061, | |
| "learning_rate": 7.089801724433918e-06, | |
| "loss": 0.6591, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 1.2850857897052355, | |
| "grad_norm": 1.3631545091800101, | |
| "learning_rate": 7.061781587369518e-06, | |
| "loss": 0.661, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 1.2903651561812581, | |
| "grad_norm": 1.3722052435590018, | |
| "learning_rate": 7.033683215379002e-06, | |
| "loss": 0.7329, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 1.2956445226572813, | |
| "grad_norm": 1.3373371627523003, | |
| "learning_rate": 7.005507674663594e-06, | |
| "loss": 0.6853, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 1.300923889133304, | |
| "grad_norm": 1.347515123739391, | |
| "learning_rate": 6.977256034352713e-06, | |
| "loss": 0.6356, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 1.3062032556093268, | |
| "grad_norm": 1.3251248469180115, | |
| "learning_rate": 6.948929366463397e-06, | |
| "loss": 0.6542, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 1.3114826220853497, | |
| "grad_norm": 1.413403738053324, | |
| "learning_rate": 6.9205287458596305e-06, | |
| "loss": 0.6732, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 1.3167619885613726, | |
| "grad_norm": 1.3195739239798052, | |
| "learning_rate": 6.892055250211552e-06, | |
| "loss": 0.6157, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.3220413550373955, | |
| "grad_norm": 1.398192094348221, | |
| "learning_rate": 6.86350995995457e-06, | |
| "loss": 0.6903, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 1.3273207215134184, | |
| "grad_norm": 1.4424925485085278, | |
| "learning_rate": 6.834893958248361e-06, | |
| "loss": 0.6967, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 1.3326000879894413, | |
| "grad_norm": 1.3715769454036013, | |
| "learning_rate": 6.806208330935766e-06, | |
| "loss": 0.6402, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 1.3378794544654642, | |
| "grad_norm": 1.3832262511831421, | |
| "learning_rate": 6.77745416650159e-06, | |
| "loss": 0.6684, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 1.343158820941487, | |
| "grad_norm": 1.3503209557607232, | |
| "learning_rate": 6.748632556031306e-06, | |
| "loss": 0.7828, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 1.34843818741751, | |
| "grad_norm": 1.3619508308924722, | |
| "learning_rate": 6.719744593169642e-06, | |
| "loss": 0.6583, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 1.3537175538935329, | |
| "grad_norm": 1.543700428502048, | |
| "learning_rate": 6.690791374079086e-06, | |
| "loss": 0.6687, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 1.3589969203695555, | |
| "grad_norm": 1.3454959558325137, | |
| "learning_rate": 6.6617739973982985e-06, | |
| "loss": 0.6109, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 1.3642762868455787, | |
| "grad_norm": 1.3091432151076758, | |
| "learning_rate": 6.6326935642004165e-06, | |
| "loss": 0.6819, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 1.3695556533216013, | |
| "grad_norm": 1.4211398893275302, | |
| "learning_rate": 6.6035511779512764e-06, | |
| "loss": 0.6106, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.3748350197976242, | |
| "grad_norm": 1.3056584370485818, | |
| "learning_rate": 6.57434794446754e-06, | |
| "loss": 0.6348, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 1.3801143862736471, | |
| "grad_norm": 1.432042689389407, | |
| "learning_rate": 6.545084971874738e-06, | |
| "loss": 0.6428, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 1.38539375274967, | |
| "grad_norm": 1.3426884980712488, | |
| "learning_rate": 6.515763370565218e-06, | |
| "loss": 0.6076, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 1.390673119225693, | |
| "grad_norm": 1.2615012752998496, | |
| "learning_rate": 6.486384253156014e-06, | |
| "loss": 0.7665, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 1.3959524857017158, | |
| "grad_norm": 1.3453647581013601, | |
| "learning_rate": 6.456948734446624e-06, | |
| "loss": 0.6377, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 1.4012318521777387, | |
| "grad_norm": 1.4161701822318469, | |
| "learning_rate": 6.427457931376712e-06, | |
| "loss": 0.6732, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 1.4065112186537616, | |
| "grad_norm": 1.32784380027798, | |
| "learning_rate": 6.39791296298372e-06, | |
| "loss": 0.628, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 1.4117905851297845, | |
| "grad_norm": 1.3941543313635256, | |
| "learning_rate": 6.368314950360416e-06, | |
| "loss": 0.6554, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 1.4170699516058072, | |
| "grad_norm": 1.4362476191327336, | |
| "learning_rate": 6.3386650166123406e-06, | |
| "loss": 0.7686, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 1.4223493180818303, | |
| "grad_norm": 1.4890178566624934, | |
| "learning_rate": 6.308964286815203e-06, | |
| "loss": 0.6515, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.427628684557853, | |
| "grad_norm": 1.52171364834682, | |
| "learning_rate": 6.279213887972179e-06, | |
| "loss": 0.6851, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 1.4329080510338759, | |
| "grad_norm": 1.170459513215867, | |
| "learning_rate": 6.249414948971154e-06, | |
| "loss": 0.634, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 1.4381874175098988, | |
| "grad_norm": 1.2892794847690134, | |
| "learning_rate": 6.219568600541886e-06, | |
| "loss": 0.5732, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 1.4434667839859217, | |
| "grad_norm": 1.4147522223651536, | |
| "learning_rate": 6.189675975213094e-06, | |
| "loss": 0.6505, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 1.4487461504619445, | |
| "grad_norm": 1.2766098284530847, | |
| "learning_rate": 6.159738207269491e-06, | |
| "loss": 0.613, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 1.4540255169379674, | |
| "grad_norm": 1.3170993095662313, | |
| "learning_rate": 6.129756432708739e-06, | |
| "loss": 0.6058, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 1.4593048834139903, | |
| "grad_norm": 1.3490297596989358, | |
| "learning_rate": 6.099731789198344e-06, | |
| "loss": 0.7526, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 1.4645842498900132, | |
| "grad_norm": 1.49015141935795, | |
| "learning_rate": 6.0696654160324875e-06, | |
| "loss": 0.6664, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 1.4698636163660361, | |
| "grad_norm": 1.353677527773509, | |
| "learning_rate": 6.039558454088796e-06, | |
| "loss": 0.6508, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 1.475142982842059, | |
| "grad_norm": 1.3542791249145698, | |
| "learning_rate": 6.009412045785051e-06, | |
| "loss": 0.6868, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.480422349318082, | |
| "grad_norm": 1.239412403087578, | |
| "learning_rate": 5.9792273350358354e-06, | |
| "loss": 0.6542, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 1.4857017157941046, | |
| "grad_norm": 1.3657653754563595, | |
| "learning_rate": 5.9490054672091305e-06, | |
| "loss": 0.695, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 1.4909810822701277, | |
| "grad_norm": 1.4092314752807444, | |
| "learning_rate": 5.918747589082853e-06, | |
| "loss": 0.6472, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 1.4962604487461504, | |
| "grad_norm": 1.518575708392721, | |
| "learning_rate": 5.888454848801345e-06, | |
| "loss": 0.6623, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 1.5015398152221735, | |
| "grad_norm": 1.4295896368916283, | |
| "learning_rate": 5.8581283958317995e-06, | |
| "loss": 0.7579, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 1.5068191816981962, | |
| "grad_norm": 1.479897530210997, | |
| "learning_rate": 5.82776938092065e-06, | |
| "loss": 0.7334, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 1.512098548174219, | |
| "grad_norm": 1.4366013380091691, | |
| "learning_rate": 5.797378956049905e-06, | |
| "loss": 0.6739, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 1.517377914650242, | |
| "grad_norm": 1.4716566746195219, | |
| "learning_rate": 5.766958274393428e-06, | |
| "loss": 0.7233, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 1.5226572811262649, | |
| "grad_norm": 1.3374013752311613, | |
| "learning_rate": 5.736508490273189e-06, | |
| "loss": 0.6999, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 1.5279366476022878, | |
| "grad_norm": 1.404502862270622, | |
| "learning_rate": 5.706030759115458e-06, | |
| "loss": 0.6502, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.5332160140783107, | |
| "grad_norm": 1.3895925622506242, | |
| "learning_rate": 5.675526237406965e-06, | |
| "loss": 0.6693, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 1.5384953805543335, | |
| "grad_norm": 1.3933211625692163, | |
| "learning_rate": 5.644996082651018e-06, | |
| "loss": 0.6272, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 1.5437747470303562, | |
| "grad_norm": 1.2462836635087724, | |
| "learning_rate": 5.614441453323571e-06, | |
| "loss": 0.6725, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 1.5490541135063793, | |
| "grad_norm": 1.4996013016049565, | |
| "learning_rate": 5.583863508829281e-06, | |
| "loss": 0.6956, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 1.554333479982402, | |
| "grad_norm": 1.3766256340590475, | |
| "learning_rate": 5.553263409457504e-06, | |
| "loss": 0.659, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 1.5596128464584251, | |
| "grad_norm": 1.3350837109105465, | |
| "learning_rate": 5.522642316338268e-06, | |
| "loss": 0.6357, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 1.5648922129344478, | |
| "grad_norm": 1.3570996604619927, | |
| "learning_rate": 5.492001391398214e-06, | |
| "loss": 0.6544, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 1.5701715794104707, | |
| "grad_norm": 1.4608558691508997, | |
| "learning_rate": 5.46134179731651e-06, | |
| "loss": 0.6512, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 1.5754509458864936, | |
| "grad_norm": 1.2494448543139998, | |
| "learning_rate": 5.430664697480731e-06, | |
| "loss": 0.5658, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 1.5807303123625165, | |
| "grad_norm": 1.444693017380396, | |
| "learning_rate": 5.399971255942708e-06, | |
| "loss": 0.6901, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.5860096788385394, | |
| "grad_norm": 1.4186391329903683, | |
| "learning_rate": 5.36926263737437e-06, | |
| "loss": 0.8807, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 1.5912890453145623, | |
| "grad_norm": 1.29633534515009, | |
| "learning_rate": 5.338540007023538e-06, | |
| "loss": 0.6461, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 1.5965684117905852, | |
| "grad_norm": 1.4448726879769416, | |
| "learning_rate": 5.3078045306697154e-06, | |
| "loss": 0.6523, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 1.6018477782666078, | |
| "grad_norm": 1.266507195220378, | |
| "learning_rate": 5.27705737457985e-06, | |
| "loss": 0.6408, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 1.607127144742631, | |
| "grad_norm": 1.3540583386343656, | |
| "learning_rate": 5.246299705464085e-06, | |
| "loss": 0.6488, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 1.6124065112186536, | |
| "grad_norm": 1.343878144578292, | |
| "learning_rate": 5.2155326904314795e-06, | |
| "loss": 0.6031, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 1.6176858776946768, | |
| "grad_norm": 1.390922633295502, | |
| "learning_rate": 5.184757496945726e-06, | |
| "loss": 0.6732, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 1.6229652441706994, | |
| "grad_norm": 1.303700297184845, | |
| "learning_rate": 5.153975292780852e-06, | |
| "loss": 0.644, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 1.6282446106467225, | |
| "grad_norm": 1.4719857684130002, | |
| "learning_rate": 5.123187245976912e-06, | |
| "loss": 0.6542, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 1.6335239771227452, | |
| "grad_norm": 1.5316116004451763, | |
| "learning_rate": 5.09239452479565e-06, | |
| "loss": 0.6741, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.6388033435987681, | |
| "grad_norm": 1.5058092447545324, | |
| "learning_rate": 5.061598297676192e-06, | |
| "loss": 0.6624, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 1.644082710074791, | |
| "grad_norm": 1.2957852805869594, | |
| "learning_rate": 5.030799733190694e-06, | |
| "loss": 0.6866, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 1.649362076550814, | |
| "grad_norm": 1.3465817125883073, | |
| "learning_rate": 5e-06, | |
| "loss": 0.665, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 1.6546414430268368, | |
| "grad_norm": 1.2222436930506864, | |
| "learning_rate": 4.9692002668093075e-06, | |
| "loss": 0.5887, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 1.6599208095028597, | |
| "grad_norm": 1.441331154425715, | |
| "learning_rate": 4.9384017023238085e-06, | |
| "loss": 0.673, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 1.6652001759788826, | |
| "grad_norm": 1.3514338153223537, | |
| "learning_rate": 4.907605475204352e-06, | |
| "loss": 0.7095, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 1.6704795424549053, | |
| "grad_norm": 1.4614586482457859, | |
| "learning_rate": 4.876812754023092e-06, | |
| "loss": 0.7205, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 1.6757589089309284, | |
| "grad_norm": 1.3928056564895086, | |
| "learning_rate": 4.846024707219149e-06, | |
| "loss": 0.6358, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 1.681038275406951, | |
| "grad_norm": 1.3113749307682454, | |
| "learning_rate": 4.815242503054277e-06, | |
| "loss": 0.6465, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 1.6863176418829742, | |
| "grad_norm": 1.4316497180240197, | |
| "learning_rate": 4.784467309568524e-06, | |
| "loss": 0.6794, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.6915970083589968, | |
| "grad_norm": 1.3763481895692722, | |
| "learning_rate": 4.753700294535916e-06, | |
| "loss": 0.7105, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 1.69687637483502, | |
| "grad_norm": 1.3560535615138942, | |
| "learning_rate": 4.7229426254201504e-06, | |
| "loss": 0.6566, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 1.7021557413110426, | |
| "grad_norm": 1.3113897207300194, | |
| "learning_rate": 4.692195469330286e-06, | |
| "loss": 0.753, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 1.7074351077870655, | |
| "grad_norm": 1.2314416333529012, | |
| "learning_rate": 4.661459992976463e-06, | |
| "loss": 0.6087, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 1.7127144742630884, | |
| "grad_norm": 1.357070932304121, | |
| "learning_rate": 4.630737362625631e-06, | |
| "loss": 0.678, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 1.7179938407391113, | |
| "grad_norm": 1.3907841932602958, | |
| "learning_rate": 4.6000287440572925e-06, | |
| "loss": 0.6819, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 1.7232732072151342, | |
| "grad_norm": 1.3821824046618116, | |
| "learning_rate": 4.569335302519271e-06, | |
| "loss": 0.6329, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 1.7285525736911571, | |
| "grad_norm": 1.4473432204015564, | |
| "learning_rate": 4.53865820268349e-06, | |
| "loss": 0.7144, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 1.73383194016718, | |
| "grad_norm": 1.4376742031177947, | |
| "learning_rate": 4.507998608601787e-06, | |
| "loss": 0.6086, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 1.7391113066432027, | |
| "grad_norm": 1.2849628847256984, | |
| "learning_rate": 4.477357683661734e-06, | |
| "loss": 0.6101, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.7443906731192258, | |
| "grad_norm": 1.3554057763386258, | |
| "learning_rate": 4.446736590542497e-06, | |
| "loss": 0.5833, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 1.7496700395952485, | |
| "grad_norm": 1.3213798453951964, | |
| "learning_rate": 4.41613649117072e-06, | |
| "loss": 0.6859, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 1.7549494060712716, | |
| "grad_norm": 1.319837554365992, | |
| "learning_rate": 4.3855585466764305e-06, | |
| "loss": 0.655, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 1.7602287725472943, | |
| "grad_norm": 1.3686144434660683, | |
| "learning_rate": 4.355003917348985e-06, | |
| "loss": 0.6474, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 1.7655081390233172, | |
| "grad_norm": 1.3793264604803168, | |
| "learning_rate": 4.324473762593037e-06, | |
| "loss": 0.5843, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 1.77078750549934, | |
| "grad_norm": 1.3441234479337094, | |
| "learning_rate": 4.293969240884545e-06, | |
| "loss": 0.5984, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 1.776066871975363, | |
| "grad_norm": 1.237308449464165, | |
| "learning_rate": 4.263491509726812e-06, | |
| "loss": 0.6477, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 1.7813462384513858, | |
| "grad_norm": 1.3355474025021052, | |
| "learning_rate": 4.233041725606573e-06, | |
| "loss": 0.636, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 1.7866256049274087, | |
| "grad_norm": 1.3458947073703338, | |
| "learning_rate": 4.202621043950096e-06, | |
| "loss": 0.6152, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 1.7919049714034316, | |
| "grad_norm": 1.3724772310082562, | |
| "learning_rate": 4.17223061907935e-06, | |
| "loss": 0.6669, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.7971843378794543, | |
| "grad_norm": 1.3927314177261432, | |
| "learning_rate": 4.141871604168201e-06, | |
| "loss": 0.6871, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 1.8024637043554774, | |
| "grad_norm": 1.425898039985732, | |
| "learning_rate": 4.111545151198657e-06, | |
| "loss": 0.6479, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 1.8077430708315, | |
| "grad_norm": 1.4786764449830878, | |
| "learning_rate": 4.081252410917148e-06, | |
| "loss": 0.6758, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 1.8130224373075232, | |
| "grad_norm": 1.5596067837918601, | |
| "learning_rate": 4.050994532790871e-06, | |
| "loss": 0.6792, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 1.818301803783546, | |
| "grad_norm": 1.3295616520702254, | |
| "learning_rate": 4.020772664964166e-06, | |
| "loss": 0.6447, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 1.823581170259569, | |
| "grad_norm": 1.356711049558864, | |
| "learning_rate": 3.99058795421495e-06, | |
| "loss": 0.6988, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 1.8288605367355917, | |
| "grad_norm": 1.3459848599920097, | |
| "learning_rate": 3.960441545911205e-06, | |
| "loss": 0.6793, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 1.8341399032116146, | |
| "grad_norm": 1.2796136680768018, | |
| "learning_rate": 3.930334583967514e-06, | |
| "loss": 0.6404, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 1.8394192696876375, | |
| "grad_norm": 1.430373997763793, | |
| "learning_rate": 3.9002682108016585e-06, | |
| "loss": 0.7089, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 1.8446986361636604, | |
| "grad_norm": 1.443868769930965, | |
| "learning_rate": 3.870243567291263e-06, | |
| "loss": 0.6088, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.8499780026396833, | |
| "grad_norm": 1.3354422816955691, | |
| "learning_rate": 3.840261792730511e-06, | |
| "loss": 0.6469, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 1.8552573691157062, | |
| "grad_norm": 1.5007921332211551, | |
| "learning_rate": 3.8103240247869077e-06, | |
| "loss": 0.6917, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.860536735591729, | |
| "grad_norm": 1.4369902254697013, | |
| "learning_rate": 3.7804313994581143e-06, | |
| "loss": 0.6745, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 1.8658161020677517, | |
| "grad_norm": 1.2954980945001948, | |
| "learning_rate": 3.7505850510288455e-06, | |
| "loss": 0.6402, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 1.8710954685437748, | |
| "grad_norm": 1.3479250337805435, | |
| "learning_rate": 3.720786112027822e-06, | |
| "loss": 0.6281, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 1.8763748350197975, | |
| "grad_norm": 1.3528354522984527, | |
| "learning_rate": 3.6910357131847986e-06, | |
| "loss": 0.6253, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 1.8816542014958206, | |
| "grad_norm": 1.2410984546080153, | |
| "learning_rate": 3.6613349833876607e-06, | |
| "loss": 0.5576, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 1.8869335679718433, | |
| "grad_norm": 1.1827340907861352, | |
| "learning_rate": 3.6316850496395863e-06, | |
| "loss": 0.5936, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 1.8922129344478662, | |
| "grad_norm": 1.2980573086194132, | |
| "learning_rate": 3.602087037016281e-06, | |
| "loss": 0.8214, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 1.897492300923889, | |
| "grad_norm": 1.4315757982637016, | |
| "learning_rate": 3.5725420686232903e-06, | |
| "loss": 0.6522, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.902771667399912, | |
| "grad_norm": 1.4091204255580805, | |
| "learning_rate": 3.5430512655533774e-06, | |
| "loss": 0.5795, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 1.9080510338759349, | |
| "grad_norm": 1.3444722372985694, | |
| "learning_rate": 3.513615746843987e-06, | |
| "loss": 0.7231, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 1.9133304003519578, | |
| "grad_norm": 1.5246355682127404, | |
| "learning_rate": 3.484236629434783e-06, | |
| "loss": 0.6603, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 1.9186097668279807, | |
| "grad_norm": 1.415464008217028, | |
| "learning_rate": 3.4549150281252635e-06, | |
| "loss": 0.6775, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 1.9238891333040036, | |
| "grad_norm": 1.3192883237623132, | |
| "learning_rate": 3.4256520555324613e-06, | |
| "loss": 0.6316, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 1.9291684997800265, | |
| "grad_norm": 1.428352611949904, | |
| "learning_rate": 3.3964488220487252e-06, | |
| "loss": 0.6544, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.9344478662560491, | |
| "grad_norm": 1.5172404820075067, | |
| "learning_rate": 3.3673064357995844e-06, | |
| "loss": 0.5938, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 1.9397272327320723, | |
| "grad_norm": 1.3819196499385575, | |
| "learning_rate": 3.3382260026017027e-06, | |
| "loss": 0.658, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.945006599208095, | |
| "grad_norm": 1.2219098246955071, | |
| "learning_rate": 3.3092086259209144e-06, | |
| "loss": 0.6436, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 1.950285965684118, | |
| "grad_norm": 1.3112983916512726, | |
| "learning_rate": 3.2802554068303595e-06, | |
| "loss": 0.6277, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.9555653321601407, | |
| "grad_norm": 1.3181733253990144, | |
| "learning_rate": 3.2513674439686945e-06, | |
| "loss": 0.6051, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 1.9608446986361636, | |
| "grad_norm": 1.3950428699097805, | |
| "learning_rate": 3.22254583349841e-06, | |
| "loss": 0.6047, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 1.9661240651121865, | |
| "grad_norm": 1.401575375536184, | |
| "learning_rate": 3.1937916690642356e-06, | |
| "loss": 0.7536, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 1.9714034315882094, | |
| "grad_norm": 1.4675958168712424, | |
| "learning_rate": 3.16510604175164e-06, | |
| "loss": 0.6029, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 1.9766827980642323, | |
| "grad_norm": 1.2821606191811943, | |
| "learning_rate": 3.13649004004543e-06, | |
| "loss": 0.5921, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 1.9819621645402552, | |
| "grad_norm": 1.352642460803231, | |
| "learning_rate": 3.107944749788449e-06, | |
| "loss": 0.6541, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 1.987241531016278, | |
| "grad_norm": 1.4500846871984014, | |
| "learning_rate": 3.0794712541403716e-06, | |
| "loss": 0.5958, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 1.9925208974923008, | |
| "grad_norm": 1.344043810326557, | |
| "learning_rate": 3.0510706335366034e-06, | |
| "loss": 0.678, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 1.9978002639683239, | |
| "grad_norm": 1.3311569208901686, | |
| "learning_rate": 3.0227439656472878e-06, | |
| "loss": 0.5696, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 1.3311569208901686, | |
| "learning_rate": 2.9944923253364066e-06, | |
| "loss": 0.6124, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 2.0052793664760227, | |
| "grad_norm": 2.2453037842894434, | |
| "learning_rate": 2.966316784621e-06, | |
| "loss": 0.5021, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 2.010558732952046, | |
| "grad_norm": 1.448400627279987, | |
| "learning_rate": 2.9382184126304834e-06, | |
| "loss": 0.4754, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 2.0158380994280685, | |
| "grad_norm": 1.3928060588224582, | |
| "learning_rate": 2.910198275566085e-06, | |
| "loss": 0.5552, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 2.0211174659040916, | |
| "grad_norm": 1.3053890423029602, | |
| "learning_rate": 2.8822574366603804e-06, | |
| "loss": 0.5096, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 2.0263968323801143, | |
| "grad_norm": 1.312006902737542, | |
| "learning_rate": 2.8543969561369556e-06, | |
| "loss": 0.4384, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 2.0316761988561374, | |
| "grad_norm": 1.3424294511811377, | |
| "learning_rate": 2.8266178911701757e-06, | |
| "loss": 0.4524, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 2.03695556533216, | |
| "grad_norm": 1.353115219108087, | |
| "learning_rate": 2.798921295845064e-06, | |
| "loss": 0.5847, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 2.042234931808183, | |
| "grad_norm": 1.359151667072332, | |
| "learning_rate": 2.771308221117309e-06, | |
| "loss": 0.5274, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 2.047514298284206, | |
| "grad_norm": 1.1804464420852299, | |
| "learning_rate": 2.743779714773386e-06, | |
| "loss": 0.4868, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 2.052793664760229, | |
| "grad_norm": 1.2257778269575734, | |
| "learning_rate": 2.7163368213907975e-06, | |
| "loss": 0.4974, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 2.0580730312362516, | |
| "grad_norm": 1.3592658717787198, | |
| "learning_rate": 2.6889805822984348e-06, | |
| "loss": 0.429, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 2.0633523977122747, | |
| "grad_norm": 1.4335615689021757, | |
| "learning_rate": 2.6617120355370667e-06, | |
| "loss": 0.4936, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 2.0686317641882974, | |
| "grad_norm": 1.4971943650916089, | |
| "learning_rate": 2.6345322158199503e-06, | |
| "loss": 0.4891, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 2.07391113066432, | |
| "grad_norm": 1.3630600653303417, | |
| "learning_rate": 2.607442154493568e-06, | |
| "loss": 0.4339, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 2.079190497140343, | |
| "grad_norm": 1.3452492502199729, | |
| "learning_rate": 2.5804428794984926e-06, | |
| "loss": 0.4788, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 2.084469863616366, | |
| "grad_norm": 1.3717266175336726, | |
| "learning_rate": 2.5535354153303827e-06, | |
| "loss": 0.4589, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 2.089749230092389, | |
| "grad_norm": 1.4172021390970797, | |
| "learning_rate": 2.526720783001107e-06, | |
| "loss": 0.474, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 2.0950285965684117, | |
| "grad_norm": 1.4012306931647633, | |
| "learning_rate": 2.5000000000000015e-06, | |
| "loss": 0.4703, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 2.100307963044435, | |
| "grad_norm": 1.3223744927738885, | |
| "learning_rate": 2.473374080255261e-06, | |
| "loss": 0.4927, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 2.1055873295204575, | |
| "grad_norm": 1.4340111903752237, | |
| "learning_rate": 2.4468440340954664e-06, | |
| "loss": 0.4911, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 2.1108666959964806, | |
| "grad_norm": 1.4908200109241228, | |
| "learning_rate": 2.4204108682112443e-06, | |
| "loss": 0.4923, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 2.1161460624725033, | |
| "grad_norm": 1.4498803710381125, | |
| "learning_rate": 2.3940755856170744e-06, | |
| "loss": 0.5264, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 2.1214254289485264, | |
| "grad_norm": 1.5579303480326079, | |
| "learning_rate": 2.3678391856132203e-06, | |
| "loss": 0.4671, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 2.126704795424549, | |
| "grad_norm": 1.3682695560666365, | |
| "learning_rate": 2.341702663747819e-06, | |
| "loss": 0.4791, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 2.131984161900572, | |
| "grad_norm": 1.3740119474315011, | |
| "learning_rate": 2.3156670117790996e-06, | |
| "loss": 0.5081, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 2.137263528376595, | |
| "grad_norm": 1.4378876285894175, | |
| "learning_rate": 2.289733217637753e-06, | |
| "loss": 0.6887, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 2.1425428948526175, | |
| "grad_norm": 1.3945669735187922, | |
| "learning_rate": 2.2639022653894443e-06, | |
| "loss": 0.4747, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 2.1478222613286406, | |
| "grad_norm": 1.5102801606024971, | |
| "learning_rate": 2.238175135197471e-06, | |
| "loss": 0.4772, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 2.1531016278046633, | |
| "grad_norm": 1.343039872751995, | |
| "learning_rate": 2.2125528032855727e-06, | |
| "loss": 0.4662, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 2.1583809942806864, | |
| "grad_norm": 1.7777099063240305, | |
| "learning_rate": 2.1870362419008844e-06, | |
| "loss": 0.4426, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 2.163660360756709, | |
| "grad_norm": 1.3840206283915173, | |
| "learning_rate": 2.1616264192770496e-06, | |
| "loss": 0.451, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 2.168939727232732, | |
| "grad_norm": 1.2930950541912372, | |
| "learning_rate": 2.136324299597474e-06, | |
| "loss": 0.523, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 2.174219093708755, | |
| "grad_norm": 1.4431983442764567, | |
| "learning_rate": 2.1111308429587446e-06, | |
| "loss": 0.4051, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 2.179498460184778, | |
| "grad_norm": 1.2550043379116107, | |
| "learning_rate": 2.0860470053341957e-06, | |
| "loss": 0.499, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 2.1847778266608007, | |
| "grad_norm": 1.4359151954027813, | |
| "learning_rate": 2.061073738537635e-06, | |
| "loss": 0.4591, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 2.190057193136824, | |
| "grad_norm": 1.3446535561143784, | |
| "learning_rate": 2.0362119901872262e-06, | |
| "loss": 0.4076, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 2.1953365596128465, | |
| "grad_norm": 1.2459969483698727, | |
| "learning_rate": 2.011462703669532e-06, | |
| "loss": 0.4957, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 2.200615926088869, | |
| "grad_norm": 1.2992424145598012, | |
| "learning_rate": 1.9868268181037186e-06, | |
| "loss": 0.463, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 2.2058952925648923, | |
| "grad_norm": 1.4602888158466671, | |
| "learning_rate": 1.9623052683059164e-06, | |
| "loss": 0.4719, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 2.211174659040915, | |
| "grad_norm": 1.3759597249907445, | |
| "learning_rate": 1.937898984753751e-06, | |
| "loss": 0.4951, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 2.216454025516938, | |
| "grad_norm": 1.412150518484592, | |
| "learning_rate": 1.913608893551036e-06, | |
| "loss": 0.52, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 2.2217333919929607, | |
| "grad_norm": 1.485490691650101, | |
| "learning_rate": 1.8894359163926312e-06, | |
| "loss": 0.444, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 2.227012758468984, | |
| "grad_norm": 1.3670795340613098, | |
| "learning_rate": 1.865380970529469e-06, | |
| "loss": 0.5399, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 2.2322921249450065, | |
| "grad_norm": 1.3525729496527066, | |
| "learning_rate": 1.8414449687337467e-06, | |
| "loss": 0.5159, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 2.2375714914210296, | |
| "grad_norm": 1.4059006780837846, | |
| "learning_rate": 1.8176288192642944e-06, | |
| "loss": 0.5099, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 2.2428508578970523, | |
| "grad_norm": 1.2761502912826002, | |
| "learning_rate": 1.7939334258321094e-06, | |
| "loss": 0.4717, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 2.2481302243730754, | |
| "grad_norm": 1.4040503918498035, | |
| "learning_rate": 1.7703596875660645e-06, | |
| "loss": 0.4469, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 2.253409590849098, | |
| "grad_norm": 1.2908543753758535, | |
| "learning_rate": 1.746908498978791e-06, | |
| "loss": 0.485, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 2.2586889573251208, | |
| "grad_norm": 1.3759893346792271, | |
| "learning_rate": 1.7235807499327335e-06, | |
| "loss": 0.5101, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 2.263968323801144, | |
| "grad_norm": 1.4728227490351313, | |
| "learning_rate": 1.7003773256063882e-06, | |
| "loss": 0.5347, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 2.2692476902771666, | |
| "grad_norm": 1.489969037850193, | |
| "learning_rate": 1.6772991064607113e-06, | |
| "loss": 0.4467, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 2.2745270567531897, | |
| "grad_norm": 1.417700490467943, | |
| "learning_rate": 1.6543469682057105e-06, | |
| "loss": 0.481, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 2.2798064232292123, | |
| "grad_norm": 1.386346425394359, | |
| "learning_rate": 1.6315217817672142e-06, | |
| "loss": 0.4621, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 2.2850857897052355, | |
| "grad_norm": 1.2589332584059243, | |
| "learning_rate": 1.60882441325383e-06, | |
| "loss": 0.5175, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 2.290365156181258, | |
| "grad_norm": 1.3880552404176263, | |
| "learning_rate": 1.5862557239240729e-06, | |
| "loss": 0.4387, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 2.2956445226572813, | |
| "grad_norm": 1.360835274455909, | |
| "learning_rate": 1.5638165701536866e-06, | |
| "loss": 0.5115, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 2.300923889133304, | |
| "grad_norm": 1.3843299657391916, | |
| "learning_rate": 1.54150780340315e-06, | |
| "loss": 0.4899, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 2.306203255609327, | |
| "grad_norm": 1.3835648119835473, | |
| "learning_rate": 1.5193302701853674e-06, | |
| "loss": 0.4664, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 2.3114826220853497, | |
| "grad_norm": 1.4640561341524838, | |
| "learning_rate": 1.4972848120335453e-06, | |
| "loss": 0.4609, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 2.316761988561373, | |
| "grad_norm": 1.421258969771172, | |
| "learning_rate": 1.475372265469265e-06, | |
| "loss": 0.4763, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 2.3220413550373955, | |
| "grad_norm": 1.2751966894937146, | |
| "learning_rate": 1.453593461970733e-06, | |
| "loss": 0.4701, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 2.327320721513418, | |
| "grad_norm": 1.340345062295216, | |
| "learning_rate": 1.4319492279412388e-06, | |
| "loss": 0.4832, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 2.3326000879894413, | |
| "grad_norm": 1.4159387316202012, | |
| "learning_rate": 1.410440384677791e-06, | |
| "loss": 0.42, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 2.337879454465464, | |
| "grad_norm": 1.2648065651654534, | |
| "learning_rate": 1.389067748339954e-06, | |
| "loss": 0.4404, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 2.343158820941487, | |
| "grad_norm": 1.3711182773880273, | |
| "learning_rate": 1.3678321299188802e-06, | |
| "loss": 0.4388, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 2.3484381874175098, | |
| "grad_norm": 1.3906995330557852, | |
| "learning_rate": 1.3467343352065349e-06, | |
| "loss": 0.5312, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 2.353717553893533, | |
| "grad_norm": 1.397658739943819, | |
| "learning_rate": 1.3257751647651223e-06, | |
| "loss": 0.4675, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 2.3589969203695555, | |
| "grad_norm": 1.4627906940091926, | |
| "learning_rate": 1.3049554138967052e-06, | |
| "loss": 0.4395, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 2.3642762868455787, | |
| "grad_norm": 1.381779631997442, | |
| "learning_rate": 1.2842758726130283e-06, | |
| "loss": 0.569, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 2.3695556533216013, | |
| "grad_norm": 1.4402592673817487, | |
| "learning_rate": 1.2637373256055445e-06, | |
| "loss": 0.4903, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 2.3748350197976245, | |
| "grad_norm": 1.3333412823689215, | |
| "learning_rate": 1.2433405522156334e-06, | |
| "loss": 0.4824, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 2.380114386273647, | |
| "grad_norm": 1.4106276931723192, | |
| "learning_rate": 1.2230863264050308e-06, | |
| "loss": 0.487, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 2.3853937527496702, | |
| "grad_norm": 1.3557673199870695, | |
| "learning_rate": 1.202975416726464e-06, | |
| "loss": 0.5265, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 2.390673119225693, | |
| "grad_norm": 1.4343260095491823, | |
| "learning_rate": 1.1830085862944851e-06, | |
| "loss": 0.449, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 2.3959524857017156, | |
| "grad_norm": 1.446767986226991, | |
| "learning_rate": 1.163186592756515e-06, | |
| "loss": 0.4699, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 2.4012318521777387, | |
| "grad_norm": 1.424117946851896, | |
| "learning_rate": 1.1435101882640964e-06, | |
| "loss": 0.4514, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 2.4065112186537614, | |
| "grad_norm": 1.5365862656275142, | |
| "learning_rate": 1.1239801194443507e-06, | |
| "loss": 0.4373, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 2.4117905851297845, | |
| "grad_norm": 1.2648145260275343, | |
| "learning_rate": 1.1045971273716476e-06, | |
| "loss": 0.4329, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 2.417069951605807, | |
| "grad_norm": 1.3311965815866447, | |
| "learning_rate": 1.085361947539486e-06, | |
| "loss": 0.4769, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 2.4223493180818303, | |
| "grad_norm": 1.3975266216041633, | |
| "learning_rate": 1.066275309832584e-06, | |
| "loss": 0.472, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 2.427628684557853, | |
| "grad_norm": 1.3252996893653324, | |
| "learning_rate": 1.0473379384991833e-06, | |
| "loss": 0.4243, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 2.432908051033876, | |
| "grad_norm": 1.4139994351082152, | |
| "learning_rate": 1.02855055212357e-06, | |
| "loss": 0.4785, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 2.4381874175098988, | |
| "grad_norm": 1.3372117034643396, | |
| "learning_rate": 1.0099138635988026e-06, | |
| "loss": 0.4215, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 2.443466783985922, | |
| "grad_norm": 1.3606372944047547, | |
| "learning_rate": 9.91428580099667e-07, | |
| "loss": 0.4413, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 2.4487461504619445, | |
| "grad_norm": 1.318507119449003, | |
| "learning_rate": 9.73095403055837e-07, | |
| "loss": 0.415, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 2.4540255169379677, | |
| "grad_norm": 1.2123750888837692, | |
| "learning_rate": 9.549150281252633e-07, | |
| "loss": 0.4886, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 2.4593048834139903, | |
| "grad_norm": 1.3528666735660853, | |
| "learning_rate": 9.368881451677725e-07, | |
| "loss": 0.4838, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 2.464584249890013, | |
| "grad_norm": 1.3418881482008247, | |
| "learning_rate": 9.190154382188921e-07, | |
| "loss": 0.4466, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 2.469863616366036, | |
| "grad_norm": 1.3296751447001665, | |
| "learning_rate": 9.01297585463895e-07, | |
| "loss": 0.4595, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 2.475142982842059, | |
| "grad_norm": 1.4089808402301305, | |
| "learning_rate": 8.837352592120646e-07, | |
| "loss": 0.4365, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 2.480422349318082, | |
| "grad_norm": 1.402157984134382, | |
| "learning_rate": 8.663291258711831e-07, | |
| "loss": 0.4776, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 2.4857017157941046, | |
| "grad_norm": 1.334493882835527, | |
| "learning_rate": 8.490798459222477e-07, | |
| "loss": 0.446, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 2.4909810822701277, | |
| "grad_norm": 1.4469360566334513, | |
| "learning_rate": 8.31988073894403e-07, | |
| "loss": 0.5585, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 2.4962604487461504, | |
| "grad_norm": 1.3547614775330397, | |
| "learning_rate": 8.150544583401116e-07, | |
| "loss": 0.4951, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 2.5015398152221735, | |
| "grad_norm": 1.4317273864472844, | |
| "learning_rate": 7.98279641810537e-07, | |
| "loss": 0.4658, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 2.506819181698196, | |
| "grad_norm": 1.3305013073280645, | |
| "learning_rate": 7.816642608311692e-07, | |
| "loss": 0.5777, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 2.512098548174219, | |
| "grad_norm": 1.3136690941260454, | |
| "learning_rate": 7.652089458776651e-07, | |
| "loss": 0.499, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 2.517377914650242, | |
| "grad_norm": 1.3874569359917572, | |
| "learning_rate": 7.489143213519301e-07, | |
| "loss": 0.5347, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 2.522657281126265, | |
| "grad_norm": 1.447251018353361, | |
| "learning_rate": 7.327810055584211e-07, | |
| "loss": 0.435, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 2.5279366476022878, | |
| "grad_norm": 1.3496149617623217, | |
| "learning_rate": 7.168096106806871e-07, | |
| "loss": 0.4171, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 2.5332160140783104, | |
| "grad_norm": 1.249564521876932, | |
| "learning_rate": 7.010007427581378e-07, | |
| "loss": 0.4364, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 2.5384953805543335, | |
| "grad_norm": 1.16664480296305, | |
| "learning_rate": 6.853550016630517e-07, | |
| "loss": 0.4704, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 2.543774747030356, | |
| "grad_norm": 1.360240021411605, | |
| "learning_rate": 6.698729810778065e-07, | |
| "loss": 0.4452, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 2.5490541135063793, | |
| "grad_norm": 1.342403768766378, | |
| "learning_rate": 6.545552684723583e-07, | |
| "loss": 0.4693, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 2.554333479982402, | |
| "grad_norm": 1.3325018963351474, | |
| "learning_rate": 6.394024450819458e-07, | |
| "loss": 0.6651, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 2.559612846458425, | |
| "grad_norm": 1.3913165441700324, | |
| "learning_rate": 6.244150858850368e-07, | |
| "loss": 0.4975, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 2.564892212934448, | |
| "grad_norm": 1.2628490579653824, | |
| "learning_rate": 6.095937595815104e-07, | |
| "loss": 0.492, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 2.570171579410471, | |
| "grad_norm": 1.2910442047354849, | |
| "learning_rate": 5.949390285710777e-07, | |
| "loss": 0.4534, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 2.5754509458864936, | |
| "grad_norm": 1.3249433260471921, | |
| "learning_rate": 5.804514489319402e-07, | |
| "loss": 0.487, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 2.5807303123625163, | |
| "grad_norm": 1.4072341388559009, | |
| "learning_rate": 5.661315703996905e-07, | |
| "loss": 0.4675, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 2.5860096788385394, | |
| "grad_norm": 1.4015735453825087, | |
| "learning_rate": 5.519799363464523e-07, | |
| "loss": 0.4845, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 2.5912890453145625, | |
| "grad_norm": 1.4217891239143823, | |
| "learning_rate": 5.379970837602611e-07, | |
| "loss": 0.4998, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 2.596568411790585, | |
| "grad_norm": 1.3648246182499375, | |
| "learning_rate": 5.241835432246888e-07, | |
| "loss": 0.5176, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 2.601847778266608, | |
| "grad_norm": 1.414958969535076, | |
| "learning_rate": 5.105398388987098e-07, | |
| "loss": 0.4967, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 2.607127144742631, | |
| "grad_norm": 1.3790275298806813, | |
| "learning_rate": 4.970664884968135e-07, | |
| "loss": 0.4526, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 2.6124065112186536, | |
| "grad_norm": 1.4260512881673313, | |
| "learning_rate": 4.837640032693558e-07, | |
| "loss": 0.4988, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 2.6176858776946768, | |
| "grad_norm": 1.4375522094160005, | |
| "learning_rate": 4.7063288798316397e-07, | |
| "loss": 0.5034, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 2.6229652441706994, | |
| "grad_norm": 1.2795249845130867, | |
| "learning_rate": 4.576736409023813e-07, | |
| "loss": 0.4697, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 2.6282446106467225, | |
| "grad_norm": 1.341408752696913, | |
| "learning_rate": 4.448867537695578e-07, | |
| "loss": 0.4577, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 2.633523977122745, | |
| "grad_norm": 1.388819232620493, | |
| "learning_rate": 4.322727117869951e-07, | |
| "loss": 0.4578, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.6388033435987683, | |
| "grad_norm": 1.4259575315112532, | |
| "learning_rate": 4.198319935983325e-07, | |
| "loss": 0.432, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 2.644082710074791, | |
| "grad_norm": 1.3388679638482945, | |
| "learning_rate": 4.0756507127038494e-07, | |
| "loss": 0.4297, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 2.6493620765508137, | |
| "grad_norm": 1.343061176539468, | |
| "learning_rate": 3.9547241027523164e-07, | |
| "loss": 0.4731, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 2.654641443026837, | |
| "grad_norm": 1.3358210629083995, | |
| "learning_rate": 3.8355446947255293e-07, | |
| "loss": 0.3901, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 2.65992080950286, | |
| "grad_norm": 1.2293751714391306, | |
| "learning_rate": 3.71811701092219e-07, | |
| "loss": 0.4707, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 2.6652001759788826, | |
| "grad_norm": 1.3129862851940244, | |
| "learning_rate": 3.602445507171276e-07, | |
| "loss": 0.4352, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 2.6704795424549053, | |
| "grad_norm": 1.4034927380523827, | |
| "learning_rate": 3.488534572662994e-07, | |
| "loss": 0.4641, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 2.6757589089309284, | |
| "grad_norm": 1.2489349783536317, | |
| "learning_rate": 3.3763885297822153e-07, | |
| "loss": 0.4681, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 2.681038275406951, | |
| "grad_norm": 1.338257980632932, | |
| "learning_rate": 3.266011633944477e-07, | |
| "loss": 0.4466, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 2.686317641882974, | |
| "grad_norm": 1.340765808641228, | |
| "learning_rate": 3.1574080734344757e-07, | |
| "loss": 0.4427, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 2.691597008358997, | |
| "grad_norm": 1.3488826792679693, | |
| "learning_rate": 3.0505819692471797e-07, | |
| "loss": 0.4425, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 2.69687637483502, | |
| "grad_norm": 1.2448047137351241, | |
| "learning_rate": 2.9455373749314285e-07, | |
| "loss": 0.5045, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 2.7021557413110426, | |
| "grad_norm": 1.327982717505523, | |
| "learning_rate": 2.842278276436128e-07, | |
| "loss": 0.4434, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 2.7074351077870658, | |
| "grad_norm": 1.3317380787717712, | |
| "learning_rate": 2.7408085919590265e-07, | |
| "loss": 0.4685, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 2.7127144742630884, | |
| "grad_norm": 1.418889669781179, | |
| "learning_rate": 2.6411321717979886e-07, | |
| "loss": 0.4459, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 2.717993840739111, | |
| "grad_norm": 1.4212898616431393, | |
| "learning_rate": 2.5432527982049424e-07, | |
| "loss": 0.4436, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 2.723273207215134, | |
| "grad_norm": 1.372505212467702, | |
| "learning_rate": 2.447174185242324e-07, | |
| "loss": 0.4942, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 2.7285525736911573, | |
| "grad_norm": 1.429171310323447, | |
| "learning_rate": 2.3528999786421758e-07, | |
| "loss": 0.4636, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 2.73383194016718, | |
| "grad_norm": 1.338261834173231, | |
| "learning_rate": 2.2604337556677846e-07, | |
| "loss": 0.4656, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 2.7391113066432027, | |
| "grad_norm": 1.324849203082196, | |
| "learning_rate": 2.1697790249779638e-07, | |
| "loss": 0.4788, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 2.744390673119226, | |
| "grad_norm": 1.350337658446117, | |
| "learning_rate": 2.080939226493889e-07, | |
| "loss": 0.4846, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 2.7496700395952485, | |
| "grad_norm": 1.3500445059182393, | |
| "learning_rate": 1.9939177312685963e-07, | |
| "loss": 0.4419, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 2.7549494060712716, | |
| "grad_norm": 1.375463337098407, | |
| "learning_rate": 1.908717841359048e-07, | |
| "loss": 0.4687, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 2.7602287725472943, | |
| "grad_norm": 1.4823623308521863, | |
| "learning_rate": 1.825342789700846e-07, | |
| "loss": 0.4779, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 2.765508139023317, | |
| "grad_norm": 1.4161590791252803, | |
| "learning_rate": 1.7437957399855488e-07, | |
| "loss": 0.4685, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 2.77078750549934, | |
| "grad_norm": 1.2986729971436586, | |
| "learning_rate": 1.664079786540629e-07, | |
| "loss": 0.4771, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 2.776066871975363, | |
| "grad_norm": 1.3827623556792923, | |
| "learning_rate": 1.5861979542120598e-07, | |
| "loss": 0.4634, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 2.781346238451386, | |
| "grad_norm": 1.3339794328080277, | |
| "learning_rate": 1.510153198249531e-07, | |
| "loss": 0.4435, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 2.7866256049274085, | |
| "grad_norm": 1.3094435959629867, | |
| "learning_rate": 1.435948404194304e-07, | |
| "loss": 0.4537, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 2.7919049714034316, | |
| "grad_norm": 1.395651401211996, | |
| "learning_rate": 1.363586387769761e-07, | |
| "loss": 0.4729, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 2.7971843378794543, | |
| "grad_norm": 1.3803361937212524, | |
| "learning_rate": 1.2930698947744957e-07, | |
| "loss": 0.5551, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 2.8024637043554774, | |
| "grad_norm": 1.403170004866064, | |
| "learning_rate": 1.22440160097817e-07, | |
| "loss": 0.4659, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 2.8077430708315, | |
| "grad_norm": 1.4326502602660136, | |
| "learning_rate": 1.157584112019966e-07, | |
| "loss": 0.4532, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 2.813022437307523, | |
| "grad_norm": 1.413170997771312, | |
| "learning_rate": 1.0926199633097156e-07, | |
| "loss": 0.5077, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 2.818301803783546, | |
| "grad_norm": 1.4978501520243235, | |
| "learning_rate": 1.0295116199317057e-07, | |
| "loss": 0.4524, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 2.823581170259569, | |
| "grad_norm": 1.3811339585897102, | |
| "learning_rate": 9.682614765511134e-08, | |
| "loss": 0.4267, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 2.8288605367355917, | |
| "grad_norm": 1.2532370949093539, | |
| "learning_rate": 9.08871857323157e-08, | |
| "loss": 0.5053, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 2.8341399032116144, | |
| "grad_norm": 1.3788001439277469, | |
| "learning_rate": 8.513450158049109e-08, | |
| "loss": 0.4377, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 2.8394192696876375, | |
| "grad_norm": 1.3386655810754182, | |
| "learning_rate": 7.956831348697791e-08, | |
| "loss": 0.5369, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 2.8446986361636606, | |
| "grad_norm": 1.4031016680793478, | |
| "learning_rate": 7.418883266246734e-08, | |
| "loss": 0.4692, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 2.8499780026396833, | |
| "grad_norm": 1.4223612412457645, | |
| "learning_rate": 6.899626323298714e-08, | |
| "loss": 0.4975, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 2.855257369115706, | |
| "grad_norm": 1.468568132224329, | |
| "learning_rate": 6.399080223215503e-08, | |
| "loss": 0.4278, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 2.860536735591729, | |
| "grad_norm": 1.2905228846748407, | |
| "learning_rate": 5.917263959370312e-08, | |
| "loss": 0.4459, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 2.8658161020677517, | |
| "grad_norm": 1.30438415852383, | |
| "learning_rate": 5.454195814427021e-08, | |
| "loss": 0.4881, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 2.871095468543775, | |
| "grad_norm": 1.4658032992281533, | |
| "learning_rate": 5.009893359646523e-08, | |
| "loss": 0.422, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 2.8763748350197975, | |
| "grad_norm": 1.403126172079016, | |
| "learning_rate": 4.584373454219859e-08, | |
| "loss": 0.4298, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 2.8816542014958206, | |
| "grad_norm": 1.305238740579575, | |
| "learning_rate": 4.177652244628627e-08, | |
| "loss": 0.447, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 2.8869335679718433, | |
| "grad_norm": 1.4226875729692003, | |
| "learning_rate": 3.7897451640321326e-08, | |
| "loss": 0.537, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 2.8922129344478664, | |
| "grad_norm": 1.3043310631991534, | |
| "learning_rate": 3.4206669316819155e-08, | |
| "loss": 0.495, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 2.897492300923889, | |
| "grad_norm": 1.4379823363129705, | |
| "learning_rate": 3.0704315523631956e-08, | |
| "loss": 0.4139, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 2.9027716673999118, | |
| "grad_norm": 1.3544951690739686, | |
| "learning_rate": 2.7390523158633552e-08, | |
| "loss": 0.5213, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 2.908051033875935, | |
| "grad_norm": 1.5078288720536737, | |
| "learning_rate": 2.426541796467785e-08, | |
| "loss": 0.4605, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 2.913330400351958, | |
| "grad_norm": 1.35610421146911, | |
| "learning_rate": 2.1329118524827662e-08, | |
| "loss": 0.4708, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 2.9186097668279807, | |
| "grad_norm": 1.4726737456407306, | |
| "learning_rate": 1.8581736257852756e-08, | |
| "loss": 0.4868, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 2.9238891333040034, | |
| "grad_norm": 1.353334569052309, | |
| "learning_rate": 1.6023375414004894e-08, | |
| "loss": 0.4867, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 2.9291684997800265, | |
| "grad_norm": 1.3416226125226052, | |
| "learning_rate": 1.3654133071059894e-08, | |
| "loss": 0.4396, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 2.934447866256049, | |
| "grad_norm": 1.226026839315963, | |
| "learning_rate": 1.1474099130635575e-08, | |
| "loss": 0.4901, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 2.9397272327320723, | |
| "grad_norm": 1.3404474468548127, | |
| "learning_rate": 9.48335631477948e-09, | |
| "loss": 0.4651, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 2.945006599208095, | |
| "grad_norm": 1.3923456514703572, | |
| "learning_rate": 7.681980162830283e-09, | |
| "loss": 0.5356, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 2.950285965684118, | |
| "grad_norm": 1.2760505962703481, | |
| "learning_rate": 6.070039028550634e-09, | |
| "loss": 0.5385, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 2.9555653321601407, | |
| "grad_norm": 2.2171211350170843, | |
| "learning_rate": 4.647594077534235e-09, | |
| "loss": 0.5793, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 2.960844698636164, | |
| "grad_norm": 1.5115950710680348, | |
| "learning_rate": 3.41469928488547e-09, | |
| "loss": 0.5072, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 2.9661240651121865, | |
| "grad_norm": 1.386901569403929, | |
| "learning_rate": 2.371401433170495e-09, | |
| "loss": 0.4858, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 2.971403431588209, | |
| "grad_norm": 1.4637428485442363, | |
| "learning_rate": 1.5177401106419853e-09, | |
| "loss": 0.4478, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 2.9766827980642323, | |
| "grad_norm": 1.4551106718059847, | |
| "learning_rate": 8.537477097364522e-10, | |
| "loss": 0.4673, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 2.9819621645402554, | |
| "grad_norm": 1.3415126297649902, | |
| "learning_rate": 3.7944942584688947e-10, | |
| "loss": 0.4997, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 2.987241531016278, | |
| "grad_norm": 1.2328749287439438, | |
| "learning_rate": 9.486325636576254e-11, | |
| "loss": 0.5271, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 2.987241531016278, | |
| "step": 567, | |
| "total_flos": 4.6211312738788966e+17, | |
| "train_loss": 0.7107904120832944, | |
| "train_runtime": 62456.134, | |
| "train_samples_per_second": 0.437, | |
| "train_steps_per_second": 0.009 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 567, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 200, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.6211312738788966e+17, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |