| { | |
| "best_global_step": 59376, | |
| "best_metric": 0.44603702425956726, | |
| "best_model_checkpoint": "./my_model1/checkpoint-59376", | |
| "epoch": 2.0, | |
| "eval_steps": 500, | |
| "global_step": 59376, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0033683643222850983, | |
| "grad_norm": 6.297862529754639, | |
| "learning_rate": 1.1115477460281817e-06, | |
| "loss": 4.6374, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.006736728644570197, | |
| "grad_norm": 2.914098024368286, | |
| "learning_rate": 2.234323247066749e-06, | |
| "loss": 4.3734, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.010105092966855295, | |
| "grad_norm": 2.6752781867980957, | |
| "learning_rate": 3.357098748105317e-06, | |
| "loss": 4.1151, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.013473457289140393, | |
| "grad_norm": 2.557371139526367, | |
| "learning_rate": 4.479874249143884e-06, | |
| "loss": 3.8823, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.016841821611425493, | |
| "grad_norm": 2.3797903060913086, | |
| "learning_rate": 5.602649750182451e-06, | |
| "loss": 3.6958, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.02021018593371059, | |
| "grad_norm": 2.318178653717041, | |
| "learning_rate": 6.725425251221018e-06, | |
| "loss": 3.5736, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.02357855025599569, | |
| "grad_norm": 2.263061046600342, | |
| "learning_rate": 7.848200752259587e-06, | |
| "loss": 3.4535, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.026946914578280787, | |
| "grad_norm": 2.245070219039917, | |
| "learning_rate": 8.970976253298154e-06, | |
| "loss": 3.3254, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.030315278900565887, | |
| "grad_norm": 2.1786885261535645, | |
| "learning_rate": 1.009375175433672e-05, | |
| "loss": 3.1968, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.03368364322285099, | |
| "grad_norm": 2.1120216846466064, | |
| "learning_rate": 1.1216527255375288e-05, | |
| "loss": 3.0749, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.03705200754513608, | |
| "grad_norm": 2.225299835205078, | |
| "learning_rate": 1.2339302756413855e-05, | |
| "loss": 2.956, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.04042037186742118, | |
| "grad_norm": 1.9265443086624146, | |
| "learning_rate": 1.3462078257452423e-05, | |
| "loss": 2.8324, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.04378873618970628, | |
| "grad_norm": 4.846482276916504, | |
| "learning_rate": 1.458485375849099e-05, | |
| "loss": 2.7146, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.04715710051199138, | |
| "grad_norm": 8.298853874206543, | |
| "learning_rate": 1.5707629259529558e-05, | |
| "loss": 2.6336, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.05052546483427647, | |
| "grad_norm": 12.867733001708984, | |
| "learning_rate": 1.6830404760568124e-05, | |
| "loss": 2.5894, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.05389382915656157, | |
| "grad_norm": 17.92266082763672, | |
| "learning_rate": 1.7953180261606693e-05, | |
| "loss": 2.5615, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.057262193478846674, | |
| "grad_norm": 13.567904472351074, | |
| "learning_rate": 1.907595576264526e-05, | |
| "loss": 2.5376, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.060630557801131774, | |
| "grad_norm": 18.349245071411133, | |
| "learning_rate": 2.0198731263683825e-05, | |
| "loss": 2.5115, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.06399892212341687, | |
| "grad_norm": 18.910877227783203, | |
| "learning_rate": 2.1321506764722397e-05, | |
| "loss": 2.4836, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.06736728644570197, | |
| "grad_norm": 5.438470363616943, | |
| "learning_rate": 2.2444282265760963e-05, | |
| "loss": 2.448, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.07073565076798706, | |
| "grad_norm": 1.8990598917007446, | |
| "learning_rate": 2.356705776679953e-05, | |
| "loss": 2.3836, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.07410401509027216, | |
| "grad_norm": 1.7939313650131226, | |
| "learning_rate": 2.46898332678381e-05, | |
| "loss": 2.2869, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.07747237941255726, | |
| "grad_norm": 2.6316609382629395, | |
| "learning_rate": 2.581260876887666e-05, | |
| "loss": 2.1664, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.08084074373484236, | |
| "grad_norm": 3.9971001148223877, | |
| "learning_rate": 2.693538426991523e-05, | |
| "loss": 2.0635, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.08420910805712746, | |
| "grad_norm": 2.845649242401123, | |
| "learning_rate": 2.8058159770953803e-05, | |
| "loss": 2.0033, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.08757747237941256, | |
| "grad_norm": 11.22779655456543, | |
| "learning_rate": 2.9180935271992365e-05, | |
| "loss": 1.944, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.09094583670169766, | |
| "grad_norm": 8.039031982421875, | |
| "learning_rate": 3.0303710773030935e-05, | |
| "loss": 1.8935, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.09431420102398276, | |
| "grad_norm": 19.868438720703125, | |
| "learning_rate": 3.14264862740695e-05, | |
| "loss": 1.8509, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.09768256534626785, | |
| "grad_norm": 19.26648712158203, | |
| "learning_rate": 3.254926177510807e-05, | |
| "loss": 1.808, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.10105092966855295, | |
| "grad_norm": 10.993364334106445, | |
| "learning_rate": 3.367203727614663e-05, | |
| "loss": 1.7658, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.10441929399083805, | |
| "grad_norm": 12.577337265014648, | |
| "learning_rate": 3.47948127771852e-05, | |
| "loss": 1.7268, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.10778765831312315, | |
| "grad_norm": 15.279227256774902, | |
| "learning_rate": 3.591758827822377e-05, | |
| "loss": 1.6961, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.11115602263540825, | |
| "grad_norm": 15.154927253723145, | |
| "learning_rate": 3.704036377926234e-05, | |
| "loss": 1.6632, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.11452438695769335, | |
| "grad_norm": 5.024831295013428, | |
| "learning_rate": 3.816313928030091e-05, | |
| "loss": 1.627, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.11789275127997845, | |
| "grad_norm": 7.439777851104736, | |
| "learning_rate": 3.928591478133947e-05, | |
| "loss": 1.5909, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.12126111560226355, | |
| "grad_norm": 7.653560638427734, | |
| "learning_rate": 4.040869028237804e-05, | |
| "loss": 1.5621, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.12462947992454863, | |
| "grad_norm": 7.883094310760498, | |
| "learning_rate": 4.1531465783416603e-05, | |
| "loss": 1.5307, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.12799784424683375, | |
| "grad_norm": 3.2945971488952637, | |
| "learning_rate": 4.265424128445518e-05, | |
| "loss": 1.5016, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.13136620856911885, | |
| "grad_norm": 5.135283946990967, | |
| "learning_rate": 4.377701678549374e-05, | |
| "loss": 1.4741, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.13473457289140395, | |
| "grad_norm": 8.129427909851074, | |
| "learning_rate": 4.489979228653231e-05, | |
| "loss": 1.4423, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.13810293721368902, | |
| "grad_norm": 8.010125160217285, | |
| "learning_rate": 4.6022567787570874e-05, | |
| "loss": 1.4146, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.14147130153597412, | |
| "grad_norm": 3.1212265491485596, | |
| "learning_rate": 4.714534328860944e-05, | |
| "loss": 1.3919, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.14483966585825922, | |
| "grad_norm": 3.6468098163604736, | |
| "learning_rate": 4.826811878964801e-05, | |
| "loss": 1.373, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.14820803018054432, | |
| "grad_norm": 4.597881317138672, | |
| "learning_rate": 4.939089429068658e-05, | |
| "loss": 1.352, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.15157639450282942, | |
| "grad_norm": 4.9619622230529785, | |
| "learning_rate": 5.051366979172515e-05, | |
| "loss": 1.3299, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.15494475882511452, | |
| "grad_norm": 4.055070877075195, | |
| "learning_rate": 5.163644529276371e-05, | |
| "loss": 1.312, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.15831312314739962, | |
| "grad_norm": 4.076910018920898, | |
| "learning_rate": 5.2759220793802276e-05, | |
| "loss": 1.2963, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.16168148746968472, | |
| "grad_norm": 2.7936923503875732, | |
| "learning_rate": 5.388199629484085e-05, | |
| "loss": 1.2782, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.16504985179196982, | |
| "grad_norm": 3.8645057678222656, | |
| "learning_rate": 5.5004771795879414e-05, | |
| "loss": 1.2598, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.16841821611425492, | |
| "grad_norm": 3.8098433017730713, | |
| "learning_rate": 5.6127547296917983e-05, | |
| "loss": 1.2426, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.17178658043654002, | |
| "grad_norm": 3.690554618835449, | |
| "learning_rate": 5.7250322797956546e-05, | |
| "loss": 1.2257, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.17515494475882512, | |
| "grad_norm": 3.7821402549743652, | |
| "learning_rate": 5.837309829899512e-05, | |
| "loss": 1.2138, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.17852330908111022, | |
| "grad_norm": 4.070770263671875, | |
| "learning_rate": 5.9495873800033684e-05, | |
| "loss": 1.2005, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.18189167340339532, | |
| "grad_norm": 5.843082904815674, | |
| "learning_rate": 6.061864930107225e-05, | |
| "loss": 1.1795, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.18526003772568042, | |
| "grad_norm": 4.773739337921143, | |
| "learning_rate": 6.174142480211082e-05, | |
| "loss": 1.1665, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.18862840204796552, | |
| "grad_norm": 3.8879311084747314, | |
| "learning_rate": 6.286420030314939e-05, | |
| "loss": 1.1529, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.1919967663702506, | |
| "grad_norm": 4.927277088165283, | |
| "learning_rate": 6.398697580418795e-05, | |
| "loss": 1.1397, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.1953651306925357, | |
| "grad_norm": 3.640209913253784, | |
| "learning_rate": 6.510975130522652e-05, | |
| "loss": 1.1199, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.1987334950148208, | |
| "grad_norm": 5.0505595207214355, | |
| "learning_rate": 6.62325268062651e-05, | |
| "loss": 1.1073, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.2021018593371059, | |
| "grad_norm": 3.703660011291504, | |
| "learning_rate": 6.735530230730366e-05, | |
| "loss": 1.0966, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.205470223659391, | |
| "grad_norm": 3.3192944526672363, | |
| "learning_rate": 6.847807780834223e-05, | |
| "loss": 1.0823, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.2088385879816761, | |
| "grad_norm": 4.713069915771484, | |
| "learning_rate": 6.96008533093808e-05, | |
| "loss": 1.0718, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.2122069523039612, | |
| "grad_norm": 4.135160446166992, | |
| "learning_rate": 7.072362881041936e-05, | |
| "loss": 1.057, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.2155753166262463, | |
| "grad_norm": 4.193116664886475, | |
| "learning_rate": 7.184640431145793e-05, | |
| "loss": 1.0509, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.2189436809485314, | |
| "grad_norm": 4.028440475463867, | |
| "learning_rate": 7.296917981249649e-05, | |
| "loss": 1.0365, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.2223120452708165, | |
| "grad_norm": 4.614249229431152, | |
| "learning_rate": 7.409195531353507e-05, | |
| "loss": 1.0293, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.2256804095931016, | |
| "grad_norm": 4.366164684295654, | |
| "learning_rate": 7.521473081457363e-05, | |
| "loss": 1.0198, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 0.2290487739153867, | |
| "grad_norm": 5.207546710968018, | |
| "learning_rate": 7.63375063156122e-05, | |
| "loss": 1.0059, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.2324171382376718, | |
| "grad_norm": 3.651235342025757, | |
| "learning_rate": 7.746028181665077e-05, | |
| "loss": 1.0009, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 0.2357855025599569, | |
| "grad_norm": 4.040618896484375, | |
| "learning_rate": 7.858305731768933e-05, | |
| "loss": 0.9907, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.239153866882242, | |
| "grad_norm": 3.792742967605591, | |
| "learning_rate": 7.97058328187279e-05, | |
| "loss": 0.9847, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 0.2425222312045271, | |
| "grad_norm": 4.362412929534912, | |
| "learning_rate": 8.082860831976646e-05, | |
| "loss": 0.9738, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.24589059552681217, | |
| "grad_norm": 4.572664737701416, | |
| "learning_rate": 8.195138382080504e-05, | |
| "loss": 0.9678, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 0.24925895984909727, | |
| "grad_norm": 4.474113464355469, | |
| "learning_rate": 8.30741593218436e-05, | |
| "loss": 0.957, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.2526273241713824, | |
| "grad_norm": 4.847846984863281, | |
| "learning_rate": 8.419693482288217e-05, | |
| "loss": 0.9492, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.2559956884936675, | |
| "grad_norm": 4.326010227203369, | |
| "learning_rate": 8.531971032392074e-05, | |
| "loss": 0.9444, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.2593640528159526, | |
| "grad_norm": 4.634029388427734, | |
| "learning_rate": 8.64424858249593e-05, | |
| "loss": 0.9337, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 0.2627324171382377, | |
| "grad_norm": 3.841517925262451, | |
| "learning_rate": 8.756526132599788e-05, | |
| "loss": 0.9282, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.2661007814605228, | |
| "grad_norm": 4.89427375793457, | |
| "learning_rate": 8.868803682703643e-05, | |
| "loss": 0.9164, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 0.2694691457828079, | |
| "grad_norm": 4.296108245849609, | |
| "learning_rate": 8.9810812328075e-05, | |
| "loss": 0.9146, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.27283751010509294, | |
| "grad_norm": 4.8395586013793945, | |
| "learning_rate": 9.093358782911357e-05, | |
| "loss": 0.903, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 0.27620587442737804, | |
| "grad_norm": 4.250405788421631, | |
| "learning_rate": 9.205636333015214e-05, | |
| "loss": 0.9013, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.27957423874966314, | |
| "grad_norm": 3.9244723320007324, | |
| "learning_rate": 9.317913883119071e-05, | |
| "loss": 0.8968, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 0.28294260307194824, | |
| "grad_norm": 4.492284774780273, | |
| "learning_rate": 9.430191433222928e-05, | |
| "loss": 0.8924, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.28631096739423334, | |
| "grad_norm": 4.632638454437256, | |
| "learning_rate": 9.542468983326785e-05, | |
| "loss": 0.8822, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.28967933171651844, | |
| "grad_norm": 3.6097586154937744, | |
| "learning_rate": 9.65474653343064e-05, | |
| "loss": 0.8774, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.29304769603880354, | |
| "grad_norm": 3.6722657680511475, | |
| "learning_rate": 9.767024083534497e-05, | |
| "loss": 0.8697, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 0.29641606036108864, | |
| "grad_norm": 4.693965911865234, | |
| "learning_rate": 9.879301633638355e-05, | |
| "loss": 0.8583, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.29978442468337374, | |
| "grad_norm": 3.5417885780334473, | |
| "learning_rate": 9.991579183742211e-05, | |
| "loss": 0.8498, | |
| "step": 8900 | |
| }, | |
| { | |
| "epoch": 0.30315278900565884, | |
| "grad_norm": 5.091881275177002, | |
| "learning_rate": 0.00010103856733846069, | |
| "loss": 0.8396, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.30652115332794394, | |
| "grad_norm": 4.218757152557373, | |
| "learning_rate": 0.00010216134283949925, | |
| "loss": 0.8314, | |
| "step": 9100 | |
| }, | |
| { | |
| "epoch": 0.30988951765022904, | |
| "grad_norm": 3.600708246231079, | |
| "learning_rate": 0.00010328411834053782, | |
| "loss": 0.8249, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.31325788197251414, | |
| "grad_norm": 3.8332407474517822, | |
| "learning_rate": 0.00010440689384157639, | |
| "loss": 0.8187, | |
| "step": 9300 | |
| }, | |
| { | |
| "epoch": 0.31662624629479924, | |
| "grad_norm": 3.1585068702697754, | |
| "learning_rate": 0.00010552966934261494, | |
| "loss": 0.8087, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.31999461061708434, | |
| "grad_norm": 3.4112815856933594, | |
| "learning_rate": 0.00010665244484365351, | |
| "loss": 0.8015, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.32336297493936944, | |
| "grad_norm": 4.372965335845947, | |
| "learning_rate": 0.00010777522034469207, | |
| "loss": 0.7892, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.32673133926165454, | |
| "grad_norm": 3.7581305503845215, | |
| "learning_rate": 0.00010889799584573066, | |
| "loss": 0.7814, | |
| "step": 9700 | |
| }, | |
| { | |
| "epoch": 0.33009970358393964, | |
| "grad_norm": 4.480976581573486, | |
| "learning_rate": 0.00011002077134676922, | |
| "loss": 0.7625, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.33346806790622474, | |
| "grad_norm": 3.4865591526031494, | |
| "learning_rate": 0.00011114354684780779, | |
| "loss": 0.7524, | |
| "step": 9900 | |
| }, | |
| { | |
| "epoch": 0.33683643222850984, | |
| "grad_norm": 3.5094540119171143, | |
| "learning_rate": 0.00011226632234884636, | |
| "loss": 0.7421, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.34020479655079494, | |
| "grad_norm": 3.0365946292877197, | |
| "learning_rate": 0.00011338909784988491, | |
| "loss": 0.7354, | |
| "step": 10100 | |
| }, | |
| { | |
| "epoch": 0.34357316087308004, | |
| "grad_norm": 3.5247597694396973, | |
| "learning_rate": 0.00011451187335092348, | |
| "loss": 0.7224, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.34694152519536514, | |
| "grad_norm": 3.1095457077026367, | |
| "learning_rate": 0.00011563464885196205, | |
| "loss": 0.7195, | |
| "step": 10300 | |
| }, | |
| { | |
| "epoch": 0.35030988951765024, | |
| "grad_norm": 3.8091487884521484, | |
| "learning_rate": 0.00011675742435300064, | |
| "loss": 0.713, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.35367825383993534, | |
| "grad_norm": 2.9617044925689697, | |
| "learning_rate": 0.00011788019985403919, | |
| "loss": 0.7067, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.35704661816222044, | |
| "grad_norm": 4.0781331062316895, | |
| "learning_rate": 0.00011900297535507776, | |
| "loss": 0.7022, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 0.36041498248450554, | |
| "grad_norm": 2.9260106086730957, | |
| "learning_rate": 0.00012012575085611633, | |
| "loss": 0.6967, | |
| "step": 10700 | |
| }, | |
| { | |
| "epoch": 0.36378334680679064, | |
| "grad_norm": 3.00919508934021, | |
| "learning_rate": 0.00012124852635715489, | |
| "loss": 0.6934, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.36715171112907574, | |
| "grad_norm": 2.74841046333313, | |
| "learning_rate": 0.00012237130185819344, | |
| "loss": 0.6874, | |
| "step": 10900 | |
| }, | |
| { | |
| "epoch": 0.37052007545136084, | |
| "grad_norm": 2.3908281326293945, | |
| "learning_rate": 0.000123494077359232, | |
| "loss": 0.6843, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.37388843977364594, | |
| "grad_norm": 2.5212063789367676, | |
| "learning_rate": 0.0001246168528602706, | |
| "loss": 0.681, | |
| "step": 11100 | |
| }, | |
| { | |
| "epoch": 0.37725680409593104, | |
| "grad_norm": 2.342548370361328, | |
| "learning_rate": 0.00012573962836130918, | |
| "loss": 0.6755, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 0.3806251684182161, | |
| "grad_norm": 2.2817301750183105, | |
| "learning_rate": 0.00012686240386234775, | |
| "loss": 0.6762, | |
| "step": 11300 | |
| }, | |
| { | |
| "epoch": 0.3839935327405012, | |
| "grad_norm": 2.4880239963531494, | |
| "learning_rate": 0.0001279851793633863, | |
| "loss": 0.6696, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 0.3873618970627863, | |
| "grad_norm": 2.2513132095336914, | |
| "learning_rate": 0.00012910795486442486, | |
| "loss": 0.6698, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.3907302613850714, | |
| "grad_norm": 2.4084956645965576, | |
| "learning_rate": 0.00013023073036546343, | |
| "loss": 0.6669, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 0.3940986257073565, | |
| "grad_norm": 2.5854873657226562, | |
| "learning_rate": 0.000131353505866502, | |
| "loss": 0.6629, | |
| "step": 11700 | |
| }, | |
| { | |
| "epoch": 0.3974669900296416, | |
| "grad_norm": 2.377323627471924, | |
| "learning_rate": 0.00013247628136754056, | |
| "loss": 0.6607, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 0.4008353543519267, | |
| "grad_norm": 2.0934255123138428, | |
| "learning_rate": 0.00013359905686857913, | |
| "loss": 0.6557, | |
| "step": 11900 | |
| }, | |
| { | |
| "epoch": 0.4042037186742118, | |
| "grad_norm": 2.2876408100128174, | |
| "learning_rate": 0.0001347218323696177, | |
| "loss": 0.6537, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.4075720829964969, | |
| "grad_norm": 2.856818199157715, | |
| "learning_rate": 0.00013584460787065627, | |
| "loss": 0.6534, | |
| "step": 12100 | |
| }, | |
| { | |
| "epoch": 0.410940447318782, | |
| "grad_norm": 2.3577589988708496, | |
| "learning_rate": 0.00013696738337169484, | |
| "loss": 0.6468, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 0.4143088116410671, | |
| "grad_norm": 2.1369576454162598, | |
| "learning_rate": 0.0001380901588727334, | |
| "loss": 0.6466, | |
| "step": 12300 | |
| }, | |
| { | |
| "epoch": 0.4176771759633522, | |
| "grad_norm": 2.0527994632720947, | |
| "learning_rate": 0.00013921293437377195, | |
| "loss": 0.6423, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 0.4210455402856373, | |
| "grad_norm": 2.1849894523620605, | |
| "learning_rate": 0.00014033570987481052, | |
| "loss": 0.6408, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.4244139046079224, | |
| "grad_norm": 2.403149127960205, | |
| "learning_rate": 0.00014145848537584912, | |
| "loss": 0.6401, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 0.4277822689302075, | |
| "grad_norm": 1.983995795249939, | |
| "learning_rate": 0.0001425812608768877, | |
| "loss": 0.6387, | |
| "step": 12700 | |
| }, | |
| { | |
| "epoch": 0.4311506332524926, | |
| "grad_norm": 2.141962766647339, | |
| "learning_rate": 0.00014370403637792623, | |
| "loss": 0.635, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 0.4345189975747777, | |
| "grad_norm": 1.9785326719284058, | |
| "learning_rate": 0.0001448268118789648, | |
| "loss": 0.6314, | |
| "step": 12900 | |
| }, | |
| { | |
| "epoch": 0.4378873618970628, | |
| "grad_norm": 2.0606772899627686, | |
| "learning_rate": 0.00014594958738000337, | |
| "loss": 0.6285, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.4412557262193479, | |
| "grad_norm": 1.88225519657135, | |
| "learning_rate": 0.00014707236288104194, | |
| "loss": 0.6296, | |
| "step": 13100 | |
| }, | |
| { | |
| "epoch": 0.444624090541633, | |
| "grad_norm": 2.204674005508423, | |
| "learning_rate": 0.0001481951383820805, | |
| "loss": 0.628, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 0.4479924548639181, | |
| "grad_norm": 1.8650182485580444, | |
| "learning_rate": 0.00014931791388311908, | |
| "loss": 0.6264, | |
| "step": 13300 | |
| }, | |
| { | |
| "epoch": 0.4513608191862032, | |
| "grad_norm": 1.7972240447998047, | |
| "learning_rate": 0.00015044068938415765, | |
| "loss": 0.6211, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 0.4547291835084883, | |
| "grad_norm": 1.8085206747055054, | |
| "learning_rate": 0.00015156346488519621, | |
| "loss": 0.6223, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.4580975478307734, | |
| "grad_norm": 1.877871036529541, | |
| "learning_rate": 0.00015268624038623478, | |
| "loss": 0.624, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 0.4614659121530585, | |
| "grad_norm": 2.295692205429077, | |
| "learning_rate": 0.00015380901588727335, | |
| "loss": 0.6198, | |
| "step": 13700 | |
| }, | |
| { | |
| "epoch": 0.4648342764753436, | |
| "grad_norm": 2.4655864238739014, | |
| "learning_rate": 0.0001549317913883119, | |
| "loss": 0.6171, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 0.4682026407976287, | |
| "grad_norm": 1.9931831359863281, | |
| "learning_rate": 0.00015605456688935046, | |
| "loss": 0.6146, | |
| "step": 13900 | |
| }, | |
| { | |
| "epoch": 0.4715710051199138, | |
| "grad_norm": 1.7389591932296753, | |
| "learning_rate": 0.00015717734239038906, | |
| "loss": 0.6141, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.4749393694421989, | |
| "grad_norm": 2.0048677921295166, | |
| "learning_rate": 0.00015830011789142763, | |
| "loss": 0.613, | |
| "step": 14100 | |
| }, | |
| { | |
| "epoch": 0.478307733764484, | |
| "grad_norm": 2.0038020610809326, | |
| "learning_rate": 0.0001594228933924662, | |
| "loss": 0.6116, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 0.4816760980867691, | |
| "grad_norm": 1.8391730785369873, | |
| "learning_rate": 0.00016054566889350474, | |
| "loss": 0.6093, | |
| "step": 14300 | |
| }, | |
| { | |
| "epoch": 0.4850444624090542, | |
| "grad_norm": 1.769494652748108, | |
| "learning_rate": 0.0001616684443945433, | |
| "loss": 0.6081, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 0.4884128267313393, | |
| "grad_norm": 1.9740633964538574, | |
| "learning_rate": 0.00016279121989558188, | |
| "loss": 0.6069, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.49178119105362433, | |
| "grad_norm": 2.1322596073150635, | |
| "learning_rate": 0.00016391399539662045, | |
| "loss": 0.6067, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 0.49514955537590943, | |
| "grad_norm": 1.6382005214691162, | |
| "learning_rate": 0.00016503677089765902, | |
| "loss": 0.604, | |
| "step": 14700 | |
| }, | |
| { | |
| "epoch": 0.49851791969819453, | |
| "grad_norm": 1.49541175365448, | |
| "learning_rate": 0.0001661595463986976, | |
| "loss": 0.6027, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 0.5018862840204796, | |
| "grad_norm": 1.5882339477539062, | |
| "learning_rate": 0.00016728232189973616, | |
| "loss": 0.6014, | |
| "step": 14900 | |
| }, | |
| { | |
| "epoch": 0.5052546483427648, | |
| "grad_norm": 1.491133213043213, | |
| "learning_rate": 0.00016840509740077473, | |
| "loss": 0.5983, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.5086230126650498, | |
| "grad_norm": 1.7467178106307983, | |
| "learning_rate": 0.0001695278729018133, | |
| "loss": 0.5996, | |
| "step": 15100 | |
| }, | |
| { | |
| "epoch": 0.511991376987335, | |
| "grad_norm": 1.5445200204849243, | |
| "learning_rate": 0.00017065064840285186, | |
| "loss": 0.5937, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 0.51535974130962, | |
| "grad_norm": 1.613213300704956, | |
| "learning_rate": 0.0001717734239038904, | |
| "loss": 0.5924, | |
| "step": 15300 | |
| }, | |
| { | |
| "epoch": 0.5187281056319052, | |
| "grad_norm": 1.67715585231781, | |
| "learning_rate": 0.00017289619940492898, | |
| "loss": 0.594, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 0.5220964699541902, | |
| "grad_norm": 1.7080377340316772, | |
| "learning_rate": 0.00017401897490596757, | |
| "loss": 0.5935, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.5254648342764754, | |
| "grad_norm": 1.7722272872924805, | |
| "learning_rate": 0.00017514175040700614, | |
| "loss": 0.5914, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 0.5288331985987604, | |
| "grad_norm": 1.7470366954803467, | |
| "learning_rate": 0.00017626452590804468, | |
| "loss": 0.5883, | |
| "step": 15700 | |
| }, | |
| { | |
| "epoch": 0.5322015629210456, | |
| "grad_norm": 1.974663496017456, | |
| "learning_rate": 0.00017738730140908325, | |
| "loss": 0.5908, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 0.5355699272433306, | |
| "grad_norm": 1.4482321739196777, | |
| "learning_rate": 0.00017851007691012182, | |
| "loss": 0.5885, | |
| "step": 15900 | |
| }, | |
| { | |
| "epoch": 0.5389382915656158, | |
| "grad_norm": 1.750618815422058, | |
| "learning_rate": 0.0001796328524111604, | |
| "loss": 0.5855, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.5423066558879008, | |
| "grad_norm": 1.3821526765823364, | |
| "learning_rate": 0.00018075562791219896, | |
| "loss": 0.5884, | |
| "step": 16100 | |
| }, | |
| { | |
| "epoch": 0.5456750202101859, | |
| "grad_norm": 1.4892586469650269, | |
| "learning_rate": 0.00018187840341323753, | |
| "loss": 0.5838, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 0.549043384532471, | |
| "grad_norm": 1.5591208934783936, | |
| "learning_rate": 0.0001830011789142761, | |
| "loss": 0.5834, | |
| "step": 16300 | |
| }, | |
| { | |
| "epoch": 0.5524117488547561, | |
| "grad_norm": 1.326253056526184, | |
| "learning_rate": 0.00018412395441531467, | |
| "loss": 0.5828, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 0.5557801131770412, | |
| "grad_norm": 1.5288639068603516, | |
| "learning_rate": 0.00018524672991635324, | |
| "loss": 0.5793, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 0.5591484774993263, | |
| "grad_norm": 1.4673304557800293, | |
| "learning_rate": 0.0001863695054173918, | |
| "loss": 0.5791, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 0.5625168418216114, | |
| "grad_norm": 1.6291229724884033, | |
| "learning_rate": 0.00018749228091843035, | |
| "loss": 0.5792, | |
| "step": 16700 | |
| }, | |
| { | |
| "epoch": 0.5658852061438965, | |
| "grad_norm": 1.3908525705337524, | |
| "learning_rate": 0.00018861505641946892, | |
| "loss": 0.5795, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 0.5692535704661816, | |
| "grad_norm": 1.4598628282546997, | |
| "learning_rate": 0.00018973783192050752, | |
| "loss": 0.576, | |
| "step": 16900 | |
| }, | |
| { | |
| "epoch": 0.5726219347884667, | |
| "grad_norm": 1.2881489992141724, | |
| "learning_rate": 0.00019086060742154608, | |
| "loss": 0.575, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.5759902991107518, | |
| "grad_norm": 1.2719937562942505, | |
| "learning_rate": 0.00019198338292258465, | |
| "loss": 0.5747, | |
| "step": 17100 | |
| }, | |
| { | |
| "epoch": 0.5793586634330369, | |
| "grad_norm": 1.2574406862258911, | |
| "learning_rate": 0.0001931061584236232, | |
| "loss": 0.573, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 0.582727027755322, | |
| "grad_norm": 1.457133173942566, | |
| "learning_rate": 0.00019422893392466177, | |
| "loss": 0.5738, | |
| "step": 17300 | |
| }, | |
| { | |
| "epoch": 0.5860953920776071, | |
| "grad_norm": 1.2623742818832397, | |
| "learning_rate": 0.00019535170942570033, | |
| "loss": 0.571, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 0.5894637563998922, | |
| "grad_norm": 1.4135565757751465, | |
| "learning_rate": 0.0001964744849267389, | |
| "loss": 0.5706, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 0.5928321207221773, | |
| "grad_norm": 1.502484917640686, | |
| "learning_rate": 0.00019759726042777747, | |
| "loss": 0.5713, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 0.5962004850444624, | |
| "grad_norm": 1.3130122423171997, | |
| "learning_rate": 0.00019872003592881604, | |
| "loss": 0.5683, | |
| "step": 17700 | |
| }, | |
| { | |
| "epoch": 0.5995688493667475, | |
| "grad_norm": 1.2580504417419434, | |
| "learning_rate": 0.0001998428114298546, | |
| "loss": 0.5696, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 0.6029372136890326, | |
| "grad_norm": 1.204026460647583, | |
| "learning_rate": 0.00019975859987929996, | |
| "loss": 0.5664, | |
| "step": 17900 | |
| }, | |
| { | |
| "epoch": 0.6063055780113177, | |
| "grad_norm": 1.3051841259002686, | |
| "learning_rate": 0.00019947790206453243, | |
| "loss": 0.5666, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.6096739423336028, | |
| "grad_norm": 1.1939951181411743, | |
| "learning_rate": 0.00019919720424976494, | |
| "loss": 0.5634, | |
| "step": 18100 | |
| }, | |
| { | |
| "epoch": 0.6130423066558879, | |
| "grad_norm": 1.25477135181427, | |
| "learning_rate": 0.00019891650643499742, | |
| "loss": 0.5628, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 0.616410670978173, | |
| "grad_norm": 1.1275781393051147, | |
| "learning_rate": 0.0001986358086202299, | |
| "loss": 0.5624, | |
| "step": 18300 | |
| }, | |
| { | |
| "epoch": 0.6197790353004581, | |
| "grad_norm": 1.1167781352996826, | |
| "learning_rate": 0.00019835511080546237, | |
| "loss": 0.5617, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 0.6231473996227432, | |
| "grad_norm": 1.193454623222351, | |
| "learning_rate": 0.00019807441299069488, | |
| "loss": 0.5605, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 0.6265157639450283, | |
| "grad_norm": 1.1406720876693726, | |
| "learning_rate": 0.00019779371517592739, | |
| "loss": 0.5587, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 0.6298841282673134, | |
| "grad_norm": 1.2136386632919312, | |
| "learning_rate": 0.00019751301736115986, | |
| "loss": 0.5573, | |
| "step": 18700 | |
| }, | |
| { | |
| "epoch": 0.6332524925895985, | |
| "grad_norm": 1.216199278831482, | |
| "learning_rate": 0.00019723231954639234, | |
| "loss": 0.5563, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 0.6366208569118836, | |
| "grad_norm": 1.2443403005599976, | |
| "learning_rate": 0.00019695162173162482, | |
| "loss": 0.5519, | |
| "step": 18900 | |
| }, | |
| { | |
| "epoch": 0.6399892212341687, | |
| "grad_norm": 1.1415669918060303, | |
| "learning_rate": 0.0001966709239168573, | |
| "loss": 0.5551, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.6433575855564538, | |
| "grad_norm": 1.2228775024414062, | |
| "learning_rate": 0.0001963902261020898, | |
| "loss": 0.5547, | |
| "step": 19100 | |
| }, | |
| { | |
| "epoch": 0.6467259498787389, | |
| "grad_norm": 1.1878366470336914, | |
| "learning_rate": 0.0001961095282873223, | |
| "loss": 0.5537, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 0.650094314201024, | |
| "grad_norm": 1.1277652978897095, | |
| "learning_rate": 0.0001958288304725548, | |
| "loss": 0.5521, | |
| "step": 19300 | |
| }, | |
| { | |
| "epoch": 0.6534626785233091, | |
| "grad_norm": 1.2011772394180298, | |
| "learning_rate": 0.00019554813265778727, | |
| "loss": 0.5519, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 0.6568310428455941, | |
| "grad_norm": 1.1792044639587402, | |
| "learning_rate": 0.00019526743484301975, | |
| "loss": 0.5493, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 0.6601994071678793, | |
| "grad_norm": 1.1553574800491333, | |
| "learning_rate": 0.00019498673702825225, | |
| "loss": 0.5464, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 0.6635677714901643, | |
| "grad_norm": 1.1871212720870972, | |
| "learning_rate": 0.00019470603921348473, | |
| "loss": 0.5489, | |
| "step": 19700 | |
| }, | |
| { | |
| "epoch": 0.6669361358124495, | |
| "grad_norm": 1.0879842042922974, | |
| "learning_rate": 0.0001944253413987172, | |
| "loss": 0.5476, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 0.6703045001347345, | |
| "grad_norm": 1.3135937452316284, | |
| "learning_rate": 0.0001941446435839497, | |
| "loss": 0.5482, | |
| "step": 19900 | |
| }, | |
| { | |
| "epoch": 0.6736728644570197, | |
| "grad_norm": 1.0638514757156372, | |
| "learning_rate": 0.0001938639457691822, | |
| "loss": 0.546, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.6770412287793047, | |
| "grad_norm": 1.139218807220459, | |
| "learning_rate": 0.0001935832479544147, | |
| "loss": 0.5434, | |
| "step": 20100 | |
| }, | |
| { | |
| "epoch": 0.6804095931015899, | |
| "grad_norm": 1.0563747882843018, | |
| "learning_rate": 0.00019330255013964718, | |
| "loss": 0.5462, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 0.6837779574238749, | |
| "grad_norm": 1.0997061729431152, | |
| "learning_rate": 0.00019302185232487965, | |
| "loss": 0.5401, | |
| "step": 20300 | |
| }, | |
| { | |
| "epoch": 0.6871463217461601, | |
| "grad_norm": 1.0555341243743896, | |
| "learning_rate": 0.00019274115451011213, | |
| "loss": 0.5413, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 0.6905146860684451, | |
| "grad_norm": 1.1296801567077637, | |
| "learning_rate": 0.00019246045669534464, | |
| "loss": 0.5394, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 0.6938830503907303, | |
| "grad_norm": 1.1637988090515137, | |
| "learning_rate": 0.00019217975888057714, | |
| "loss": 0.5405, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 0.6972514147130153, | |
| "grad_norm": 1.1942201852798462, | |
| "learning_rate": 0.00019189906106580962, | |
| "loss": 0.5401, | |
| "step": 20700 | |
| }, | |
| { | |
| "epoch": 0.7006197790353005, | |
| "grad_norm": 1.104561686515808, | |
| "learning_rate": 0.0001916183632510421, | |
| "loss": 0.5385, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 0.7039881433575855, | |
| "grad_norm": 1.0518121719360352, | |
| "learning_rate": 0.00019133766543627458, | |
| "loss": 0.5394, | |
| "step": 20900 | |
| }, | |
| { | |
| "epoch": 0.7073565076798707, | |
| "grad_norm": 1.0300666093826294, | |
| "learning_rate": 0.00019105696762150706, | |
| "loss": 0.5361, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.7107248720021557, | |
| "grad_norm": 0.9076865315437317, | |
| "learning_rate": 0.00019077626980673956, | |
| "loss": 0.5384, | |
| "step": 21100 | |
| }, | |
| { | |
| "epoch": 0.7140932363244409, | |
| "grad_norm": 1.170762062072754, | |
| "learning_rate": 0.00019049557199197204, | |
| "loss": 0.5356, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 0.7174616006467259, | |
| "grad_norm": 1.102295160293579, | |
| "learning_rate": 0.00019021487417720455, | |
| "loss": 0.5359, | |
| "step": 21300 | |
| }, | |
| { | |
| "epoch": 0.7208299649690111, | |
| "grad_norm": 1.102849006652832, | |
| "learning_rate": 0.00018993417636243703, | |
| "loss": 0.535, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 0.7241983292912961, | |
| "grad_norm": 0.9895302653312683, | |
| "learning_rate": 0.0001896534785476695, | |
| "loss": 0.533, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 0.7275666936135813, | |
| "grad_norm": 1.0017067193984985, | |
| "learning_rate": 0.000189372780732902, | |
| "loss": 0.5328, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 0.7309350579358663, | |
| "grad_norm": 1.068293809890747, | |
| "learning_rate": 0.0001890920829181345, | |
| "loss": 0.5355, | |
| "step": 21700 | |
| }, | |
| { | |
| "epoch": 0.7343034222581515, | |
| "grad_norm": 1.092910647392273, | |
| "learning_rate": 0.00018881138510336697, | |
| "loss": 0.5322, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 0.7376717865804365, | |
| "grad_norm": 1.0329002141952515, | |
| "learning_rate": 0.00018853068728859947, | |
| "loss": 0.5308, | |
| "step": 21900 | |
| }, | |
| { | |
| "epoch": 0.7410401509027217, | |
| "grad_norm": 1.1431453227996826, | |
| "learning_rate": 0.00018824998947383195, | |
| "loss": 0.5312, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.7444085152250067, | |
| "grad_norm": 0.9961342811584473, | |
| "learning_rate": 0.00018796929165906446, | |
| "loss": 0.5316, | |
| "step": 22100 | |
| }, | |
| { | |
| "epoch": 0.7477768795472919, | |
| "grad_norm": 0.9267546534538269, | |
| "learning_rate": 0.00018768859384429693, | |
| "loss": 0.5308, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 0.7511452438695769, | |
| "grad_norm": 1.0788689851760864, | |
| "learning_rate": 0.0001874078960295294, | |
| "loss": 0.5297, | |
| "step": 22300 | |
| }, | |
| { | |
| "epoch": 0.7545136081918621, | |
| "grad_norm": 1.0680807828903198, | |
| "learning_rate": 0.0001871271982147619, | |
| "loss": 0.5283, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 0.7578819725141471, | |
| "grad_norm": 1.122947096824646, | |
| "learning_rate": 0.0001868465003999944, | |
| "loss": 0.5268, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 0.7612503368364322, | |
| "grad_norm": 1.0286208391189575, | |
| "learning_rate": 0.0001865658025852269, | |
| "loss": 0.5264, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 0.7646187011587173, | |
| "grad_norm": 1.0122915506362915, | |
| "learning_rate": 0.00018628510477045938, | |
| "loss": 0.5261, | |
| "step": 22700 | |
| }, | |
| { | |
| "epoch": 0.7679870654810024, | |
| "grad_norm": 1.0254476070404053, | |
| "learning_rate": 0.00018600440695569186, | |
| "loss": 0.5253, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 0.7713554298032875, | |
| "grad_norm": 0.9192175269126892, | |
| "learning_rate": 0.00018572370914092434, | |
| "loss": 0.5235, | |
| "step": 22900 | |
| }, | |
| { | |
| "epoch": 0.7747237941255726, | |
| "grad_norm": 1.0937845706939697, | |
| "learning_rate": 0.00018544301132615684, | |
| "loss": 0.5243, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.7780921584478577, | |
| "grad_norm": 1.0288293361663818, | |
| "learning_rate": 0.00018516231351138932, | |
| "loss": 0.5221, | |
| "step": 23100 | |
| }, | |
| { | |
| "epoch": 0.7814605227701428, | |
| "grad_norm": 1.0520168542861938, | |
| "learning_rate": 0.0001848816156966218, | |
| "loss": 0.5237, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 0.7848288870924279, | |
| "grad_norm": 0.9760498404502869, | |
| "learning_rate": 0.0001846009178818543, | |
| "loss": 0.5245, | |
| "step": 23300 | |
| }, | |
| { | |
| "epoch": 0.788197251414713, | |
| "grad_norm": 1.0123729705810547, | |
| "learning_rate": 0.00018432022006708678, | |
| "loss": 0.5238, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 0.7915656157369981, | |
| "grad_norm": 0.9239659905433655, | |
| "learning_rate": 0.0001840395222523193, | |
| "loss": 0.5228, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 0.7949339800592832, | |
| "grad_norm": 0.964204728603363, | |
| "learning_rate": 0.00018375882443755177, | |
| "loss": 0.5202, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 0.7983023443815683, | |
| "grad_norm": 1.024375081062317, | |
| "learning_rate": 0.00018347812662278425, | |
| "loss": 0.5214, | |
| "step": 23700 | |
| }, | |
| { | |
| "epoch": 0.8016707087038534, | |
| "grad_norm": 0.9285891652107239, | |
| "learning_rate": 0.00018319742880801672, | |
| "loss": 0.5216, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 0.8050390730261385, | |
| "grad_norm": 0.9374035000801086, | |
| "learning_rate": 0.00018291673099324923, | |
| "loss": 0.5199, | |
| "step": 23900 | |
| }, | |
| { | |
| "epoch": 0.8084074373484236, | |
| "grad_norm": 0.9423925280570984, | |
| "learning_rate": 0.00018263603317848174, | |
| "loss": 0.5182, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 0.8117758016707087, | |
| "grad_norm": 0.9198417663574219, | |
| "learning_rate": 0.00018235533536371421, | |
| "loss": 0.5195, | |
| "step": 24100 | |
| }, | |
| { | |
| "epoch": 0.8151441659929938, | |
| "grad_norm": 0.8950690627098083, | |
| "learning_rate": 0.0001820746375489467, | |
| "loss": 0.5174, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 0.8185125303152789, | |
| "grad_norm": 0.9775617718696594, | |
| "learning_rate": 0.00018179393973417917, | |
| "loss": 0.5163, | |
| "step": 24300 | |
| }, | |
| { | |
| "epoch": 0.821880894637564, | |
| "grad_norm": 0.961654543876648, | |
| "learning_rate": 0.00018151324191941165, | |
| "loss": 0.5145, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 0.8252492589598491, | |
| "grad_norm": 0.884971559047699, | |
| "learning_rate": 0.00018123254410464415, | |
| "loss": 0.5159, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 0.8286176232821342, | |
| "grad_norm": 0.9463781118392944, | |
| "learning_rate": 0.00018095184628987666, | |
| "loss": 0.5147, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 0.8319859876044193, | |
| "grad_norm": 0.9335620999336243, | |
| "learning_rate": 0.00018067114847510914, | |
| "loss": 0.5148, | |
| "step": 24700 | |
| }, | |
| { | |
| "epoch": 0.8353543519267044, | |
| "grad_norm": 1.0065468549728394, | |
| "learning_rate": 0.00018039045066034162, | |
| "loss": 0.5145, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 0.8387227162489895, | |
| "grad_norm": 0.9249733686447144, | |
| "learning_rate": 0.0001801097528455741, | |
| "loss": 0.5144, | |
| "step": 24900 | |
| }, | |
| { | |
| "epoch": 0.8420910805712746, | |
| "grad_norm": 0.9696065783500671, | |
| "learning_rate": 0.0001798290550308066, | |
| "loss": 0.5146, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 0.8454594448935597, | |
| "grad_norm": 0.9490009546279907, | |
| "learning_rate": 0.00017954835721603908, | |
| "loss": 0.5128, | |
| "step": 25100 | |
| }, | |
| { | |
| "epoch": 0.8488278092158448, | |
| "grad_norm": 0.9294765591621399, | |
| "learning_rate": 0.00017926765940127156, | |
| "loss": 0.5128, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 0.8521961735381299, | |
| "grad_norm": 0.9910796284675598, | |
| "learning_rate": 0.00017898696158650406, | |
| "loss": 0.5118, | |
| "step": 25300 | |
| }, | |
| { | |
| "epoch": 0.855564537860415, | |
| "grad_norm": 0.9949105381965637, | |
| "learning_rate": 0.00017870626377173654, | |
| "loss": 0.511, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 0.8589329021827001, | |
| "grad_norm": 0.9345620274543762, | |
| "learning_rate": 0.00017842556595696905, | |
| "loss": 0.5119, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 0.8623012665049852, | |
| "grad_norm": 0.9553151726722717, | |
| "learning_rate": 0.00017814486814220153, | |
| "loss": 0.5103, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 0.8656696308272703, | |
| "grad_norm": 0.878685474395752, | |
| "learning_rate": 0.000177864170327434, | |
| "loss": 0.5112, | |
| "step": 25700 | |
| }, | |
| { | |
| "epoch": 0.8690379951495554, | |
| "grad_norm": 0.9728811979293823, | |
| "learning_rate": 0.00017758347251266648, | |
| "loss": 0.5088, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 0.8724063594718404, | |
| "grad_norm": 0.9711565375328064, | |
| "learning_rate": 0.000177302774697899, | |
| "loss": 0.5087, | |
| "step": 25900 | |
| }, | |
| { | |
| "epoch": 0.8757747237941256, | |
| "grad_norm": 0.9093062281608582, | |
| "learning_rate": 0.0001770220768831315, | |
| "loss": 0.5086, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 0.8791430881164106, | |
| "grad_norm": 0.9751853942871094, | |
| "learning_rate": 0.00017674137906836397, | |
| "loss": 0.5106, | |
| "step": 26100 | |
| }, | |
| { | |
| "epoch": 0.8825114524386958, | |
| "grad_norm": 0.9044291377067566, | |
| "learning_rate": 0.00017646068125359645, | |
| "loss": 0.5077, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 0.8858798167609808, | |
| "grad_norm": 0.9224226474761963, | |
| "learning_rate": 0.00017617998343882893, | |
| "loss": 0.5075, | |
| "step": 26300 | |
| }, | |
| { | |
| "epoch": 0.889248181083266, | |
| "grad_norm": 0.9981474876403809, | |
| "learning_rate": 0.0001758992856240614, | |
| "loss": 0.5048, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 0.892616545405551, | |
| "grad_norm": 0.8626927733421326, | |
| "learning_rate": 0.0001756185878092939, | |
| "loss": 0.506, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 0.8959849097278362, | |
| "grad_norm": 0.8800698518753052, | |
| "learning_rate": 0.00017533788999452642, | |
| "loss": 0.5067, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 0.8993532740501212, | |
| "grad_norm": 0.8937718272209167, | |
| "learning_rate": 0.0001750571921797589, | |
| "loss": 0.5059, | |
| "step": 26700 | |
| }, | |
| { | |
| "epoch": 0.9027216383724064, | |
| "grad_norm": 0.8680539727210999, | |
| "learning_rate": 0.00017477649436499138, | |
| "loss": 0.5074, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 0.9060900026946914, | |
| "grad_norm": 0.8701693415641785, | |
| "learning_rate": 0.00017449579655022385, | |
| "loss": 0.5048, | |
| "step": 26900 | |
| }, | |
| { | |
| "epoch": 0.9094583670169766, | |
| "grad_norm": 0.937451958656311, | |
| "learning_rate": 0.00017421509873545636, | |
| "loss": 0.5036, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 0.9128267313392616, | |
| "grad_norm": 0.845152735710144, | |
| "learning_rate": 0.00017393440092068884, | |
| "loss": 0.5015, | |
| "step": 27100 | |
| }, | |
| { | |
| "epoch": 0.9161950956615468, | |
| "grad_norm": 0.8485780358314514, | |
| "learning_rate": 0.00017365370310592132, | |
| "loss": 0.5021, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 0.9195634599838318, | |
| "grad_norm": 0.8812822699546814, | |
| "learning_rate": 0.00017337300529115382, | |
| "loss": 0.5028, | |
| "step": 27300 | |
| }, | |
| { | |
| "epoch": 0.922931824306117, | |
| "grad_norm": 0.9817461371421814, | |
| "learning_rate": 0.0001730923074763863, | |
| "loss": 0.5032, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 0.926300188628402, | |
| "grad_norm": 0.8648643493652344, | |
| "learning_rate": 0.0001728116096616188, | |
| "loss": 0.5023, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 0.9296685529506872, | |
| "grad_norm": 0.8859161734580994, | |
| "learning_rate": 0.00017253091184685128, | |
| "loss": 0.5022, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 0.9330369172729722, | |
| "grad_norm": 0.8662147521972656, | |
| "learning_rate": 0.00017225021403208376, | |
| "loss": 0.5, | |
| "step": 27700 | |
| }, | |
| { | |
| "epoch": 0.9364052815952574, | |
| "grad_norm": 0.9094113111495972, | |
| "learning_rate": 0.00017196951621731624, | |
| "loss": 0.5018, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 0.9397736459175424, | |
| "grad_norm": 0.924689531326294, | |
| "learning_rate": 0.00017168881840254875, | |
| "loss": 0.5008, | |
| "step": 27900 | |
| }, | |
| { | |
| "epoch": 0.9431420102398276, | |
| "grad_norm": 0.8770294785499573, | |
| "learning_rate": 0.00017140812058778125, | |
| "loss": 0.5023, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 0.9465103745621126, | |
| "grad_norm": 0.8615702390670776, | |
| "learning_rate": 0.00017112742277301373, | |
| "loss": 0.4988, | |
| "step": 28100 | |
| }, | |
| { | |
| "epoch": 0.9498787388843978, | |
| "grad_norm": 0.9163374304771423, | |
| "learning_rate": 0.0001708467249582462, | |
| "loss": 0.5004, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 0.9532471032066828, | |
| "grad_norm": 0.8876280784606934, | |
| "learning_rate": 0.0001705660271434787, | |
| "loss": 0.5005, | |
| "step": 28300 | |
| }, | |
| { | |
| "epoch": 0.956615467528968, | |
| "grad_norm": 0.9345399737358093, | |
| "learning_rate": 0.00017028532932871117, | |
| "loss": 0.498, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 0.959983831851253, | |
| "grad_norm": 0.8554583191871643, | |
| "learning_rate": 0.00017000463151394367, | |
| "loss": 0.4995, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 0.9633521961735382, | |
| "grad_norm": 0.910744845867157, | |
| "learning_rate": 0.00016972393369917615, | |
| "loss": 0.499, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 0.9667205604958232, | |
| "grad_norm": 0.9200494289398193, | |
| "learning_rate": 0.00016944323588440866, | |
| "loss": 0.497, | |
| "step": 28700 | |
| }, | |
| { | |
| "epoch": 0.9700889248181084, | |
| "grad_norm": 0.821864902973175, | |
| "learning_rate": 0.00016916253806964113, | |
| "loss": 0.4976, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 0.9734572891403934, | |
| "grad_norm": 0.8839085698127747, | |
| "learning_rate": 0.0001688818402548736, | |
| "loss": 0.4981, | |
| "step": 28900 | |
| }, | |
| { | |
| "epoch": 0.9768256534626786, | |
| "grad_norm": 0.8938930630683899, | |
| "learning_rate": 0.00016860114244010612, | |
| "loss": 0.4982, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 0.9801940177849636, | |
| "grad_norm": 0.8309621810913086, | |
| "learning_rate": 0.0001683204446253386, | |
| "loss": 0.4971, | |
| "step": 29100 | |
| }, | |
| { | |
| "epoch": 0.9835623821072487, | |
| "grad_norm": 0.8898798227310181, | |
| "learning_rate": 0.00016803974681057107, | |
| "loss": 0.4981, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 0.9869307464295338, | |
| "grad_norm": 0.9762869477272034, | |
| "learning_rate": 0.00016775904899580358, | |
| "loss": 0.4968, | |
| "step": 29300 | |
| }, | |
| { | |
| "epoch": 0.9902991107518189, | |
| "grad_norm": 0.8826524615287781, | |
| "learning_rate": 0.00016747835118103606, | |
| "loss": 0.4983, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 0.993667475074104, | |
| "grad_norm": 0.8983336687088013, | |
| "learning_rate": 0.00016719765336626856, | |
| "loss": 0.4964, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 0.9970358393963891, | |
| "grad_norm": 0.8700274229049683, | |
| "learning_rate": 0.00016691695555150104, | |
| "loss": 0.496, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_loss": 0.49384912848472595, | |
| "eval_runtime": 9.0835, | |
| "eval_samples_per_second": 550.447, | |
| "eval_steps_per_second": 8.697, | |
| "step": 29688 | |
| }, | |
| { | |
| "epoch": 1.0004042037186742, | |
| "grad_norm": 0.9031352996826172, | |
| "learning_rate": 0.00016663625773673352, | |
| "loss": 0.4948, | |
| "step": 29700 | |
| }, | |
| { | |
| "epoch": 1.0037725680409593, | |
| "grad_norm": 0.8552715182304382, | |
| "learning_rate": 0.000166355559921966, | |
| "loss": 0.4954, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 1.0071409323632443, | |
| "grad_norm": 0.8794796466827393, | |
| "learning_rate": 0.0001660748621071985, | |
| "loss": 0.4944, | |
| "step": 29900 | |
| }, | |
| { | |
| "epoch": 1.0105092966855296, | |
| "grad_norm": 0.876146137714386, | |
| "learning_rate": 0.000165794164292431, | |
| "loss": 0.4954, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 1.0138776610078146, | |
| "grad_norm": 0.8548246026039124, | |
| "learning_rate": 0.0001655134664776635, | |
| "loss": 0.4957, | |
| "step": 30100 | |
| }, | |
| { | |
| "epoch": 1.0172460253300997, | |
| "grad_norm": 0.8883000016212463, | |
| "learning_rate": 0.00016523276866289597, | |
| "loss": 0.4939, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 1.0206143896523847, | |
| "grad_norm": 0.8102014064788818, | |
| "learning_rate": 0.00016495207084812845, | |
| "loss": 0.491, | |
| "step": 30300 | |
| }, | |
| { | |
| "epoch": 1.02398275397467, | |
| "grad_norm": 0.9280298948287964, | |
| "learning_rate": 0.00016467137303336095, | |
| "loss": 0.4939, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 1.027351118296955, | |
| "grad_norm": 0.9322350025177002, | |
| "learning_rate": 0.00016439067521859343, | |
| "loss": 0.4923, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 1.03071948261924, | |
| "grad_norm": 0.8731549978256226, | |
| "learning_rate": 0.0001641099774038259, | |
| "loss": 0.4929, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 1.0340878469415251, | |
| "grad_norm": 0.8500041365623474, | |
| "learning_rate": 0.00016382927958905841, | |
| "loss": 0.492, | |
| "step": 30700 | |
| }, | |
| { | |
| "epoch": 1.0374562112638104, | |
| "grad_norm": 0.8375087976455688, | |
| "learning_rate": 0.0001635485817742909, | |
| "loss": 0.4917, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 1.0408245755860954, | |
| "grad_norm": 0.8288936018943787, | |
| "learning_rate": 0.0001632678839595234, | |
| "loss": 0.4928, | |
| "step": 30900 | |
| }, | |
| { | |
| "epoch": 1.0441929399083805, | |
| "grad_norm": 0.8341562151908875, | |
| "learning_rate": 0.00016298718614475588, | |
| "loss": 0.4889, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 1.0475613042306655, | |
| "grad_norm": 0.8432872891426086, | |
| "learning_rate": 0.00016270648832998835, | |
| "loss": 0.4915, | |
| "step": 31100 | |
| }, | |
| { | |
| "epoch": 1.0509296685529508, | |
| "grad_norm": 0.8462439775466919, | |
| "learning_rate": 0.00016242579051522083, | |
| "loss": 0.4883, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 1.0542980328752358, | |
| "grad_norm": 0.8429282903671265, | |
| "learning_rate": 0.00016214509270045334, | |
| "loss": 0.4895, | |
| "step": 31300 | |
| }, | |
| { | |
| "epoch": 1.0576663971975209, | |
| "grad_norm": 0.8985344767570496, | |
| "learning_rate": 0.00016186439488568584, | |
| "loss": 0.4906, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 1.061034761519806, | |
| "grad_norm": 0.9159397482872009, | |
| "learning_rate": 0.00016158369707091832, | |
| "loss": 0.4891, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 1.0644031258420912, | |
| "grad_norm": 0.8448222279548645, | |
| "learning_rate": 0.0001613029992561508, | |
| "loss": 0.4891, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 1.0677714901643762, | |
| "grad_norm": 0.8303894400596619, | |
| "learning_rate": 0.00016102230144138328, | |
| "loss": 0.4902, | |
| "step": 31700 | |
| }, | |
| { | |
| "epoch": 1.0711398544866613, | |
| "grad_norm": 0.8498880863189697, | |
| "learning_rate": 0.00016074160362661576, | |
| "loss": 0.4871, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 1.0745082188089463, | |
| "grad_norm": 0.7907134294509888, | |
| "learning_rate": 0.00016046090581184826, | |
| "loss": 0.4885, | |
| "step": 31900 | |
| }, | |
| { | |
| "epoch": 1.0778765831312316, | |
| "grad_norm": 0.9202895164489746, | |
| "learning_rate": 0.00016018020799708077, | |
| "loss": 0.4888, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 1.0812449474535166, | |
| "grad_norm": 0.8670128583908081, | |
| "learning_rate": 0.00015989951018231325, | |
| "loss": 0.4859, | |
| "step": 32100 | |
| }, | |
| { | |
| "epoch": 1.0846133117758017, | |
| "grad_norm": 0.8007021546363831, | |
| "learning_rate": 0.00015961881236754573, | |
| "loss": 0.4885, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 1.0879816760980867, | |
| "grad_norm": 0.9113264083862305, | |
| "learning_rate": 0.0001593381145527782, | |
| "loss": 0.4876, | |
| "step": 32300 | |
| }, | |
| { | |
| "epoch": 1.0913500404203718, | |
| "grad_norm": 0.8807794451713562, | |
| "learning_rate": 0.0001590574167380107, | |
| "loss": 0.489, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 1.094718404742657, | |
| "grad_norm": 0.8606187105178833, | |
| "learning_rate": 0.0001587767189232432, | |
| "loss": 0.489, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 1.098086769064942, | |
| "grad_norm": 0.8390567898750305, | |
| "learning_rate": 0.00015849602110847567, | |
| "loss": 0.4874, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 1.1014551333872271, | |
| "grad_norm": 0.8143624663352966, | |
| "learning_rate": 0.00015821532329370817, | |
| "loss": 0.4883, | |
| "step": 32700 | |
| }, | |
| { | |
| "epoch": 1.1048234977095122, | |
| "grad_norm": 0.9023911356925964, | |
| "learning_rate": 0.00015793462547894065, | |
| "loss": 0.4884, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 1.1081918620317974, | |
| "grad_norm": 0.9291363954544067, | |
| "learning_rate": 0.00015765392766417316, | |
| "loss": 0.4869, | |
| "step": 32900 | |
| }, | |
| { | |
| "epoch": 1.1115602263540825, | |
| "grad_norm": 0.834904134273529, | |
| "learning_rate": 0.00015737322984940563, | |
| "loss": 0.4863, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 1.1149285906763675, | |
| "grad_norm": 0.8896390795707703, | |
| "learning_rate": 0.0001570925320346381, | |
| "loss": 0.485, | |
| "step": 33100 | |
| }, | |
| { | |
| "epoch": 1.1182969549986526, | |
| "grad_norm": 0.8215962648391724, | |
| "learning_rate": 0.0001568118342198706, | |
| "loss": 0.4867, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 1.1216653193209378, | |
| "grad_norm": 0.8174338936805725, | |
| "learning_rate": 0.0001565311364051031, | |
| "loss": 0.4865, | |
| "step": 33300 | |
| }, | |
| { | |
| "epoch": 1.1250336836432229, | |
| "grad_norm": 0.8599314093589783, | |
| "learning_rate": 0.0001562504385903356, | |
| "loss": 0.4832, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 1.128402047965508, | |
| "grad_norm": 0.7674278020858765, | |
| "learning_rate": 0.00015596974077556808, | |
| "loss": 0.4846, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 1.131770412287793, | |
| "grad_norm": 0.8474441170692444, | |
| "learning_rate": 0.00015568904296080056, | |
| "loss": 0.4848, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 1.1351387766100782, | |
| "grad_norm": 0.8045397996902466, | |
| "learning_rate": 0.00015540834514603304, | |
| "loss": 0.483, | |
| "step": 33700 | |
| }, | |
| { | |
| "epoch": 1.1385071409323633, | |
| "grad_norm": 0.8756964802742004, | |
| "learning_rate": 0.00015512764733126552, | |
| "loss": 0.4836, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 1.1418755052546483, | |
| "grad_norm": 0.8357768654823303, | |
| "learning_rate": 0.00015484694951649802, | |
| "loss": 0.4852, | |
| "step": 33900 | |
| }, | |
| { | |
| "epoch": 1.1452438695769334, | |
| "grad_norm": 0.9370204210281372, | |
| "learning_rate": 0.0001545662517017305, | |
| "loss": 0.4839, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 1.1486122338992186, | |
| "grad_norm": 0.8853762149810791, | |
| "learning_rate": 0.000154285553886963, | |
| "loss": 0.4844, | |
| "step": 34100 | |
| }, | |
| { | |
| "epoch": 1.1519805982215037, | |
| "grad_norm": 0.7827624678611755, | |
| "learning_rate": 0.00015400485607219548, | |
| "loss": 0.4832, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 1.1553489625437887, | |
| "grad_norm": 0.8665288090705872, | |
| "learning_rate": 0.00015372415825742796, | |
| "loss": 0.4838, | |
| "step": 34300 | |
| }, | |
| { | |
| "epoch": 1.1587173268660738, | |
| "grad_norm": 0.8360339403152466, | |
| "learning_rate": 0.00015344346044266047, | |
| "loss": 0.4821, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 1.162085691188359, | |
| "grad_norm": 0.8605954051017761, | |
| "learning_rate": 0.00015316276262789295, | |
| "loss": 0.4825, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 1.165454055510644, | |
| "grad_norm": 0.857475221157074, | |
| "learning_rate": 0.00015288206481312542, | |
| "loss": 0.4827, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 1.1688224198329291, | |
| "grad_norm": 0.8108141422271729, | |
| "learning_rate": 0.00015260136699835793, | |
| "loss": 0.4803, | |
| "step": 34700 | |
| }, | |
| { | |
| "epoch": 1.1721907841552142, | |
| "grad_norm": 0.8359714150428772, | |
| "learning_rate": 0.0001523206691835904, | |
| "loss": 0.4809, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 1.1755591484774994, | |
| "grad_norm": 0.8128540515899658, | |
| "learning_rate": 0.00015203997136882291, | |
| "loss": 0.4823, | |
| "step": 34900 | |
| }, | |
| { | |
| "epoch": 1.1789275127997845, | |
| "grad_norm": 0.8871669769287109, | |
| "learning_rate": 0.0001517592735540554, | |
| "loss": 0.4806, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 1.1822958771220695, | |
| "grad_norm": 0.8477233052253723, | |
| "learning_rate": 0.00015147857573928787, | |
| "loss": 0.481, | |
| "step": 35100 | |
| }, | |
| { | |
| "epoch": 1.1856642414443546, | |
| "grad_norm": 0.7827205061912537, | |
| "learning_rate": 0.00015119787792452035, | |
| "loss": 0.4792, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 1.1890326057666396, | |
| "grad_norm": 0.8286157250404358, | |
| "learning_rate": 0.00015091718010975286, | |
| "loss": 0.4782, | |
| "step": 35300 | |
| }, | |
| { | |
| "epoch": 1.1924009700889249, | |
| "grad_norm": 0.76893150806427, | |
| "learning_rate": 0.00015063648229498536, | |
| "loss": 0.4805, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 1.19576933441121, | |
| "grad_norm": 0.8076749444007874, | |
| "learning_rate": 0.00015035578448021784, | |
| "loss": 0.4813, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 1.199137698733495, | |
| "grad_norm": 0.8551127910614014, | |
| "learning_rate": 0.00015007508666545032, | |
| "loss": 0.4797, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 1.2025060630557802, | |
| "grad_norm": 0.9260111451148987, | |
| "learning_rate": 0.0001497943888506828, | |
| "loss": 0.4801, | |
| "step": 35700 | |
| }, | |
| { | |
| "epoch": 1.2058744273780653, | |
| "grad_norm": 0.9091964960098267, | |
| "learning_rate": 0.00014951369103591527, | |
| "loss": 0.4782, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 1.2092427917003503, | |
| "grad_norm": 0.8588406443595886, | |
| "learning_rate": 0.00014923299322114778, | |
| "loss": 0.4806, | |
| "step": 35900 | |
| }, | |
| { | |
| "epoch": 1.2126111560226354, | |
| "grad_norm": 0.8295513391494751, | |
| "learning_rate": 0.00014895229540638026, | |
| "loss": 0.479, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 1.2159795203449204, | |
| "grad_norm": 0.8360409736633301, | |
| "learning_rate": 0.00014867159759161276, | |
| "loss": 0.4793, | |
| "step": 36100 | |
| }, | |
| { | |
| "epoch": 1.2193478846672057, | |
| "grad_norm": 0.8704560995101929, | |
| "learning_rate": 0.00014839089977684524, | |
| "loss": 0.4788, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 1.2227162489894907, | |
| "grad_norm": 0.8278842568397522, | |
| "learning_rate": 0.00014811020196207772, | |
| "loss": 0.4796, | |
| "step": 36300 | |
| }, | |
| { | |
| "epoch": 1.2260846133117758, | |
| "grad_norm": 0.8524438142776489, | |
| "learning_rate": 0.00014782950414731023, | |
| "loss": 0.4784, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 1.2294529776340608, | |
| "grad_norm": 0.7825035452842712, | |
| "learning_rate": 0.0001475488063325427, | |
| "loss": 0.4783, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 1.232821341956346, | |
| "grad_norm": 0.8001949787139893, | |
| "learning_rate": 0.00014726810851777518, | |
| "loss": 0.4789, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 1.2361897062786311, | |
| "grad_norm": 0.7923149466514587, | |
| "learning_rate": 0.0001469874107030077, | |
| "loss": 0.4788, | |
| "step": 36700 | |
| }, | |
| { | |
| "epoch": 1.2395580706009162, | |
| "grad_norm": 0.8405751585960388, | |
| "learning_rate": 0.0001467067128882402, | |
| "loss": 0.4773, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 1.2429264349232012, | |
| "grad_norm": 0.8324115872383118, | |
| "learning_rate": 0.00014642601507347267, | |
| "loss": 0.4782, | |
| "step": 36900 | |
| }, | |
| { | |
| "epoch": 1.2462947992454865, | |
| "grad_norm": 0.8548023700714111, | |
| "learning_rate": 0.00014614531725870515, | |
| "loss": 0.4798, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 1.2496631635677715, | |
| "grad_norm": 0.8439319729804993, | |
| "learning_rate": 0.00014586461944393763, | |
| "loss": 0.4757, | |
| "step": 37100 | |
| }, | |
| { | |
| "epoch": 1.2530315278900566, | |
| "grad_norm": 0.7825635075569153, | |
| "learning_rate": 0.0001455839216291701, | |
| "loss": 0.4783, | |
| "step": 37200 | |
| }, | |
| { | |
| "epoch": 1.2563998922123416, | |
| "grad_norm": 0.8164156675338745, | |
| "learning_rate": 0.0001453032238144026, | |
| "loss": 0.4779, | |
| "step": 37300 | |
| }, | |
| { | |
| "epoch": 1.2597682565346267, | |
| "grad_norm": 0.8076338768005371, | |
| "learning_rate": 0.00014502252599963512, | |
| "loss": 0.4773, | |
| "step": 37400 | |
| }, | |
| { | |
| "epoch": 1.263136620856912, | |
| "grad_norm": 0.8112064003944397, | |
| "learning_rate": 0.0001447418281848676, | |
| "loss": 0.4754, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 1.266504985179197, | |
| "grad_norm": 0.7940359711647034, | |
| "learning_rate": 0.00014446113037010008, | |
| "loss": 0.4745, | |
| "step": 37600 | |
| }, | |
| { | |
| "epoch": 1.269873349501482, | |
| "grad_norm": 0.8495946526527405, | |
| "learning_rate": 0.00014418043255533255, | |
| "loss": 0.4758, | |
| "step": 37700 | |
| }, | |
| { | |
| "epoch": 1.2732417138237673, | |
| "grad_norm": 0.8374922275543213, | |
| "learning_rate": 0.00014389973474056506, | |
| "loss": 0.4771, | |
| "step": 37800 | |
| }, | |
| { | |
| "epoch": 1.2766100781460523, | |
| "grad_norm": 0.8647417426109314, | |
| "learning_rate": 0.00014361903692579754, | |
| "loss": 0.4771, | |
| "step": 37900 | |
| }, | |
| { | |
| "epoch": 1.2799784424683374, | |
| "grad_norm": 0.8156632781028748, | |
| "learning_rate": 0.00014333833911103002, | |
| "loss": 0.4747, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 1.2833468067906224, | |
| "grad_norm": 0.7802369594573975, | |
| "learning_rate": 0.00014305764129626252, | |
| "loss": 0.4741, | |
| "step": 38100 | |
| }, | |
| { | |
| "epoch": 1.2867151711129075, | |
| "grad_norm": 0.7542524337768555, | |
| "learning_rate": 0.000142776943481495, | |
| "loss": 0.4761, | |
| "step": 38200 | |
| }, | |
| { | |
| "epoch": 1.2900835354351927, | |
| "grad_norm": 0.8326511383056641, | |
| "learning_rate": 0.0001424962456667275, | |
| "loss": 0.4734, | |
| "step": 38300 | |
| }, | |
| { | |
| "epoch": 1.2934518997574778, | |
| "grad_norm": 0.7556424736976624, | |
| "learning_rate": 0.00014221554785195998, | |
| "loss": 0.4757, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 1.2968202640797628, | |
| "grad_norm": 0.8151201605796814, | |
| "learning_rate": 0.00014193485003719246, | |
| "loss": 0.4743, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 1.300188628402048, | |
| "grad_norm": 0.8914119601249695, | |
| "learning_rate": 0.00014165415222242494, | |
| "loss": 0.4769, | |
| "step": 38600 | |
| }, | |
| { | |
| "epoch": 1.3035569927243331, | |
| "grad_norm": 0.8541133999824524, | |
| "learning_rate": 0.00014137345440765745, | |
| "loss": 0.4744, | |
| "step": 38700 | |
| }, | |
| { | |
| "epoch": 1.3069253570466182, | |
| "grad_norm": 0.8853744864463806, | |
| "learning_rate": 0.00014109275659288995, | |
| "loss": 0.474, | |
| "step": 38800 | |
| }, | |
| { | |
| "epoch": 1.3102937213689032, | |
| "grad_norm": 0.8547524809837341, | |
| "learning_rate": 0.00014081205877812243, | |
| "loss": 0.474, | |
| "step": 38900 | |
| }, | |
| { | |
| "epoch": 1.3136620856911883, | |
| "grad_norm": 0.7881298661231995, | |
| "learning_rate": 0.0001405313609633549, | |
| "loss": 0.4727, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 1.3170304500134735, | |
| "grad_norm": 0.7588589191436768, | |
| "learning_rate": 0.0001402506631485874, | |
| "loss": 0.473, | |
| "step": 39100 | |
| }, | |
| { | |
| "epoch": 1.3203988143357586, | |
| "grad_norm": 0.7980801463127136, | |
| "learning_rate": 0.00013996996533381987, | |
| "loss": 0.4727, | |
| "step": 39200 | |
| }, | |
| { | |
| "epoch": 1.3237671786580436, | |
| "grad_norm": 0.8034206628799438, | |
| "learning_rate": 0.00013968926751905237, | |
| "loss": 0.4737, | |
| "step": 39300 | |
| }, | |
| { | |
| "epoch": 1.3271355429803289, | |
| "grad_norm": 0.7804720401763916, | |
| "learning_rate": 0.00013940856970428485, | |
| "loss": 0.4754, | |
| "step": 39400 | |
| }, | |
| { | |
| "epoch": 1.330503907302614, | |
| "grad_norm": 0.8541818261146545, | |
| "learning_rate": 0.00013912787188951736, | |
| "loss": 0.4733, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 1.333872271624899, | |
| "grad_norm": 0.8339990377426147, | |
| "learning_rate": 0.00013884717407474983, | |
| "loss": 0.4721, | |
| "step": 39600 | |
| }, | |
| { | |
| "epoch": 1.337240635947184, | |
| "grad_norm": 0.8007979393005371, | |
| "learning_rate": 0.0001385664762599823, | |
| "loss": 0.4745, | |
| "step": 39700 | |
| }, | |
| { | |
| "epoch": 1.340609000269469, | |
| "grad_norm": 0.848199188709259, | |
| "learning_rate": 0.00013828577844521482, | |
| "loss": 0.4725, | |
| "step": 39800 | |
| }, | |
| { | |
| "epoch": 1.3439773645917543, | |
| "grad_norm": 0.9129810333251953, | |
| "learning_rate": 0.0001380050806304473, | |
| "loss": 0.4716, | |
| "step": 39900 | |
| }, | |
| { | |
| "epoch": 1.3473457289140394, | |
| "grad_norm": 0.869888186454773, | |
| "learning_rate": 0.00013772438281567978, | |
| "loss": 0.4744, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 1.3507140932363244, | |
| "grad_norm": 0.8916295170783997, | |
| "learning_rate": 0.00013744368500091228, | |
| "loss": 0.4712, | |
| "step": 40100 | |
| }, | |
| { | |
| "epoch": 1.3540824575586097, | |
| "grad_norm": 0.8144074082374573, | |
| "learning_rate": 0.00013716298718614476, | |
| "loss": 0.4734, | |
| "step": 40200 | |
| }, | |
| { | |
| "epoch": 1.3574508218808947, | |
| "grad_norm": 0.7844826579093933, | |
| "learning_rate": 0.00013688228937137726, | |
| "loss": 0.473, | |
| "step": 40300 | |
| }, | |
| { | |
| "epoch": 1.3608191862031798, | |
| "grad_norm": 0.8559306859970093, | |
| "learning_rate": 0.00013660159155660974, | |
| "loss": 0.4708, | |
| "step": 40400 | |
| }, | |
| { | |
| "epoch": 1.3641875505254648, | |
| "grad_norm": 0.7995209693908691, | |
| "learning_rate": 0.00013632089374184222, | |
| "loss": 0.472, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 1.3675559148477499, | |
| "grad_norm": 0.845758855342865, | |
| "learning_rate": 0.0001360401959270747, | |
| "loss": 0.4714, | |
| "step": 40600 | |
| }, | |
| { | |
| "epoch": 1.3709242791700351, | |
| "grad_norm": 0.8122411370277405, | |
| "learning_rate": 0.0001357594981123072, | |
| "loss": 0.4715, | |
| "step": 40700 | |
| }, | |
| { | |
| "epoch": 1.3742926434923202, | |
| "grad_norm": 0.7860530614852905, | |
| "learning_rate": 0.0001354788002975397, | |
| "loss": 0.4718, | |
| "step": 40800 | |
| }, | |
| { | |
| "epoch": 1.3776610078146052, | |
| "grad_norm": 0.7795781493186951, | |
| "learning_rate": 0.0001351981024827722, | |
| "loss": 0.4696, | |
| "step": 40900 | |
| }, | |
| { | |
| "epoch": 1.3810293721368903, | |
| "grad_norm": 0.7595000267028809, | |
| "learning_rate": 0.00013491740466800467, | |
| "loss": 0.4703, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 1.3843977364591753, | |
| "grad_norm": 0.8687454462051392, | |
| "learning_rate": 0.00013463670685323715, | |
| "loss": 0.4698, | |
| "step": 41100 | |
| }, | |
| { | |
| "epoch": 1.3877661007814606, | |
| "grad_norm": 0.8719391226768494, | |
| "learning_rate": 0.00013435600903846962, | |
| "loss": 0.4689, | |
| "step": 41200 | |
| }, | |
| { | |
| "epoch": 1.3911344651037456, | |
| "grad_norm": 0.8451808094978333, | |
| "learning_rate": 0.00013407531122370213, | |
| "loss": 0.4681, | |
| "step": 41300 | |
| }, | |
| { | |
| "epoch": 1.3945028294260307, | |
| "grad_norm": 0.8027797341346741, | |
| "learning_rate": 0.0001337946134089346, | |
| "loss": 0.4717, | |
| "step": 41400 | |
| }, | |
| { | |
| "epoch": 1.397871193748316, | |
| "grad_norm": 0.7488086819648743, | |
| "learning_rate": 0.00013351391559416711, | |
| "loss": 0.4694, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 1.401239558070601, | |
| "grad_norm": 0.8326307535171509, | |
| "learning_rate": 0.0001332332177793996, | |
| "loss": 0.4693, | |
| "step": 41600 | |
| }, | |
| { | |
| "epoch": 1.404607922392886, | |
| "grad_norm": 0.8087652325630188, | |
| "learning_rate": 0.00013295251996463207, | |
| "loss": 0.4684, | |
| "step": 41700 | |
| }, | |
| { | |
| "epoch": 1.407976286715171, | |
| "grad_norm": 0.7918603420257568, | |
| "learning_rate": 0.00013267182214986458, | |
| "loss": 0.47, | |
| "step": 41800 | |
| }, | |
| { | |
| "epoch": 1.411344651037456, | |
| "grad_norm": 0.8231304883956909, | |
| "learning_rate": 0.00013239112433509705, | |
| "loss": 0.4694, | |
| "step": 41900 | |
| }, | |
| { | |
| "epoch": 1.4147130153597414, | |
| "grad_norm": 0.7812530994415283, | |
| "learning_rate": 0.00013211042652032953, | |
| "loss": 0.4695, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 1.4180813796820264, | |
| "grad_norm": 0.854972779750824, | |
| "learning_rate": 0.00013182972870556204, | |
| "loss": 0.47, | |
| "step": 42100 | |
| }, | |
| { | |
| "epoch": 1.4214497440043115, | |
| "grad_norm": 0.8728025555610657, | |
| "learning_rate": 0.00013154903089079452, | |
| "loss": 0.468, | |
| "step": 42200 | |
| }, | |
| { | |
| "epoch": 1.4248181083265967, | |
| "grad_norm": 0.8394129276275635, | |
| "learning_rate": 0.00013126833307602702, | |
| "loss": 0.4698, | |
| "step": 42300 | |
| }, | |
| { | |
| "epoch": 1.4281864726488818, | |
| "grad_norm": 0.7810468673706055, | |
| "learning_rate": 0.0001309876352612595, | |
| "loss": 0.4694, | |
| "step": 42400 | |
| }, | |
| { | |
| "epoch": 1.4315548369711668, | |
| "grad_norm": 0.8251649737358093, | |
| "learning_rate": 0.00013070693744649198, | |
| "loss": 0.4651, | |
| "step": 42500 | |
| }, | |
| { | |
| "epoch": 1.4349232012934519, | |
| "grad_norm": 0.8438547253608704, | |
| "learning_rate": 0.00013042623963172446, | |
| "loss": 0.4685, | |
| "step": 42600 | |
| }, | |
| { | |
| "epoch": 1.438291565615737, | |
| "grad_norm": 0.7687946557998657, | |
| "learning_rate": 0.00013014554181695696, | |
| "loss": 0.4657, | |
| "step": 42700 | |
| }, | |
| { | |
| "epoch": 1.4416599299380222, | |
| "grad_norm": 0.7573995590209961, | |
| "learning_rate": 0.00012986484400218947, | |
| "loss": 0.4667, | |
| "step": 42800 | |
| }, | |
| { | |
| "epoch": 1.4450282942603072, | |
| "grad_norm": 0.8200283646583557, | |
| "learning_rate": 0.00012958414618742195, | |
| "loss": 0.4666, | |
| "step": 42900 | |
| }, | |
| { | |
| "epoch": 1.4483966585825923, | |
| "grad_norm": 0.8411341905593872, | |
| "learning_rate": 0.00012930344837265443, | |
| "loss": 0.4679, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 1.4517650229048775, | |
| "grad_norm": 0.8489885330200195, | |
| "learning_rate": 0.0001290227505578869, | |
| "loss": 0.4679, | |
| "step": 43100 | |
| }, | |
| { | |
| "epoch": 1.4551333872271626, | |
| "grad_norm": 0.8161250352859497, | |
| "learning_rate": 0.00012874205274311938, | |
| "loss": 0.4688, | |
| "step": 43200 | |
| }, | |
| { | |
| "epoch": 1.4585017515494476, | |
| "grad_norm": 0.7844269871711731, | |
| "learning_rate": 0.0001284613549283519, | |
| "loss": 0.4666, | |
| "step": 43300 | |
| }, | |
| { | |
| "epoch": 1.4618701158717327, | |
| "grad_norm": 0.7773265838623047, | |
| "learning_rate": 0.00012818065711358437, | |
| "loss": 0.4687, | |
| "step": 43400 | |
| }, | |
| { | |
| "epoch": 1.4652384801940177, | |
| "grad_norm": 0.8081590533256531, | |
| "learning_rate": 0.00012789995929881687, | |
| "loss": 0.4643, | |
| "step": 43500 | |
| }, | |
| { | |
| "epoch": 1.468606844516303, | |
| "grad_norm": 0.7888718843460083, | |
| "learning_rate": 0.00012761926148404935, | |
| "loss": 0.4682, | |
| "step": 43600 | |
| }, | |
| { | |
| "epoch": 1.471975208838588, | |
| "grad_norm": 0.7907763719558716, | |
| "learning_rate": 0.00012733856366928186, | |
| "loss": 0.4653, | |
| "step": 43700 | |
| }, | |
| { | |
| "epoch": 1.475343573160873, | |
| "grad_norm": 0.7945205569267273, | |
| "learning_rate": 0.00012705786585451433, | |
| "loss": 0.4648, | |
| "step": 43800 | |
| }, | |
| { | |
| "epoch": 1.478711937483158, | |
| "grad_norm": 0.8834030032157898, | |
| "learning_rate": 0.0001267771680397468, | |
| "loss": 0.4664, | |
| "step": 43900 | |
| }, | |
| { | |
| "epoch": 1.4820803018054431, | |
| "grad_norm": 0.7815008759498596, | |
| "learning_rate": 0.0001264964702249793, | |
| "loss": 0.4662, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 1.4854486661277284, | |
| "grad_norm": 0.8282730579376221, | |
| "learning_rate": 0.0001262157724102118, | |
| "loss": 0.4652, | |
| "step": 44100 | |
| }, | |
| { | |
| "epoch": 1.4888170304500135, | |
| "grad_norm": 0.7864588499069214, | |
| "learning_rate": 0.0001259350745954443, | |
| "loss": 0.4651, | |
| "step": 44200 | |
| }, | |
| { | |
| "epoch": 1.4921853947722985, | |
| "grad_norm": 0.7972845435142517, | |
| "learning_rate": 0.00012565437678067678, | |
| "loss": 0.4656, | |
| "step": 44300 | |
| }, | |
| { | |
| "epoch": 1.4955537590945838, | |
| "grad_norm": 0.8192013502120972, | |
| "learning_rate": 0.00012537367896590926, | |
| "loss": 0.4665, | |
| "step": 44400 | |
| }, | |
| { | |
| "epoch": 1.4989221234168688, | |
| "grad_norm": 0.8526120185852051, | |
| "learning_rate": 0.00012509298115114174, | |
| "loss": 0.4654, | |
| "step": 44500 | |
| }, | |
| { | |
| "epoch": 1.5022904877391539, | |
| "grad_norm": 0.8241577744483948, | |
| "learning_rate": 0.00012481228333637422, | |
| "loss": 0.4648, | |
| "step": 44600 | |
| }, | |
| { | |
| "epoch": 1.5056588520614391, | |
| "grad_norm": 0.8311729431152344, | |
| "learning_rate": 0.00012453158552160672, | |
| "loss": 0.4662, | |
| "step": 44700 | |
| }, | |
| { | |
| "epoch": 1.509027216383724, | |
| "grad_norm": 0.7880195379257202, | |
| "learning_rate": 0.00012425088770683923, | |
| "loss": 0.4637, | |
| "step": 44800 | |
| }, | |
| { | |
| "epoch": 1.5123955807060092, | |
| "grad_norm": 0.7668688893318176, | |
| "learning_rate": 0.0001239701898920717, | |
| "loss": 0.465, | |
| "step": 44900 | |
| }, | |
| { | |
| "epoch": 1.5157639450282943, | |
| "grad_norm": 0.8149063587188721, | |
| "learning_rate": 0.00012368949207730418, | |
| "loss": 0.4634, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 1.5191323093505793, | |
| "grad_norm": 0.7656127214431763, | |
| "learning_rate": 0.00012340879426253666, | |
| "loss": 0.4635, | |
| "step": 45100 | |
| }, | |
| { | |
| "epoch": 1.5225006736728646, | |
| "grad_norm": 0.8114592432975769, | |
| "learning_rate": 0.00012312809644776917, | |
| "loss": 0.4635, | |
| "step": 45200 | |
| }, | |
| { | |
| "epoch": 1.5258690379951494, | |
| "grad_norm": 0.8734049797058105, | |
| "learning_rate": 0.00012284739863300165, | |
| "loss": 0.4629, | |
| "step": 45300 | |
| }, | |
| { | |
| "epoch": 1.5292374023174347, | |
| "grad_norm": 0.806281328201294, | |
| "learning_rate": 0.00012256670081823413, | |
| "loss": 0.4644, | |
| "step": 45400 | |
| }, | |
| { | |
| "epoch": 1.5326057666397197, | |
| "grad_norm": 0.8073423504829407, | |
| "learning_rate": 0.00012228600300346663, | |
| "loss": 0.4626, | |
| "step": 45500 | |
| }, | |
| { | |
| "epoch": 1.5359741309620047, | |
| "grad_norm": 0.8023707270622253, | |
| "learning_rate": 0.00012200530518869911, | |
| "loss": 0.4637, | |
| "step": 45600 | |
| }, | |
| { | |
| "epoch": 1.53934249528429, | |
| "grad_norm": 0.821060299873352, | |
| "learning_rate": 0.00012172460737393161, | |
| "loss": 0.4624, | |
| "step": 45700 | |
| }, | |
| { | |
| "epoch": 1.542710859606575, | |
| "grad_norm": 0.7743229866027832, | |
| "learning_rate": 0.00012144390955916409, | |
| "loss": 0.4631, | |
| "step": 45800 | |
| }, | |
| { | |
| "epoch": 1.54607922392886, | |
| "grad_norm": 0.8501706719398499, | |
| "learning_rate": 0.00012116321174439657, | |
| "loss": 0.4646, | |
| "step": 45900 | |
| }, | |
| { | |
| "epoch": 1.5494475882511454, | |
| "grad_norm": 0.798643946647644, | |
| "learning_rate": 0.00012088251392962906, | |
| "loss": 0.4618, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 1.5528159525734302, | |
| "grad_norm": 0.771360456943512, | |
| "learning_rate": 0.00012060181611486154, | |
| "loss": 0.464, | |
| "step": 46100 | |
| }, | |
| { | |
| "epoch": 1.5561843168957155, | |
| "grad_norm": 0.7841131687164307, | |
| "learning_rate": 0.00012032111830009405, | |
| "loss": 0.4618, | |
| "step": 46200 | |
| }, | |
| { | |
| "epoch": 1.5595526812180005, | |
| "grad_norm": 0.7240998148918152, | |
| "learning_rate": 0.00012004042048532653, | |
| "loss": 0.4648, | |
| "step": 46300 | |
| }, | |
| { | |
| "epoch": 1.5629210455402855, | |
| "grad_norm": 0.8445931673049927, | |
| "learning_rate": 0.00011975972267055902, | |
| "loss": 0.4606, | |
| "step": 46400 | |
| }, | |
| { | |
| "epoch": 1.5662894098625708, | |
| "grad_norm": 0.8375403881072998, | |
| "learning_rate": 0.0001194790248557915, | |
| "loss": 0.4633, | |
| "step": 46500 | |
| }, | |
| { | |
| "epoch": 1.5696577741848559, | |
| "grad_norm": 0.7885960340499878, | |
| "learning_rate": 0.00011919832704102399, | |
| "loss": 0.4632, | |
| "step": 46600 | |
| }, | |
| { | |
| "epoch": 1.573026138507141, | |
| "grad_norm": 0.8243712186813354, | |
| "learning_rate": 0.0001189176292262565, | |
| "loss": 0.4618, | |
| "step": 46700 | |
| }, | |
| { | |
| "epoch": 1.5763945028294262, | |
| "grad_norm": 0.8182551860809326, | |
| "learning_rate": 0.00011863693141148897, | |
| "loss": 0.4607, | |
| "step": 46800 | |
| }, | |
| { | |
| "epoch": 1.579762867151711, | |
| "grad_norm": 0.7784871459007263, | |
| "learning_rate": 0.00011835623359672145, | |
| "loss": 0.4628, | |
| "step": 46900 | |
| }, | |
| { | |
| "epoch": 1.5831312314739963, | |
| "grad_norm": 0.8082338571548462, | |
| "learning_rate": 0.00011807553578195394, | |
| "loss": 0.4621, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 1.5864995957962813, | |
| "grad_norm": 0.8203257322311401, | |
| "learning_rate": 0.00011779483796718642, | |
| "loss": 0.461, | |
| "step": 47100 | |
| }, | |
| { | |
| "epoch": 1.5898679601185663, | |
| "grad_norm": 0.7920771837234497, | |
| "learning_rate": 0.00011751414015241893, | |
| "loss": 0.4611, | |
| "step": 47200 | |
| }, | |
| { | |
| "epoch": 1.5932363244408516, | |
| "grad_norm": 0.8124784827232361, | |
| "learning_rate": 0.0001172334423376514, | |
| "loss": 0.4598, | |
| "step": 47300 | |
| }, | |
| { | |
| "epoch": 1.5966046887631367, | |
| "grad_norm": 0.8094605803489685, | |
| "learning_rate": 0.0001169527445228839, | |
| "loss": 0.4605, | |
| "step": 47400 | |
| }, | |
| { | |
| "epoch": 1.5999730530854217, | |
| "grad_norm": 0.7639499306678772, | |
| "learning_rate": 0.00011667204670811638, | |
| "loss": 0.46, | |
| "step": 47500 | |
| }, | |
| { | |
| "epoch": 1.603341417407707, | |
| "grad_norm": 0.8600967526435852, | |
| "learning_rate": 0.00011639134889334887, | |
| "loss": 0.4623, | |
| "step": 47600 | |
| }, | |
| { | |
| "epoch": 1.6067097817299918, | |
| "grad_norm": 0.7747792601585388, | |
| "learning_rate": 0.00011611065107858137, | |
| "loss": 0.463, | |
| "step": 47700 | |
| }, | |
| { | |
| "epoch": 1.610078146052277, | |
| "grad_norm": 0.8040998578071594, | |
| "learning_rate": 0.00011582995326381385, | |
| "loss": 0.459, | |
| "step": 47800 | |
| }, | |
| { | |
| "epoch": 1.613446510374562, | |
| "grad_norm": 0.7648651003837585, | |
| "learning_rate": 0.00011554925544904633, | |
| "loss": 0.4618, | |
| "step": 47900 | |
| }, | |
| { | |
| "epoch": 1.6168148746968471, | |
| "grad_norm": 0.789125382900238, | |
| "learning_rate": 0.00011526855763427882, | |
| "loss": 0.4599, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 1.6201832390191324, | |
| "grad_norm": 0.8133670687675476, | |
| "learning_rate": 0.0001149878598195113, | |
| "loss": 0.4594, | |
| "step": 48100 | |
| }, | |
| { | |
| "epoch": 1.6235516033414175, | |
| "grad_norm": 0.7992141842842102, | |
| "learning_rate": 0.0001147071620047438, | |
| "loss": 0.4602, | |
| "step": 48200 | |
| }, | |
| { | |
| "epoch": 1.6269199676637025, | |
| "grad_norm": 0.780681312084198, | |
| "learning_rate": 0.00011442646418997628, | |
| "loss": 0.4587, | |
| "step": 48300 | |
| }, | |
| { | |
| "epoch": 1.6302883319859878, | |
| "grad_norm": 0.7979656457901001, | |
| "learning_rate": 0.00011414576637520878, | |
| "loss": 0.4587, | |
| "step": 48400 | |
| }, | |
| { | |
| "epoch": 1.6336566963082726, | |
| "grad_norm": 0.8527476787567139, | |
| "learning_rate": 0.00011386506856044125, | |
| "loss": 0.4586, | |
| "step": 48500 | |
| }, | |
| { | |
| "epoch": 1.6370250606305579, | |
| "grad_norm": 0.8187114000320435, | |
| "learning_rate": 0.00011358437074567375, | |
| "loss": 0.4611, | |
| "step": 48600 | |
| }, | |
| { | |
| "epoch": 1.640393424952843, | |
| "grad_norm": 0.7977433204650879, | |
| "learning_rate": 0.00011330367293090625, | |
| "loss": 0.4581, | |
| "step": 48700 | |
| }, | |
| { | |
| "epoch": 1.643761789275128, | |
| "grad_norm": 0.8355839252471924, | |
| "learning_rate": 0.00011302297511613873, | |
| "loss": 0.46, | |
| "step": 48800 | |
| }, | |
| { | |
| "epoch": 1.6471301535974132, | |
| "grad_norm": 0.7887241840362549, | |
| "learning_rate": 0.00011274227730137121, | |
| "loss": 0.4595, | |
| "step": 48900 | |
| }, | |
| { | |
| "epoch": 1.650498517919698, | |
| "grad_norm": 0.8219642639160156, | |
| "learning_rate": 0.0001124615794866037, | |
| "loss": 0.4605, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 1.6538668822419833, | |
| "grad_norm": 0.797517716884613, | |
| "learning_rate": 0.00011218088167183618, | |
| "loss": 0.46, | |
| "step": 49100 | |
| }, | |
| { | |
| "epoch": 1.6572352465642683, | |
| "grad_norm": 0.81880784034729, | |
| "learning_rate": 0.00011190018385706868, | |
| "loss": 0.4602, | |
| "step": 49200 | |
| }, | |
| { | |
| "epoch": 1.6606036108865534, | |
| "grad_norm": 0.8267971277236938, | |
| "learning_rate": 0.00011161948604230116, | |
| "loss": 0.4584, | |
| "step": 49300 | |
| }, | |
| { | |
| "epoch": 1.6639719752088387, | |
| "grad_norm": 0.8257302045822144, | |
| "learning_rate": 0.00011133878822753366, | |
| "loss": 0.4602, | |
| "step": 49400 | |
| }, | |
| { | |
| "epoch": 1.6673403395311237, | |
| "grad_norm": 0.7903374433517456, | |
| "learning_rate": 0.00011105809041276613, | |
| "loss": 0.4558, | |
| "step": 49500 | |
| }, | |
| { | |
| "epoch": 1.6707087038534087, | |
| "grad_norm": 0.7741321921348572, | |
| "learning_rate": 0.00011077739259799863, | |
| "loss": 0.4596, | |
| "step": 49600 | |
| }, | |
| { | |
| "epoch": 1.674077068175694, | |
| "grad_norm": 0.771134078502655, | |
| "learning_rate": 0.00011049669478323113, | |
| "loss": 0.4568, | |
| "step": 49700 | |
| }, | |
| { | |
| "epoch": 1.6774454324979788, | |
| "grad_norm": 0.7859461307525635, | |
| "learning_rate": 0.00011021599696846361, | |
| "loss": 0.4577, | |
| "step": 49800 | |
| }, | |
| { | |
| "epoch": 1.680813796820264, | |
| "grad_norm": 0.7759444117546082, | |
| "learning_rate": 0.00010993529915369609, | |
| "loss": 0.457, | |
| "step": 49900 | |
| }, | |
| { | |
| "epoch": 1.6841821611425492, | |
| "grad_norm": 0.8348528742790222, | |
| "learning_rate": 0.00010965460133892858, | |
| "loss": 0.4569, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 1.6875505254648342, | |
| "grad_norm": 0.8011546730995178, | |
| "learning_rate": 0.00010937390352416106, | |
| "loss": 0.4585, | |
| "step": 50100 | |
| }, | |
| { | |
| "epoch": 1.6909188897871195, | |
| "grad_norm": 0.790429413318634, | |
| "learning_rate": 0.00010909320570939356, | |
| "loss": 0.4582, | |
| "step": 50200 | |
| }, | |
| { | |
| "epoch": 1.6942872541094045, | |
| "grad_norm": 0.8371046781539917, | |
| "learning_rate": 0.00010881250789462604, | |
| "loss": 0.4591, | |
| "step": 50300 | |
| }, | |
| { | |
| "epoch": 1.6976556184316896, | |
| "grad_norm": 0.7836015820503235, | |
| "learning_rate": 0.00010853181007985853, | |
| "loss": 0.4581, | |
| "step": 50400 | |
| }, | |
| { | |
| "epoch": 1.7010239827539748, | |
| "grad_norm": 0.846708357334137, | |
| "learning_rate": 0.00010825111226509101, | |
| "loss": 0.4569, | |
| "step": 50500 | |
| }, | |
| { | |
| "epoch": 1.7043923470762596, | |
| "grad_norm": 0.797223687171936, | |
| "learning_rate": 0.00010797041445032352, | |
| "loss": 0.4569, | |
| "step": 50600 | |
| }, | |
| { | |
| "epoch": 1.707760711398545, | |
| "grad_norm": 0.8466051816940308, | |
| "learning_rate": 0.00010768971663555601, | |
| "loss": 0.4567, | |
| "step": 50700 | |
| }, | |
| { | |
| "epoch": 1.71112907572083, | |
| "grad_norm": 0.7285684943199158, | |
| "learning_rate": 0.00010740901882078849, | |
| "loss": 0.456, | |
| "step": 50800 | |
| }, | |
| { | |
| "epoch": 1.714497440043115, | |
| "grad_norm": 0.8624778985977173, | |
| "learning_rate": 0.00010712832100602097, | |
| "loss": 0.4588, | |
| "step": 50900 | |
| }, | |
| { | |
| "epoch": 1.7178658043654003, | |
| "grad_norm": 0.7958481311798096, | |
| "learning_rate": 0.00010684762319125346, | |
| "loss": 0.4566, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 1.7212341686876853, | |
| "grad_norm": 0.7974202036857605, | |
| "learning_rate": 0.00010656692537648596, | |
| "loss": 0.4566, | |
| "step": 51100 | |
| }, | |
| { | |
| "epoch": 1.7246025330099704, | |
| "grad_norm": 0.8782477378845215, | |
| "learning_rate": 0.00010628622756171844, | |
| "loss": 0.4577, | |
| "step": 51200 | |
| }, | |
| { | |
| "epoch": 1.7279708973322556, | |
| "grad_norm": 0.8142967820167542, | |
| "learning_rate": 0.00010600552974695092, | |
| "loss": 0.4543, | |
| "step": 51300 | |
| }, | |
| { | |
| "epoch": 1.7313392616545404, | |
| "grad_norm": 0.7704757452011108, | |
| "learning_rate": 0.00010572483193218341, | |
| "loss": 0.4565, | |
| "step": 51400 | |
| }, | |
| { | |
| "epoch": 1.7347076259768257, | |
| "grad_norm": 0.8298918604850769, | |
| "learning_rate": 0.00010544413411741589, | |
| "loss": 0.4564, | |
| "step": 51500 | |
| }, | |
| { | |
| "epoch": 1.7380759902991108, | |
| "grad_norm": 0.7840197682380676, | |
| "learning_rate": 0.0001051634363026484, | |
| "loss": 0.457, | |
| "step": 51600 | |
| }, | |
| { | |
| "epoch": 1.7414443546213958, | |
| "grad_norm": 0.8080000281333923, | |
| "learning_rate": 0.00010488273848788088, | |
| "loss": 0.4563, | |
| "step": 51700 | |
| }, | |
| { | |
| "epoch": 1.744812718943681, | |
| "grad_norm": 0.8133041262626648, | |
| "learning_rate": 0.00010460204067311337, | |
| "loss": 0.4529, | |
| "step": 51800 | |
| }, | |
| { | |
| "epoch": 1.7481810832659659, | |
| "grad_norm": 0.8792639374732971, | |
| "learning_rate": 0.00010432134285834585, | |
| "loss": 0.4536, | |
| "step": 51900 | |
| }, | |
| { | |
| "epoch": 1.7515494475882512, | |
| "grad_norm": 0.8580865263938904, | |
| "learning_rate": 0.00010404064504357834, | |
| "loss": 0.4542, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 1.7549178119105362, | |
| "grad_norm": 0.7759612798690796, | |
| "learning_rate": 0.00010375994722881084, | |
| "loss": 0.4557, | |
| "step": 52100 | |
| }, | |
| { | |
| "epoch": 1.7582861762328212, | |
| "grad_norm": 0.748423159122467, | |
| "learning_rate": 0.00010347924941404332, | |
| "loss": 0.454, | |
| "step": 52200 | |
| }, | |
| { | |
| "epoch": 1.7616545405551065, | |
| "grad_norm": 0.7873731851577759, | |
| "learning_rate": 0.0001031985515992758, | |
| "loss": 0.4543, | |
| "step": 52300 | |
| }, | |
| { | |
| "epoch": 1.7650229048773916, | |
| "grad_norm": 0.7736590504646301, | |
| "learning_rate": 0.00010291785378450829, | |
| "loss": 0.4556, | |
| "step": 52400 | |
| }, | |
| { | |
| "epoch": 1.7683912691996766, | |
| "grad_norm": 0.7629456520080566, | |
| "learning_rate": 0.00010263715596974077, | |
| "loss": 0.4545, | |
| "step": 52500 | |
| }, | |
| { | |
| "epoch": 1.7717596335219619, | |
| "grad_norm": 0.8270254731178284, | |
| "learning_rate": 0.00010235645815497328, | |
| "loss": 0.4546, | |
| "step": 52600 | |
| }, | |
| { | |
| "epoch": 1.7751279978442467, | |
| "grad_norm": 0.7610684633255005, | |
| "learning_rate": 0.00010207576034020576, | |
| "loss": 0.4527, | |
| "step": 52700 | |
| }, | |
| { | |
| "epoch": 1.778496362166532, | |
| "grad_norm": 0.8228756785392761, | |
| "learning_rate": 0.00010179506252543825, | |
| "loss": 0.4568, | |
| "step": 52800 | |
| }, | |
| { | |
| "epoch": 1.781864726488817, | |
| "grad_norm": 0.8317448496818542, | |
| "learning_rate": 0.00010151436471067073, | |
| "loss": 0.4543, | |
| "step": 52900 | |
| }, | |
| { | |
| "epoch": 1.785233090811102, | |
| "grad_norm": 0.7914367318153381, | |
| "learning_rate": 0.00010123366689590322, | |
| "loss": 0.4529, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 1.7886014551333873, | |
| "grad_norm": 0.8205628395080566, | |
| "learning_rate": 0.00010095296908113572, | |
| "loss": 0.4537, | |
| "step": 53100 | |
| }, | |
| { | |
| "epoch": 1.7919698194556724, | |
| "grad_norm": 0.794956386089325, | |
| "learning_rate": 0.0001006722712663682, | |
| "loss": 0.455, | |
| "step": 53200 | |
| }, | |
| { | |
| "epoch": 1.7953381837779574, | |
| "grad_norm": 0.8285955786705017, | |
| "learning_rate": 0.00010039157345160068, | |
| "loss": 0.4535, | |
| "step": 53300 | |
| }, | |
| { | |
| "epoch": 1.7987065481002427, | |
| "grad_norm": 0.8204521536827087, | |
| "learning_rate": 0.00010011087563683317, | |
| "loss": 0.4561, | |
| "step": 53400 | |
| }, | |
| { | |
| "epoch": 1.8020749124225275, | |
| "grad_norm": 0.8407822251319885, | |
| "learning_rate": 9.983017782206566e-05, | |
| "loss": 0.4563, | |
| "step": 53500 | |
| }, | |
| { | |
| "epoch": 1.8054432767448128, | |
| "grad_norm": 0.8138654828071594, | |
| "learning_rate": 9.954948000729814e-05, | |
| "loss": 0.4547, | |
| "step": 53600 | |
| }, | |
| { | |
| "epoch": 1.8088116410670978, | |
| "grad_norm": 0.8514792323112488, | |
| "learning_rate": 9.926878219253063e-05, | |
| "loss": 0.453, | |
| "step": 53700 | |
| }, | |
| { | |
| "epoch": 1.8121800053893828, | |
| "grad_norm": 0.836942195892334, | |
| "learning_rate": 9.898808437776313e-05, | |
| "loss": 0.4554, | |
| "step": 53800 | |
| }, | |
| { | |
| "epoch": 1.815548369711668, | |
| "grad_norm": 0.8424620628356934, | |
| "learning_rate": 9.87073865629956e-05, | |
| "loss": 0.4541, | |
| "step": 53900 | |
| }, | |
| { | |
| "epoch": 1.8189167340339532, | |
| "grad_norm": 0.7823119163513184, | |
| "learning_rate": 9.84266887482281e-05, | |
| "loss": 0.4539, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 1.8222850983562382, | |
| "grad_norm": 0.8232121467590332, | |
| "learning_rate": 9.814599093346059e-05, | |
| "loss": 0.4518, | |
| "step": 54100 | |
| }, | |
| { | |
| "epoch": 1.8256534626785235, | |
| "grad_norm": 0.7991457581520081, | |
| "learning_rate": 9.786529311869308e-05, | |
| "loss": 0.4516, | |
| "step": 54200 | |
| }, | |
| { | |
| "epoch": 1.8290218270008083, | |
| "grad_norm": 0.7749050855636597, | |
| "learning_rate": 9.758459530392556e-05, | |
| "loss": 0.4528, | |
| "step": 54300 | |
| }, | |
| { | |
| "epoch": 1.8323901913230936, | |
| "grad_norm": 0.7452788949012756, | |
| "learning_rate": 9.730389748915805e-05, | |
| "loss": 0.4555, | |
| "step": 54400 | |
| }, | |
| { | |
| "epoch": 1.8357585556453786, | |
| "grad_norm": 0.816150963306427, | |
| "learning_rate": 9.702319967439054e-05, | |
| "loss": 0.4514, | |
| "step": 54500 | |
| }, | |
| { | |
| "epoch": 1.8391269199676636, | |
| "grad_norm": 0.785351037979126, | |
| "learning_rate": 9.674250185962302e-05, | |
| "loss": 0.4517, | |
| "step": 54600 | |
| }, | |
| { | |
| "epoch": 1.842495284289949, | |
| "grad_norm": 0.828187108039856, | |
| "learning_rate": 9.646180404485551e-05, | |
| "loss": 0.4533, | |
| "step": 54700 | |
| }, | |
| { | |
| "epoch": 1.845863648612234, | |
| "grad_norm": 0.7950621247291565, | |
| "learning_rate": 9.6181106230088e-05, | |
| "loss": 0.4523, | |
| "step": 54800 | |
| }, | |
| { | |
| "epoch": 1.849232012934519, | |
| "grad_norm": 0.7881097197532654, | |
| "learning_rate": 9.590040841532048e-05, | |
| "loss": 0.4516, | |
| "step": 54900 | |
| }, | |
| { | |
| "epoch": 1.852600377256804, | |
| "grad_norm": 0.7643069624900818, | |
| "learning_rate": 9.561971060055298e-05, | |
| "loss": 0.453, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 1.855968741579089, | |
| "grad_norm": 0.7741556167602539, | |
| "learning_rate": 9.533901278578547e-05, | |
| "loss": 0.4528, | |
| "step": 55100 | |
| }, | |
| { | |
| "epoch": 1.8593371059013744, | |
| "grad_norm": 0.8289052844047546, | |
| "learning_rate": 9.505831497101796e-05, | |
| "loss": 0.452, | |
| "step": 55200 | |
| }, | |
| { | |
| "epoch": 1.8627054702236594, | |
| "grad_norm": 0.7747401595115662, | |
| "learning_rate": 9.477761715625044e-05, | |
| "loss": 0.453, | |
| "step": 55300 | |
| }, | |
| { | |
| "epoch": 1.8660738345459444, | |
| "grad_norm": 0.837910532951355, | |
| "learning_rate": 9.449691934148293e-05, | |
| "loss": 0.4532, | |
| "step": 55400 | |
| }, | |
| { | |
| "epoch": 1.8694421988682297, | |
| "grad_norm": 0.7754988670349121, | |
| "learning_rate": 9.421622152671542e-05, | |
| "loss": 0.4497, | |
| "step": 55500 | |
| }, | |
| { | |
| "epoch": 1.8728105631905145, | |
| "grad_norm": 0.8681314587593079, | |
| "learning_rate": 9.39355237119479e-05, | |
| "loss": 0.451, | |
| "step": 55600 | |
| }, | |
| { | |
| "epoch": 1.8761789275127998, | |
| "grad_norm": 0.8410942554473877, | |
| "learning_rate": 9.365482589718039e-05, | |
| "loss": 0.451, | |
| "step": 55700 | |
| }, | |
| { | |
| "epoch": 1.8795472918350848, | |
| "grad_norm": 0.8542850613594055, | |
| "learning_rate": 9.337412808241288e-05, | |
| "loss": 0.4524, | |
| "step": 55800 | |
| }, | |
| { | |
| "epoch": 1.8829156561573699, | |
| "grad_norm": 0.806122899055481, | |
| "learning_rate": 9.309343026764538e-05, | |
| "loss": 0.4535, | |
| "step": 55900 | |
| }, | |
| { | |
| "epoch": 1.8862840204796552, | |
| "grad_norm": 0.8103610277175903, | |
| "learning_rate": 9.281273245287786e-05, | |
| "loss": 0.4514, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 1.8896523848019402, | |
| "grad_norm": 0.7982548475265503, | |
| "learning_rate": 9.253203463811035e-05, | |
| "loss": 0.4504, | |
| "step": 56100 | |
| }, | |
| { | |
| "epoch": 1.8930207491242252, | |
| "grad_norm": 0.8081793189048767, | |
| "learning_rate": 9.225133682334284e-05, | |
| "loss": 0.4522, | |
| "step": 56200 | |
| }, | |
| { | |
| "epoch": 1.8963891134465105, | |
| "grad_norm": 0.8284481763839722, | |
| "learning_rate": 9.197063900857532e-05, | |
| "loss": 0.4501, | |
| "step": 56300 | |
| }, | |
| { | |
| "epoch": 1.8997574777687953, | |
| "grad_norm": 0.7722172737121582, | |
| "learning_rate": 9.168994119380781e-05, | |
| "loss": 0.4528, | |
| "step": 56400 | |
| }, | |
| { | |
| "epoch": 1.9031258420910806, | |
| "grad_norm": 0.8065896034240723, | |
| "learning_rate": 9.14092433790403e-05, | |
| "loss": 0.4527, | |
| "step": 56500 | |
| }, | |
| { | |
| "epoch": 1.9064942064133656, | |
| "grad_norm": 0.8267763257026672, | |
| "learning_rate": 9.112854556427278e-05, | |
| "loss": 0.4547, | |
| "step": 56600 | |
| }, | |
| { | |
| "epoch": 1.9098625707356507, | |
| "grad_norm": 0.803359866142273, | |
| "learning_rate": 9.084784774950527e-05, | |
| "loss": 0.4506, | |
| "step": 56700 | |
| }, | |
| { | |
| "epoch": 1.913230935057936, | |
| "grad_norm": 0.7984471321105957, | |
| "learning_rate": 9.056714993473776e-05, | |
| "loss": 0.4498, | |
| "step": 56800 | |
| }, | |
| { | |
| "epoch": 1.916599299380221, | |
| "grad_norm": 0.8118926286697388, | |
| "learning_rate": 9.028645211997026e-05, | |
| "loss": 0.4511, | |
| "step": 56900 | |
| }, | |
| { | |
| "epoch": 1.919967663702506, | |
| "grad_norm": 0.7954909205436707, | |
| "learning_rate": 9.000575430520273e-05, | |
| "loss": 0.45, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 1.9233360280247913, | |
| "grad_norm": 0.7925546765327454, | |
| "learning_rate": 8.972505649043523e-05, | |
| "loss": 0.4503, | |
| "step": 57100 | |
| }, | |
| { | |
| "epoch": 1.9267043923470761, | |
| "grad_norm": 0.7257952690124512, | |
| "learning_rate": 8.944435867566772e-05, | |
| "loss": 0.4501, | |
| "step": 57200 | |
| }, | |
| { | |
| "epoch": 1.9300727566693614, | |
| "grad_norm": 0.7644702196121216, | |
| "learning_rate": 8.91636608609002e-05, | |
| "loss": 0.4502, | |
| "step": 57300 | |
| }, | |
| { | |
| "epoch": 1.9334411209916464, | |
| "grad_norm": 0.8492504358291626, | |
| "learning_rate": 8.888296304613269e-05, | |
| "loss": 0.451, | |
| "step": 57400 | |
| }, | |
| { | |
| "epoch": 1.9368094853139315, | |
| "grad_norm": 0.7511376142501831, | |
| "learning_rate": 8.860226523136518e-05, | |
| "loss": 0.4511, | |
| "step": 57500 | |
| }, | |
| { | |
| "epoch": 1.9401778496362168, | |
| "grad_norm": 0.8549360036849976, | |
| "learning_rate": 8.832156741659766e-05, | |
| "loss": 0.4504, | |
| "step": 57600 | |
| }, | |
| { | |
| "epoch": 1.9435462139585018, | |
| "grad_norm": 0.7821473479270935, | |
| "learning_rate": 8.804086960183015e-05, | |
| "loss": 0.4508, | |
| "step": 57700 | |
| }, | |
| { | |
| "epoch": 1.9469145782807868, | |
| "grad_norm": 0.8527407050132751, | |
| "learning_rate": 8.776017178706264e-05, | |
| "loss": 0.4514, | |
| "step": 57800 | |
| }, | |
| { | |
| "epoch": 1.9502829426030721, | |
| "grad_norm": 0.8409647941589355, | |
| "learning_rate": 8.747947397229514e-05, | |
| "loss": 0.4498, | |
| "step": 57900 | |
| }, | |
| { | |
| "epoch": 1.953651306925357, | |
| "grad_norm": 0.8430731296539307, | |
| "learning_rate": 8.719877615752761e-05, | |
| "loss": 0.4498, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 1.9570196712476422, | |
| "grad_norm": 0.8346706032752991, | |
| "learning_rate": 8.69180783427601e-05, | |
| "loss": 0.4475, | |
| "step": 58100 | |
| }, | |
| { | |
| "epoch": 1.9603880355699272, | |
| "grad_norm": 0.7488289475440979, | |
| "learning_rate": 8.66373805279926e-05, | |
| "loss": 0.4488, | |
| "step": 58200 | |
| }, | |
| { | |
| "epoch": 1.9637563998922123, | |
| "grad_norm": 0.836130678653717, | |
| "learning_rate": 8.635668271322508e-05, | |
| "loss": 0.451, | |
| "step": 58300 | |
| }, | |
| { | |
| "epoch": 1.9671247642144976, | |
| "grad_norm": 0.7900556921958923, | |
| "learning_rate": 8.607598489845757e-05, | |
| "loss": 0.4463, | |
| "step": 58400 | |
| }, | |
| { | |
| "epoch": 1.9704931285367824, | |
| "grad_norm": 0.8496758341789246, | |
| "learning_rate": 8.579528708369006e-05, | |
| "loss": 0.4475, | |
| "step": 58500 | |
| }, | |
| { | |
| "epoch": 1.9738614928590676, | |
| "grad_norm": 0.8665506839752197, | |
| "learning_rate": 8.551458926892254e-05, | |
| "loss": 0.4474, | |
| "step": 58600 | |
| }, | |
| { | |
| "epoch": 1.9772298571813527, | |
| "grad_norm": 0.8058724999427795, | |
| "learning_rate": 8.523389145415503e-05, | |
| "loss": 0.4496, | |
| "step": 58700 | |
| }, | |
| { | |
| "epoch": 1.9805982215036377, | |
| "grad_norm": 0.8007978796958923, | |
| "learning_rate": 8.495319363938752e-05, | |
| "loss": 0.4494, | |
| "step": 58800 | |
| }, | |
| { | |
| "epoch": 1.983966585825923, | |
| "grad_norm": 0.7893068790435791, | |
| "learning_rate": 8.467249582462001e-05, | |
| "loss": 0.4477, | |
| "step": 58900 | |
| }, | |
| { | |
| "epoch": 1.987334950148208, | |
| "grad_norm": 0.8267046213150024, | |
| "learning_rate": 8.439179800985249e-05, | |
| "loss": 0.4479, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 1.990703314470493, | |
| "grad_norm": 0.8301923274993896, | |
| "learning_rate": 8.411110019508498e-05, | |
| "loss": 0.4486, | |
| "step": 59100 | |
| }, | |
| { | |
| "epoch": 1.9940716787927784, | |
| "grad_norm": 0.7466899156570435, | |
| "learning_rate": 8.383040238031748e-05, | |
| "loss": 0.4481, | |
| "step": 59200 | |
| }, | |
| { | |
| "epoch": 1.9974400431150632, | |
| "grad_norm": 0.8137242794036865, | |
| "learning_rate": 8.354970456554995e-05, | |
| "loss": 0.4501, | |
| "step": 59300 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_loss": 0.44603702425956726, | |
| "eval_runtime": 7.7293, | |
| "eval_samples_per_second": 646.889, | |
| "eval_steps_per_second": 10.221, | |
| "step": 59376 | |
| } | |
| ], | |
| "logging_steps": 100, | |
| "max_steps": 89064, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2413913702400000.0, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |