{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 1160, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.7661925554275513, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.7661924958229065 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.7468073964118958, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.7468073964118958 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.7663587927818298, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.7663587927818298 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.7558698654174805, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.7558698058128357 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.7633140087127686, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.7633139491081238 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.7495762705802917, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.7495762705802917 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.7946158051490784, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.7946158647537231 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.7658197283744812, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.765819787979126 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.7647641897201538, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.7647641897201538 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.7540814280509949, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.7540813684463501 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.75315260887146, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.7531526684761047 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.7455381751060486, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.7455382347106934 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.762398362159729, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.7623984217643738 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.7403266429901123, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.7403265833854675 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.7445636987686157, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.744563639163971 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.7662807106971741, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.7662806510925293 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.7362044453620911, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.7362045049667358 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.7684308886528015, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.7684308886528015 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.7461406588554382, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.7461405992507935 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.7721128463745117, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.7721129059791565 }, { "epoch": 0.17391304347826086, "grad_norm": 5.440911724708523, "learning_rate": 7.758620689655173e-07, "loss": 1.5163, "step": 10 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.7358056902885437, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.7358057498931885 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.744896411895752, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.7448963522911072 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.7335748672485352, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.7335748672485352 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.7542490363121033, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.7542489767074585 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.7452700734138489, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.7452700734138489 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.7386376857757568, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.7386377453804016 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.7284501791000366, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.7284501791000366 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.7426540851593018, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.7426540851593018 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.7221911549568176, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.7221912145614624 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.7172442078590393, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.7172443270683289 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.7152324914932251, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.7152324318885803 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.7241677641868591, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.7241678237915039 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.6987519860267639, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.6987520456314087 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.6919834613800049, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.6919834017753601 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.7232632637023926, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.7232632637023926 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.7465508580207825, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.7465508580207825 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.7389459013938904, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.7389459013938904 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.7185537815093994, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.7185537815093994 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.6907929182052612, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.6907929182052612 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.7089409232139587, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.708940863609314 }, { "epoch": 0.34782608695652173, "grad_norm": 3.7171016583673797, "learning_rate": 1.6379310344827587e-06, "loss": 1.452, "step": 20 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.6786553859710693, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.6786553859710693 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.6850727796554565, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.6850728988647461 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.6922683715820312, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.6922683715820312 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.6690276265144348, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.66902756690979 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.6566859483718872, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.6566859483718872 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.6707991361618042, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.6707991361618042 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.667239785194397, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.667239785194397 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.6516051888465881, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.6516051888465881 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.6598553657531738, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.6598554849624634 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.648436963558197, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.6484370231628418 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.6390278935432434, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.6390278935432434 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.6421687602996826, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.6421687006950378 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.645864725112915, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.6458646655082703 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.6420966982841492, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.6420966386795044 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.614678144454956, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.614678144454956 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.6200800538063049, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.6200799942016602 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.6047796607017517, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.6047796010971069 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.568564772605896, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.568564772605896 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.5620425939559937, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.5620427131652832 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.5881592035293579, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.5881591439247131 }, { "epoch": 0.5217391304347826, "grad_norm": 2.1835833998016314, "learning_rate": 2.5e-06, "loss": 1.2807, "step": 30 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.588189423084259, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.5881893634796143 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.5849539637565613, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.5849539637565613 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.5811046957969666, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.5811046361923218 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.5766334533691406, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.5766335129737854 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.5744068622589111, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.5744068622589111 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.5465148091316223, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.5465148091316223 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.5342298150062561, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.5342297554016113 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.5755828619003296, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.5755828619003296 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.544634222984314, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.544634222984314 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.5413680076599121, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.5413680076599121 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.5449234843254089, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.5449234843254089 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.5365327596664429, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.5365327596664429 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.54535973072052, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.5453597903251648 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.5346130132675171, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.5346130132675171 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.5038959980010986, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.5038959980010986 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.5292502045631409, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.5292502045631409 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.5098983645439148, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.50989830493927 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.5208277702331543, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.5208277702331543 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.5006983280181885, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.5006983280181885 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.5060795545578003, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.5060795545578003 }, { "epoch": 0.6956521739130435, "grad_norm": 1.4749509049316518, "learning_rate": 3.362068965517242e-06, "loss": 1.088, "step": 40 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.4993576407432556, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.4993576407432556 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.5133037567138672, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.5133037567138672 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.4990914762020111, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.4990915358066559 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.5008462071418762, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.5008462071418762 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.499632865190506, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.4996328353881836 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.5018905997276306, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.5018905997276306 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.4870546758174896, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.4870546758174896 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.49696972966194153, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.4969697594642639 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.4862103760242462, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.4862103760242462 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.4796815514564514, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.4796815514564514 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.48138922452926636, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.48138922452926636 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.47158902883529663, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.47158902883529663 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.4703315496444702, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.4703315496444702 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.47751492261886597, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.47751492261886597 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.4611895978450775, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.4611895978450775 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.4780290722846985, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.4780291020870209 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.4881938695907593, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.4881938695907593 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.46718961000442505, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.46718961000442505 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.4519549608230591, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.4519549310207367 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.44847315549850464, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.44847309589385986 }, { "epoch": 0.8695652173913043, "grad_norm": 0.8888588894735165, "learning_rate": 4.224137931034483e-06, "loss": 0.966, "step": 50 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.45957186818122864, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.45957186818122864 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.4459459185600281, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.4459459185600281 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.4676399528980255, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.4676399528980255 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.44375351071357727, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.44375351071357727 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.45082688331604004, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.45082685351371765 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.43348655104637146, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.43348655104637146 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.44927725195884705, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.44927728176116943 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.4304286241531372, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.4304285943508148 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.44503191113471985, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.44503194093704224 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.45195963978767395, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.45195963978767395 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.4157714545726776, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.4157714247703552 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.443865031003952, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.44386500120162964 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.41265133023262024, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.41265133023262024 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.4412020742893219, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.4412020742893219 }, { "avg_delta_l": 0.0, "avg_loss_unweighted": 0.4410664141178131, "avg_weight": 1.0, "std_delta_l": 0.0, "std_weight": 0.0, "weighted_loss": 0.4410664141178131 }, { "avg_delta_l": 0.12360000610351562, "avg_loss_unweighted": 0.4422833323478699, "avg_weight": 0.5331349968910217, "std_delta_l": 0.12673261761665344, "std_weight": 0.06323915719985962, "weighted_loss": 0.23856259882450104 }, { "avg_delta_l": 0.09150006622076035, "avg_loss_unweighted": 0.4228287637233734, "avg_weight": 0.5287688970565796, "std_delta_l": 0.11404557526111603, "std_weight": 0.0464206226170063, "weighted_loss": 0.22724291682243347 }, { "avg_delta_l": 0.11911305785179138, "avg_loss_unweighted": 0.42481881380081177, "avg_weight": 0.527982234954834, "std_delta_l": 0.12401623278856277, "std_weight": 0.05013514682650566, "weighted_loss": 0.22757504880428314 }, { "avg_delta_l": 0.13577498495578766, "avg_loss_unweighted": 0.4232175946235657, "avg_weight": 0.5169554352760315, "std_delta_l": 0.12648946046829224, "std_weight": 0.03391091153025627, "weighted_loss": 0.21917317807674408 }, { "epoch": 1.0347826086956522, "grad_norm": 0.4255637288753066, "learning_rate": 5.086206896551724e-06, "loss": 0.7545, "step": 60 }, { "avg_delta_l": 0.11020226776599884, "avg_loss_unweighted": 0.41523101925849915, "avg_weight": 0.5286667346954346, "std_delta_l": 0.1216091588139534, "std_weight": 0.04627132788300514, "weighted_loss": 0.2218065857887268 }, { "avg_delta_l": 0.09627863019704819, "avg_loss_unweighted": 0.4195970892906189, "avg_weight": 0.5526347756385803, "std_delta_l": 0.14153510332107544, "std_weight": 0.08533143997192383, "weighted_loss": 0.23660564422607422 }, { "avg_delta_l": 0.10499399900436401, "avg_loss_unweighted": 0.413993239402771, "avg_weight": 0.5136427283287048, "std_delta_l": 0.09785003215074539, "std_weight": 0.027285531163215637, "weighted_loss": 0.21330712735652924 }, { "avg_delta_l": 0.12534749507904053, "avg_loss_unweighted": 0.4217275083065033, "avg_weight": 0.5183294415473938, "std_delta_l": 0.11896166950464249, "std_weight": 0.03465409204363823, "weighted_loss": 0.22043652832508087 }, { "avg_delta_l": 0.12789210677146912, "avg_loss_unweighted": 0.42242375016212463, "avg_weight": 0.5233901739120483, "std_delta_l": 0.13596540689468384, "std_weight": 0.04629649966955185, "weighted_loss": 0.22439342737197876 }, { "avg_delta_l": 0.10278799384832382, "avg_loss_unweighted": 0.4284025728702545, "avg_weight": 0.5304936170578003, "std_delta_l": 0.11105398833751678, "std_weight": 0.04090157896280289, "weighted_loss": 0.2307107150554657 }, { "avg_delta_l": 0.10597789287567139, "avg_loss_unweighted": 0.41373002529144287, "avg_weight": 0.5296016931533813, "std_delta_l": 0.11927531659603119, "std_weight": 0.05647275596857071, "weighted_loss": 0.22218111157417297 }, { "avg_delta_l": 0.11926649510860443, "avg_loss_unweighted": 0.40981173515319824, "avg_weight": 0.5390410423278809, "std_delta_l": 0.12124865502119064, "std_weight": 0.0590633898973465, "weighted_loss": 0.2237434983253479 }, { "avg_delta_l": 0.1088743805885315, "avg_loss_unweighted": 0.42024609446525574, "avg_weight": 0.5302600860595703, "std_delta_l": 0.11109235137701035, "std_weight": 0.046676453202962875, "weighted_loss": 0.2242150455713272 }, { "avg_delta_l": 0.11469797044992447, "avg_loss_unweighted": 0.4075780212879181, "avg_weight": 0.5400804281234741, "std_delta_l": 0.14749598503112793, "std_weight": 0.06806981563568115, "weighted_loss": 0.22505290806293488 }, { "avg_delta_l": 0.11734339594841003, "avg_loss_unweighted": 0.4114460349082947, "avg_weight": 0.5392206907272339, "std_delta_l": 0.142390638589859, "std_weight": 0.06240278482437134, "weighted_loss": 0.22460857033729553 }, { "avg_delta_l": 0.10354909300804138, "avg_loss_unweighted": 0.4048364460468292, "avg_weight": 0.5425217151641846, "std_delta_l": 0.12647385895252228, "std_weight": 0.07444199919700623, "weighted_loss": 0.2232099324464798 }, { "avg_delta_l": 0.10714691877365112, "avg_loss_unweighted": 0.39991381764411926, "avg_weight": 0.5307022929191589, "std_delta_l": 0.1076921671628952, "std_weight": 0.04898751154541969, "weighted_loss": 0.21417206525802612 }, { "avg_delta_l": 0.13694226741790771, "avg_loss_unweighted": 0.4223840832710266, "avg_weight": 0.526321530342102, "std_delta_l": 0.12318941950798035, "std_weight": 0.04039508476853371, "weighted_loss": 0.22586871683597565 }, { "avg_delta_l": 0.16913016140460968, "avg_loss_unweighted": 0.4057730436325073, "avg_weight": 0.5204014182090759, "std_delta_l": 0.14888255298137665, "std_weight": 0.040802739560604095, "weighted_loss": 0.2137545645236969 }, { "avg_delta_l": 0.10799924284219742, "avg_loss_unweighted": 0.39664530754089355, "avg_weight": 0.5178180932998657, "std_delta_l": 0.09291445463895798, "std_weight": 0.035636212676763535, "weighted_loss": 0.2080821543931961 }, { "avg_delta_l": 0.10859165340662003, "avg_loss_unweighted": 0.388203889131546, "avg_weight": 0.5253856778144836, "std_delta_l": 0.1189427524805069, "std_weight": 0.048479050397872925, "weighted_loss": 0.20619267225265503 }, { "avg_delta_l": 0.08572736382484436, "avg_loss_unweighted": 0.4157463014125824, "avg_weight": 0.563633143901825, "std_delta_l": 0.1475382298231125, "std_weight": 0.11288762837648392, "weighted_loss": 0.23935389518737793 }, { "avg_delta_l": 0.12189151346683502, "avg_loss_unweighted": 0.3901602327823639, "avg_weight": 0.5276501178741455, "std_delta_l": 0.1278057098388672, "std_weight": 0.05530024319887161, "weighted_loss": 0.20801040530204773 }, { "avg_delta_l": 0.11353133618831635, "avg_loss_unweighted": 0.3902943432331085, "avg_weight": 0.5352044105529785, "std_delta_l": 0.1281661093235016, "std_weight": 0.06213347613811493, "weighted_loss": 0.2126772105693817 }, { "epoch": 1.208695652173913, "grad_norm": 0.44603335356652407, "learning_rate": 5.9482758620689665e-06, "loss": 0.4418, "step": 70 }, { "avg_delta_l": 0.13652470707893372, "avg_loss_unweighted": 0.39998242259025574, "avg_weight": 0.5292884707450867, "std_delta_l": 0.12626004219055176, "std_weight": 0.05642187222838402, "weighted_loss": 0.21375560760498047 }, { "avg_delta_l": 0.12928222119808197, "avg_loss_unweighted": 0.37256044149398804, "avg_weight": 0.5263550281524658, "std_delta_l": 0.10462749004364014, "std_weight": 0.04572267085313797, "weighted_loss": 0.19903400540351868 }, { "avg_delta_l": 0.09579861164093018, "avg_loss_unweighted": 0.4099803566932678, "avg_weight": 0.5546783208847046, "std_delta_l": 0.14315176010131836, "std_weight": 0.09610356390476227, "weighted_loss": 0.2321847528219223 }, { "avg_delta_l": 0.09802892804145813, "avg_loss_unweighted": 0.4042203426361084, "avg_weight": 0.5386710166931152, "std_delta_l": 0.12050174921751022, "std_weight": 0.05935888737440109, "weighted_loss": 0.21938475966453552 }, { "avg_delta_l": 0.10681216418743134, "avg_loss_unweighted": 0.37947845458984375, "avg_weight": 0.5267486572265625, "std_delta_l": 0.11883628368377686, "std_weight": 0.053497329354286194, "weighted_loss": 0.20303107798099518 }, { "avg_delta_l": 0.12400802224874496, "avg_loss_unweighted": 0.38461053371429443, "avg_weight": 0.5252610445022583, "std_delta_l": 0.12213899940252304, "std_weight": 0.04632183909416199, "weighted_loss": 0.20457416772842407 }, { "avg_delta_l": 0.11811307817697525, "avg_loss_unweighted": 0.3645337224006653, "avg_weight": 0.5332066416740417, "std_delta_l": 0.11811977624893188, "std_weight": 0.05146361514925957, "weighted_loss": 0.1980368196964264 }, { "avg_delta_l": 0.1093902736902237, "avg_loss_unweighted": 0.36931172013282776, "avg_weight": 0.5433357954025269, "std_delta_l": 0.11673981696367264, "std_weight": 0.0688665583729744, "weighted_loss": 0.20492416620254517 }, { "avg_delta_l": 0.1239321231842041, "avg_loss_unweighted": 0.39158785343170166, "avg_weight": 0.5244609117507935, "std_delta_l": 0.11947477608919144, "std_weight": 0.04892178624868393, "weighted_loss": 0.20680129528045654 }, { "avg_delta_l": 0.06766971945762634, "avg_loss_unweighted": 0.39840686321258545, "avg_weight": 0.554015040397644, "std_delta_l": 0.10401690006256104, "std_weight": 0.09063249826431274, "weighted_loss": 0.22522865235805511 }, { "avg_delta_l": 0.11878176778554916, "avg_loss_unweighted": 0.3860102593898773, "avg_weight": 0.5213523507118225, "std_delta_l": 0.11634156107902527, "std_weight": 0.04270471632480621, "weighted_loss": 0.20230062305927277 }, { "avg_delta_l": 0.1321404129266739, "avg_loss_unweighted": 0.392222136259079, "avg_weight": 0.5476464629173279, "std_delta_l": 0.15578651428222656, "std_weight": 0.06989968568086624, "weighted_loss": 0.2187144011259079 }, { "avg_delta_l": 0.11027940362691879, "avg_loss_unweighted": 0.3949024975299835, "avg_weight": 0.5440661311149597, "std_delta_l": 0.1307816058397293, "std_weight": 0.07506117224693298, "weighted_loss": 0.21801447868347168 }, { "avg_delta_l": 0.13961072266101837, "avg_loss_unweighted": 0.39015722274780273, "avg_weight": 0.5214987993240356, "std_delta_l": 0.12164720147848129, "std_weight": 0.042997680604457855, "weighted_loss": 0.20560702681541443 }, { "avg_delta_l": 0.11139744520187378, "avg_loss_unweighted": 0.3896150290966034, "avg_weight": 0.5508718490600586, "std_delta_l": 0.14686653017997742, "std_weight": 0.0714639201760292, "weighted_loss": 0.21612827479839325 }, { "avg_delta_l": 0.12732942402362823, "avg_loss_unweighted": 0.3868478834629059, "avg_weight": 0.5244313478469849, "std_delta_l": 0.12250936776399612, "std_weight": 0.04798918962478638, "weighted_loss": 0.20402184128761292 }, { "avg_delta_l": 0.12022728472948074, "avg_loss_unweighted": 0.37790447473526, "avg_weight": 0.5311996936798096, "std_delta_l": 0.11618762463331223, "std_weight": 0.05554364621639252, "weighted_loss": 0.2027084231376648 }, { "avg_delta_l": 0.11574891209602356, "avg_loss_unweighted": 0.3718106746673584, "avg_weight": 0.5345856547355652, "std_delta_l": 0.11545813083648682, "std_weight": 0.057034291326999664, "weighted_loss": 0.20011524856090546 }, { "avg_delta_l": 0.11638812720775604, "avg_loss_unweighted": 0.3859742283821106, "avg_weight": 0.5441563725471497, "std_delta_l": 0.14075888693332672, "std_weight": 0.07336351275444031, "weighted_loss": 0.21309100091457367 }, { "avg_delta_l": 0.09202943742275238, "avg_loss_unweighted": 0.3879345953464508, "avg_weight": 0.5664360523223877, "std_delta_l": 0.16542716324329376, "std_weight": 0.10400541126728058, "weighted_loss": 0.22574298083782196 }, { "epoch": 1.382608695652174, "grad_norm": 0.4195005684003258, "learning_rate": 6.810344827586207e-06, "loss": 0.4213, "step": 80 }, { "avg_delta_l": 0.11405263096094131, "avg_loss_unweighted": 0.38839131593704224, "avg_weight": 0.5395442247390747, "std_delta_l": 0.13533642888069153, "std_weight": 0.060972582548856735, "weighted_loss": 0.21234872937202454 }, { "avg_delta_l": 0.15170417726039886, "avg_loss_unweighted": 0.3801925778388977, "avg_weight": 0.523012638092041, "std_delta_l": 0.12442187964916229, "std_weight": 0.042646341025829315, "weighted_loss": 0.20025396347045898 }, { "avg_delta_l": 0.05338989943265915, "avg_loss_unweighted": 0.39882221817970276, "avg_weight": 0.5699282884597778, "std_delta_l": 0.12135326862335205, "std_weight": 0.11086559295654297, "weighted_loss": 0.23336420953273773 }, { "avg_delta_l": 0.0636080652475357, "avg_loss_unweighted": 0.37799233198165894, "avg_weight": 0.5869836807250977, "std_delta_l": 0.1494600772857666, "std_weight": 0.12058055400848389, "weighted_loss": 0.22928111255168915 }, { "avg_delta_l": 0.08657005429267883, "avg_loss_unweighted": 0.3727377653121948, "avg_weight": 0.5602478384971619, "std_delta_l": 0.13281787931919098, "std_weight": 0.09562622010707855, "weighted_loss": 0.21473214030265808 }, { "avg_delta_l": 0.08728201687335968, "avg_loss_unweighted": 0.3753495216369629, "avg_weight": 0.5526108145713806, "std_delta_l": 0.1316734254360199, "std_weight": 0.08754785358905792, "weighted_loss": 0.21014586091041565 }, { "avg_delta_l": 0.11546390503644943, "avg_loss_unweighted": 0.3743175268173218, "avg_weight": 0.5283535122871399, "std_delta_l": 0.135368213057518, "std_weight": 0.05173797532916069, "weighted_loss": 0.19973120093345642 }, { "avg_delta_l": 0.08948896825313568, "avg_loss_unweighted": 0.3837243318557739, "avg_weight": 0.5764588117599487, "std_delta_l": 0.15031176805496216, "std_weight": 0.09051123261451721, "weighted_loss": 0.22803157567977905 }, { "avg_delta_l": 0.12406963109970093, "avg_loss_unweighted": 0.38077282905578613, "avg_weight": 0.5319032669067383, "std_delta_l": 0.1283748745918274, "std_weight": 0.05365610867738724, "weighted_loss": 0.2051205188035965 }, { "avg_delta_l": 0.0909406766295433, "avg_loss_unweighted": 0.3640882968902588, "avg_weight": 0.5553637146949768, "std_delta_l": 0.14149592816829681, "std_weight": 0.07671308517456055, "weighted_loss": 0.20888309180736542 }, { "avg_delta_l": 0.11948686093091965, "avg_loss_unweighted": 0.37041985988616943, "avg_weight": 0.5424971580505371, "std_delta_l": 0.14774899184703827, "std_weight": 0.07293998450040817, "weighted_loss": 0.20678216218948364 }, { "avg_delta_l": 0.0989232063293457, "avg_loss_unweighted": 0.37529364228248596, "avg_weight": 0.5713295340538025, "std_delta_l": 0.164472758769989, "std_weight": 0.09498895704746246, "weighted_loss": 0.22142593562602997 }, { "avg_delta_l": 0.06592076271772385, "avg_loss_unweighted": 0.3806704580783844, "avg_weight": 0.5562414526939392, "std_delta_l": 0.11987489461898804, "std_weight": 0.09065823256969452, "weighted_loss": 0.21586887538433075 }, { "avg_delta_l": 0.08288151770830154, "avg_loss_unweighted": 0.3863367736339569, "avg_weight": 0.5645678043365479, "std_delta_l": 0.13534177839756012, "std_weight": 0.08460119366645813, "weighted_loss": 0.22380799055099487 }, { "avg_delta_l": 0.0941685140132904, "avg_loss_unweighted": 0.3740473687648773, "avg_weight": 0.5450156331062317, "std_delta_l": 0.12886501848697662, "std_weight": 0.07566528022289276, "weighted_loss": 0.20512177050113678 }, { "avg_delta_l": 0.09540319442749023, "avg_loss_unweighted": 0.3706924021244049, "avg_weight": 0.5452516674995422, "std_delta_l": 0.12526893615722656, "std_weight": 0.0636177659034729, "weighted_loss": 0.2060866802930832 }, { "avg_delta_l": 0.07621756941080093, "avg_loss_unweighted": 0.3876403868198395, "avg_weight": 0.5742973685264587, "std_delta_l": 0.13877348601818085, "std_weight": 0.10003256052732468, "weighted_loss": 0.22794708609580994 }, { "avg_delta_l": 0.10639713704586029, "avg_loss_unweighted": 0.3608226776123047, "avg_weight": 0.5435827970504761, "std_delta_l": 0.13395603001117706, "std_weight": 0.07748337835073471, "weighted_loss": 0.19891424477100372 }, { "avg_delta_l": 0.11510254442691803, "avg_loss_unweighted": 0.3586878180503845, "avg_weight": 0.5291735529899597, "std_delta_l": 0.11429881304502487, "std_weight": 0.04426426440477371, "weighted_loss": 0.19154614210128784 }, { "avg_delta_l": 0.055856626480817795, "avg_loss_unweighted": 0.36513304710388184, "avg_weight": 0.557701587677002, "std_delta_l": 0.10944679379463196, "std_weight": 0.08959122002124786, "weighted_loss": 0.20788782835006714 }, { "epoch": 1.5565217391304347, "grad_norm": 0.4320465141967921, "learning_rate": 7.672413793103449e-06, "loss": 0.4247, "step": 90 }, { "avg_delta_l": 0.07889685034751892, "avg_loss_unweighted": 0.3772495687007904, "avg_weight": 0.5447753071784973, "std_delta_l": 0.11756213009357452, "std_weight": 0.07967305183410645, "weighted_loss": 0.2097976803779602 }, { "avg_delta_l": 0.11409979313611984, "avg_loss_unweighted": 0.3577146530151367, "avg_weight": 0.5398183465003967, "std_delta_l": 0.11919420212507248, "std_weight": 0.06651075184345245, "weighted_loss": 0.19548945128917694 }, { "avg_delta_l": 0.08171166479587555, "avg_loss_unweighted": 0.3728654384613037, "avg_weight": 0.5612470507621765, "std_delta_l": 0.13545289635658264, "std_weight": 0.09088841080665588, "weighted_loss": 0.21461434662342072 }, { "avg_delta_l": 0.08159765601158142, "avg_loss_unweighted": 0.36260315775871277, "avg_weight": 0.5553887486457825, "std_delta_l": 0.12540818750858307, "std_weight": 0.0838145911693573, "weighted_loss": 0.20665547251701355 }, { "avg_delta_l": 0.09383374452590942, "avg_loss_unweighted": 0.37063729763031006, "avg_weight": 0.5608471632003784, "std_delta_l": 0.14809565246105194, "std_weight": 0.10722330212593079, "weighted_loss": 0.21265548467636108 }, { "avg_delta_l": 0.05544435232877731, "avg_loss_unweighted": 0.37616491317749023, "avg_weight": 0.5960796475410461, "std_delta_l": 0.1652316451072693, "std_weight": 0.14960677921772003, "weighted_loss": 0.2322092056274414 }, { "avg_delta_l": 0.07333935052156448, "avg_loss_unweighted": 0.3722888231277466, "avg_weight": 0.5624762177467346, "std_delta_l": 0.1368538737297058, "std_weight": 0.08170213550329208, "weighted_loss": 0.21312043070793152 }, { "avg_delta_l": 0.06632471829652786, "avg_loss_unweighted": 0.36480775475502014, "avg_weight": 0.582015872001648, "std_delta_l": 0.150806725025177, "std_weight": 0.10491889715194702, "weighted_loss": 0.21975493431091309 }, { "avg_delta_l": 0.1173098236322403, "avg_loss_unweighted": 0.35604557394981384, "avg_weight": 0.5400232076644897, "std_delta_l": 0.12058677524328232, "std_weight": 0.06163029372692108, "weighted_loss": 0.19546693563461304 }, { "avg_delta_l": 0.08170995861291885, "avg_loss_unweighted": 0.3605515956878662, "avg_weight": 0.5663715600967407, "std_delta_l": 0.1363828033208847, "std_weight": 0.10655945539474487, "weighted_loss": 0.2099664807319641 }, { "avg_delta_l": 0.08999944478273392, "avg_loss_unweighted": 0.36718517541885376, "avg_weight": 0.5593604445457458, "std_delta_l": 0.12401191890239716, "std_weight": 0.09529672563076019, "weighted_loss": 0.2099662572145462 }, { "avg_delta_l": 0.08934436738491058, "avg_loss_unweighted": 0.37332889437675476, "avg_weight": 0.5529903173446655, "std_delta_l": 0.13154108822345734, "std_weight": 0.07362653315067291, "weighted_loss": 0.21001332998275757 }, { "avg_delta_l": 0.06363844871520996, "avg_loss_unweighted": 0.35829418897628784, "avg_weight": 0.5781877040863037, "std_delta_l": 0.14620722830295563, "std_weight": 0.09677626192569733, "weighted_loss": 0.21324609220027924 }, { "avg_delta_l": 0.06423717737197876, "avg_loss_unweighted": 0.33993062376976013, "avg_weight": 0.5644131898880005, "std_delta_l": 0.10799743980169296, "std_weight": 0.08337893337011337, "weighted_loss": 0.19696322083473206 }, { "avg_delta_l": 0.08803922683000565, "avg_loss_unweighted": 0.3579729497432709, "avg_weight": 0.5437818765640259, "std_delta_l": 0.11968450248241425, "std_weight": 0.062448546290397644, "weighted_loss": 0.19706295430660248 }, { "avg_delta_l": 0.06695061177015305, "avg_loss_unweighted": 0.3439165949821472, "avg_weight": 0.5485647320747375, "std_delta_l": 0.09924124926328659, "std_weight": 0.07642677426338196, "weighted_loss": 0.1922675371170044 }, { "avg_delta_l": 0.06561532616615295, "avg_loss_unweighted": 0.377594530582428, "avg_weight": 0.586716890335083, "std_delta_l": 0.14506399631500244, "std_weight": 0.1024317666888237, "weighted_loss": 0.22917823493480682 }, { "avg_delta_l": 0.07005654275417328, "avg_loss_unweighted": 0.34358933568000793, "avg_weight": 0.5626966953277588, "std_delta_l": 0.12171177566051483, "std_weight": 0.08949553221464157, "weighted_loss": 0.1975245624780655 }, { "avg_delta_l": 0.0653952956199646, "avg_loss_unweighted": 0.35363340377807617, "avg_weight": 0.5576915740966797, "std_delta_l": 0.12257247418165207, "std_weight": 0.09412967413663864, "weighted_loss": 0.20267674326896667 }, { "avg_delta_l": 0.07353992760181427, "avg_loss_unweighted": 0.35553625226020813, "avg_weight": 0.5414992570877075, "std_delta_l": 0.10092651098966599, "std_weight": 0.05568066984415054, "weighted_loss": 0.19543534517288208 }, { "epoch": 1.7304347826086957, "grad_norm": 0.5024755604581342, "learning_rate": 8.53448275862069e-06, "loss": 0.4154, "step": 100 }, { "avg_delta_l": 0.09052085131406784, "avg_loss_unweighted": 0.3575979173183441, "avg_weight": 0.551098108291626, "std_delta_l": 0.11818711459636688, "std_weight": 0.08115909993648529, "weighted_loss": 0.20056304335594177 }, { "avg_delta_l": 0.04795929417014122, "avg_loss_unweighted": 0.35914507508277893, "avg_weight": 0.5669584274291992, "std_delta_l": 0.12589195370674133, "std_weight": 0.10775643587112427, "weighted_loss": 0.20962676405906677 }, { "avg_delta_l": 0.0651022270321846, "avg_loss_unweighted": 0.36854180693626404, "avg_weight": 0.567306399345398, "std_delta_l": 0.12172622233629227, "std_weight": 0.08894288539886475, "weighted_loss": 0.21405208110809326 }, { "avg_delta_l": 0.053295835852622986, "avg_loss_unweighted": 0.3665301203727722, "avg_weight": 0.5684587359428406, "std_delta_l": 0.12589217722415924, "std_weight": 0.08967900276184082, "weighted_loss": 0.21565841138362885 }, { "avg_delta_l": 0.08348570019006729, "avg_loss_unweighted": 0.3376914858818054, "avg_weight": 0.5522847175598145, "std_delta_l": 0.12497366219758987, "std_weight": 0.0714554563164711, "weighted_loss": 0.1902841329574585 }, { "avg_delta_l": 0.04200267791748047, "avg_loss_unweighted": 0.36570632457733154, "avg_weight": 0.5812720060348511, "std_delta_l": 0.12058035284280777, "std_weight": 0.1238454207777977, "weighted_loss": 0.21881143748760223 }, { "avg_delta_l": 0.0798913836479187, "avg_loss_unweighted": 0.36608272790908813, "avg_weight": 0.5691516399383545, "std_delta_l": 0.13429385423660278, "std_weight": 0.10555758327245712, "weighted_loss": 0.21442680060863495 }, { "avg_delta_l": 0.039749182760715485, "avg_loss_unweighted": 0.36760446429252625, "avg_weight": 0.5719919800758362, "std_delta_l": 0.10390505194664001, "std_weight": 0.0970713347196579, "weighted_loss": 0.21397072076797485 }, { "avg_delta_l": 0.06338436156511307, "avg_loss_unweighted": 0.362619012594223, "avg_weight": 0.5741775035858154, "std_delta_l": 0.142997145652771, "std_weight": 0.09465671330690384, "weighted_loss": 0.21270503103733063 }, { "avg_delta_l": 0.06868117302656174, "avg_loss_unweighted": 0.3524933457374573, "avg_weight": 0.5468699932098389, "std_delta_l": 0.11125892400741577, "std_weight": 0.08013454079627991, "weighted_loss": 0.1990593820810318 }, { "avg_delta_l": 0.018343202769756317, "avg_loss_unweighted": 0.3681495189666748, "avg_weight": 0.5962863564491272, "std_delta_l": 0.10687077045440674, "std_weight": 0.1250355839729309, "weighted_loss": 0.2241075038909912 }, { "avg_delta_l": 0.06088175252079964, "avg_loss_unweighted": 0.35185664892196655, "avg_weight": 0.5609768629074097, "std_delta_l": 0.10936099290847778, "std_weight": 0.08828335255384445, "weighted_loss": 0.2038060873746872 }, { "avg_delta_l": 0.06094089150428772, "avg_loss_unweighted": 0.3466334640979767, "avg_weight": 0.5724457502365112, "std_delta_l": 0.13043111562728882, "std_weight": 0.10907435417175293, "weighted_loss": 0.20258846879005432 }, { "avg_delta_l": 0.056327249854803085, "avg_loss_unweighted": 0.33476635813713074, "avg_weight": 0.5569602847099304, "std_delta_l": 0.10420440882444382, "std_weight": 0.08398202061653137, "weighted_loss": 0.19088348746299744 }, { "avg_delta_l": 0.03143453225493431, "avg_loss_unweighted": 0.3727351725101471, "avg_weight": 0.5955535769462585, "std_delta_l": 0.14823970198631287, "std_weight": 0.12291218340396881, "weighted_loss": 0.23061791062355042 }, { "avg_delta_l": 0.057024478912353516, "avg_loss_unweighted": 0.35942062735557556, "avg_weight": 0.5756452083587646, "std_delta_l": 0.12397170066833496, "std_weight": 0.10347631573677063, "weighted_loss": 0.2119646966457367 }, { "avg_delta_l": 0.07651133835315704, "avg_loss_unweighted": 0.34110063314437866, "avg_weight": 0.5637298226356506, "std_delta_l": 0.11713196337223053, "std_weight": 0.07798025757074356, "weighted_loss": 0.19774341583251953 }, { "avg_delta_l": 0.0782494843006134, "avg_loss_unweighted": 0.35154297947883606, "avg_weight": 0.5545663237571716, "std_delta_l": 0.12219424545764923, "std_weight": 0.08941524475812912, "weighted_loss": 0.19849666953086853 }, { "avg_delta_l": 0.0431508906185627, "avg_loss_unweighted": 0.35438621044158936, "avg_weight": 0.581626296043396, "std_delta_l": 0.11028248816728592, "std_weight": 0.10309465229511261, "weighted_loss": 0.21316516399383545 }, { "avg_delta_l": 0.04651563614606857, "avg_loss_unweighted": 0.3448317348957062, "avg_weight": 0.5861396789550781, "std_delta_l": 0.13951563835144043, "std_weight": 0.11695605516433716, "weighted_loss": 0.2105066329240799 }, { "epoch": 1.9043478260869566, "grad_norm": 0.550781662597611, "learning_rate": 9.396551724137931e-06, "loss": 0.4173, "step": 110 }, { "avg_delta_l": 0.037263765931129456, "avg_loss_unweighted": 0.36605799198150635, "avg_weight": 0.5772643089294434, "std_delta_l": 0.11173078417778015, "std_weight": 0.11861343681812286, "weighted_loss": 0.21699047088623047 }, { "avg_delta_l": 0.03893228992819786, "avg_loss_unweighted": 0.3527145981788635, "avg_weight": 0.5905067920684814, "std_delta_l": 0.11868257820606232, "std_weight": 0.11748513579368591, "weighted_loss": 0.21440666913986206 }, { "avg_delta_l": 0.05157933384180069, "avg_loss_unweighted": 0.34673812985420227, "avg_weight": 0.5712972283363342, "std_delta_l": 0.10753461718559265, "std_weight": 0.08986782282590866, "weighted_loss": 0.2049693614244461 }, { "avg_delta_l": 0.04640709608793259, "avg_loss_unweighted": 0.35359835624694824, "avg_weight": 0.5912652611732483, "std_delta_l": 0.13064847886562347, "std_weight": 0.11883147060871124, "weighted_loss": 0.2172350287437439 }, { "avg_delta_l": 0.030465392395853996, "avg_loss_unweighted": 0.35542380809783936, "avg_weight": 0.5986889600753784, "std_delta_l": 0.1407461315393448, "std_weight": 0.1432180255651474, "weighted_loss": 0.2217884361743927 }, { "avg_delta_l": 0.03019985370337963, "avg_loss_unweighted": 0.36010363698005676, "avg_weight": 0.5980595946311951, "std_delta_l": 0.11554424464702606, "std_weight": 0.11600793898105621, "weighted_loss": 0.2250669151544571 }, { "avg_delta_l": 0.04433848708868027, "avg_loss_unweighted": 0.3381858170032501, "avg_weight": 0.5741024017333984, "std_delta_l": 0.1163366287946701, "std_weight": 0.13095888495445251, "weighted_loss": 0.20280475914478302 }, { "avg_delta_l": 0.05348242074251175, "avg_loss_unweighted": 0.34308281540870667, "avg_weight": 0.5722458362579346, "std_delta_l": 0.11920809745788574, "std_weight": 0.10097602009773254, "weighted_loss": 0.20501114428043365 }, { "avg_delta_l": 0.05320025235414505, "avg_loss_unweighted": 0.3503051698207855, "avg_weight": 0.5657894015312195, "std_delta_l": 0.1259656548500061, "std_weight": 0.09718091040849686, "weighted_loss": 0.20335274934768677 }, { "avg_delta_l": 0.02276686578989029, "avg_loss_unweighted": 0.3490937352180481, "avg_weight": 0.5899388790130615, "std_delta_l": 0.11183275282382965, "std_weight": 0.11554323881864548, "weighted_loss": 0.2121649533510208 }, { "avg_delta_l": 0.042011961340904236, "avg_loss_unweighted": 0.33876627683639526, "avg_weight": 0.5635848045349121, "std_delta_l": 0.10329463332891464, "std_weight": 0.10659510642290115, "weighted_loss": 0.19704650342464447 }, { "avg_delta_l": 0.01951202005147934, "avg_loss_unweighted": 0.34556815028190613, "avg_weight": 0.551224410533905, "std_delta_l": 0.06940672546625137, "std_weight": 0.08203892409801483, "weighted_loss": 0.1937689483165741 }, { "avg_delta_l": 0.05102841556072235, "avg_loss_unweighted": 0.3229859471321106, "avg_weight": 0.5188887119293213, "std_delta_l": 0.058231230825185776, "std_weight": 0.03558511659502983, "weighted_loss": 0.1684805005788803 }, { "avg_delta_l": 0.018471065908670425, "avg_loss_unweighted": 0.33247610926628113, "avg_weight": 0.5509437918663025, "std_delta_l": 0.07243549823760986, "std_weight": 0.09903277456760406, "weighted_loss": 0.18780763447284698 }, { "avg_delta_l": 0.025946717709302902, "avg_loss_unweighted": 0.3333665728569031, "avg_weight": 0.5563641786575317, "std_delta_l": 0.0774131566286087, "std_weight": 0.09224643558263779, "weighted_loss": 0.1900508999824524 }, { "avg_delta_l": 0.026630153879523277, "avg_loss_unweighted": 0.3382563889026642, "avg_weight": 0.5528491139411926, "std_delta_l": 0.08954901993274689, "std_weight": 0.09496333450078964, "weighted_loss": 0.19204488396644592 }, { "avg_delta_l": 0.033281825482845306, "avg_loss_unweighted": 0.32647520303726196, "avg_weight": 0.537727952003479, "std_delta_l": 0.07339173555374146, "std_weight": 0.06316563487052917, "weighted_loss": 0.1774093359708786 }, { "avg_delta_l": 0.04920445755124092, "avg_loss_unweighted": 0.3226529657840729, "avg_weight": 0.5361884236335754, "std_delta_l": 0.07526650279760361, "std_weight": 0.06412596255540848, "weighted_loss": 0.17561759054660797 }, { "avg_delta_l": 0.016668619588017464, "avg_loss_unweighted": 0.3417390286922455, "avg_weight": 0.5671137571334839, "std_delta_l": 0.07560178637504578, "std_weight": 0.08959954231977463, "weighted_loss": 0.19971419870853424 }, { "epoch": 2.0695652173913044, "grad_norm": 0.5156508111953939, "learning_rate": 9.999796259054765e-06, "loss": 0.3806, "step": 120 }, { "avg_delta_l": 0.0395650677382946, "avg_loss_unweighted": 0.3280797302722931, "avg_weight": 0.5601596236228943, "std_delta_l": 0.0921466276049614, "std_weight": 0.07256456464529037, "weighted_loss": 0.1896604597568512 }, { "avg_delta_l": 0.039926011115312576, "avg_loss_unweighted": 0.31180712580680847, "avg_weight": 0.5412822365760803, "std_delta_l": 0.07737559080123901, "std_weight": 0.08256446570158005, "weighted_loss": 0.17238757014274597 }, { "avg_delta_l": 0.04822110757231712, "avg_loss_unweighted": 0.33671456575393677, "avg_weight": 0.5480035543441772, "std_delta_l": 0.08908261358737946, "std_weight": 0.07526335120201111, "weighted_loss": 0.18775109946727753 }, { "avg_delta_l": 0.03738253563642502, "avg_loss_unweighted": 0.32044485211372375, "avg_weight": 0.560404896736145, "std_delta_l": 0.10000883787870407, "std_weight": 0.10506677627563477, "weighted_loss": 0.18368056416511536 }, { "avg_delta_l": 0.054452914744615555, "avg_loss_unweighted": 0.32136258482933044, "avg_weight": 0.5265464782714844, "std_delta_l": 0.07497384399175644, "std_weight": 0.04315180331468582, "weighted_loss": 0.17145614326000214 }, { "avg_delta_l": 0.0314483605325222, "avg_loss_unweighted": 0.30822810530662537, "avg_weight": 0.5391538143157959, "std_delta_l": 0.0579805001616478, "std_weight": 0.05610053613781929, "weighted_loss": 0.16878323256969452 }, { "avg_delta_l": 0.014527502469718456, "avg_loss_unweighted": 0.33107098937034607, "avg_weight": 0.5663619041442871, "std_delta_l": 0.08363670110702515, "std_weight": 0.10063459724187851, "weighted_loss": 0.19385696947574615 }, { "avg_delta_l": 0.04226018115878105, "avg_loss_unweighted": 0.31164026260375977, "avg_weight": 0.5471322536468506, "std_delta_l": 0.08007743209600449, "std_weight": 0.07612727582454681, "weighted_loss": 0.17522582411766052 }, { "avg_delta_l": 0.03898968547582626, "avg_loss_unweighted": 0.31198713183403015, "avg_weight": 0.5576525926589966, "std_delta_l": 0.08801516890525818, "std_weight": 0.09039678424596786, "weighted_loss": 0.17912152409553528 }, { "avg_delta_l": 0.029677659273147583, "avg_loss_unweighted": 0.3277643024921417, "avg_weight": 0.5605132579803467, "std_delta_l": 0.08338212221860886, "std_weight": 0.09593406319618225, "weighted_loss": 0.18792080879211426 }, { "avg_delta_l": 0.0561150461435318, "avg_loss_unweighted": 0.30518659949302673, "avg_weight": 0.5427989959716797, "std_delta_l": 0.08989511430263519, "std_weight": 0.07459453493356705, "weighted_loss": 0.16830074787139893 }, { "avg_delta_l": 0.030641132965683937, "avg_loss_unweighted": 0.31672653555870056, "avg_weight": 0.545841634273529, "std_delta_l": 0.06723060458898544, "std_weight": 0.07103796303272247, "weighted_loss": 0.17601002752780914 }, { "avg_delta_l": 0.04441983997821808, "avg_loss_unweighted": 0.3172282576560974, "avg_weight": 0.5472831726074219, "std_delta_l": 0.09357626736164093, "std_weight": 0.08968450874090195, "weighted_loss": 0.17787505686283112 }, { "avg_delta_l": 0.05081865191459656, "avg_loss_unweighted": 0.3278582692146301, "avg_weight": 0.5295311808586121, "std_delta_l": 0.07133889198303223, "std_weight": 0.059062398970127106, "weighted_loss": 0.17601081728935242 }, { "avg_delta_l": 0.03271795064210892, "avg_loss_unweighted": 0.32995927333831787, "avg_weight": 0.557592511177063, "std_delta_l": 0.08404121547937393, "std_weight": 0.09636307507753372, "weighted_loss": 0.18790902197360992 }, { "avg_delta_l": 0.03857465833425522, "avg_loss_unweighted": 0.32778793573379517, "avg_weight": 0.5620187520980835, "std_delta_l": 0.09685446321964264, "std_weight": 0.0964430570602417, "weighted_loss": 0.18741683661937714 }, { "avg_delta_l": 0.027633853256702423, "avg_loss_unweighted": 0.341582328081131, "avg_weight": 0.5691813826560974, "std_delta_l": 0.09947579354047775, "std_weight": 0.11279944330453873, "weighted_loss": 0.2002142071723938 }, { "avg_delta_l": 0.026381727308034897, "avg_loss_unweighted": 0.32209426164627075, "avg_weight": 0.5502736568450928, "std_delta_l": 0.07244274020195007, "std_weight": 0.09329427778720856, "weighted_loss": 0.18147116899490356 }, { "avg_delta_l": 0.04525351896882057, "avg_loss_unweighted": 0.3035469055175781, "avg_weight": 0.5297105312347412, "std_delta_l": 0.0627364069223404, "std_weight": 0.051994387060403824, "weighted_loss": 0.16283400356769562 }, { "avg_delta_l": 0.06270533055067062, "avg_loss_unweighted": 0.30552423000335693, "avg_weight": 0.5353778600692749, "std_delta_l": 0.0887497067451477, "std_weight": 0.06200433894991875, "weighted_loss": 0.16677461564540863 }, { "epoch": 2.243478260869565, "grad_norm": 0.5067244877015039, "learning_rate": 9.996174659697248e-06, "loss": 0.3595, "step": 130 }, { "avg_delta_l": 0.04317775368690491, "avg_loss_unweighted": 0.3248911499977112, "avg_weight": 0.5478967428207397, "std_delta_l": 0.08325875550508499, "std_weight": 0.08930790424346924, "weighted_loss": 0.18376347422599792 }, { "avg_delta_l": 0.011496729217469692, "avg_loss_unweighted": 0.33087706565856934, "avg_weight": 0.5697777271270752, "std_delta_l": 0.07884110510349274, "std_weight": 0.10602997988462448, "weighted_loss": 0.1921403855085373 }, { "avg_delta_l": 0.04608100280165672, "avg_loss_unweighted": 0.3042893707752228, "avg_weight": 0.5504115223884583, "std_delta_l": 0.09735676646232605, "std_weight": 0.07348797470331192, "weighted_loss": 0.1721215546131134 }, { "avg_delta_l": 0.02903987467288971, "avg_loss_unweighted": 0.3107127547264099, "avg_weight": 0.550062894821167, "std_delta_l": 0.07221972942352295, "std_weight": 0.07906649261713028, "weighted_loss": 0.1752331703901291 }, { "avg_delta_l": 0.02951926551759243, "avg_loss_unweighted": 0.31790754199028015, "avg_weight": 0.5509128570556641, "std_delta_l": 0.07077503204345703, "std_weight": 0.08688954263925552, "weighted_loss": 0.1798461377620697 }, { "avg_delta_l": 0.004735156893730164, "avg_loss_unweighted": 0.3269495368003845, "avg_weight": 0.589762270450592, "std_delta_l": 0.09843584150075912, "std_weight": 0.11542101949453354, "weighted_loss": 0.20061232149600983 }, { "avg_delta_l": 0.03987099602818489, "avg_loss_unweighted": 0.3021817207336426, "avg_weight": 0.5281587243080139, "std_delta_l": 0.0566859170794487, "std_weight": 0.05083483085036278, "weighted_loss": 0.1617056280374527 }, { "avg_delta_l": 0.03916081413626671, "avg_loss_unweighted": 0.3124772608280182, "avg_weight": 0.5495955944061279, "std_delta_l": 0.09388484805822372, "std_weight": 0.09054113924503326, "weighted_loss": 0.17636673152446747 }, { "avg_delta_l": 0.03695179894566536, "avg_loss_unweighted": 0.31404054164886475, "avg_weight": 0.5514540076255798, "std_delta_l": 0.07698186486959457, "std_weight": 0.07750439643859863, "weighted_loss": 0.17818856239318848 }, { "avg_delta_l": 0.03377716243267059, "avg_loss_unweighted": 0.33142614364624023, "avg_weight": 0.5740692615509033, "std_delta_l": 0.11822889000177383, "std_weight": 0.11803306639194489, "weighted_loss": 0.1993585079908371 }, { "avg_delta_l": 0.027826620265841484, "avg_loss_unweighted": 0.3215627670288086, "avg_weight": 0.5700395107269287, "std_delta_l": 0.08105122298002243, "std_weight": 0.09343180060386658, "weighted_loss": 0.1879308521747589 }, { "avg_delta_l": 0.026576707139611244, "avg_loss_unweighted": 0.32007279992103577, "avg_weight": 0.5749254822731018, "std_delta_l": 0.09918597340583801, "std_weight": 0.1191043108701706, "weighted_loss": 0.1910400092601776 }, { "avg_delta_l": 0.023027822375297546, "avg_loss_unweighted": 0.30412590503692627, "avg_weight": 0.5732216835021973, "std_delta_l": 0.08581215888261795, "std_weight": 0.0962308794260025, "weighted_loss": 0.18184857070446014 }, { "avg_delta_l": 0.041040197014808655, "avg_loss_unweighted": 0.3074507415294647, "avg_weight": 0.5629310607910156, "std_delta_l": 0.0940471962094307, "std_weight": 0.08785375952720642, "weighted_loss": 0.17780621349811554 }, { "avg_delta_l": 0.04002157226204872, "avg_loss_unweighted": 0.30967405438423157, "avg_weight": 0.5361440777778625, "std_delta_l": 0.07091780751943588, "std_weight": 0.05307943373918533, "weighted_loss": 0.16879889369010925 }, { "avg_delta_l": 0.03404765576124191, "avg_loss_unweighted": 0.31092068552970886, "avg_weight": 0.5506799817085266, "std_delta_l": 0.0747108981013298, "std_weight": 0.07819407433271408, "weighted_loss": 0.17602911591529846 }, { "avg_delta_l": 0.03241525962948799, "avg_loss_unweighted": 0.3212367296218872, "avg_weight": 0.5523432493209839, "std_delta_l": 0.07218249142169952, "std_weight": 0.07989335805177689, "weighted_loss": 0.1803644299507141 }, { "avg_delta_l": 0.0520479753613472, "avg_loss_unweighted": 0.3088188171386719, "avg_weight": 0.5449076294898987, "std_delta_l": 0.09075340628623962, "std_weight": 0.08404015749692917, "weighted_loss": 0.1705493927001953 }, { "avg_delta_l": 0.02969064563512802, "avg_loss_unweighted": 0.29212287068367004, "avg_weight": 0.555463433265686, "std_delta_l": 0.0725395679473877, "std_weight": 0.07039055228233337, "weighted_loss": 0.16686409711837769 }, { "avg_delta_l": 0.03431478142738342, "avg_loss_unweighted": 0.3136028051376343, "avg_weight": 0.5628478527069092, "std_delta_l": 0.08611810952425003, "std_weight": 0.08544968068599701, "weighted_loss": 0.18148350715637207 }, { "epoch": 2.417391304347826, "grad_norm": 0.5170854372264676, "learning_rate": 9.98802925834003e-06, "loss": 0.3602, "step": 140 }, { "avg_delta_l": 0.042998410761356354, "avg_loss_unweighted": 0.310738742351532, "avg_weight": 0.5517234206199646, "std_delta_l": 0.0858965516090393, "std_weight": 0.0737023651599884, "weighted_loss": 0.1749216914176941 }, { "avg_delta_l": 0.028484143316745758, "avg_loss_unweighted": 0.32188260555267334, "avg_weight": 0.5619922280311584, "std_delta_l": 0.08708123862743378, "std_weight": 0.09635089337825775, "weighted_loss": 0.1842799186706543 }, { "avg_delta_l": 0.02859179861843586, "avg_loss_unweighted": 0.3113979697227478, "avg_weight": 0.556490421295166, "std_delta_l": 0.07942584902048111, "std_weight": 0.09198556840419769, "weighted_loss": 0.17753125727176666 }, { "avg_delta_l": 0.0370640903711319, "avg_loss_unweighted": 0.3033445179462433, "avg_weight": 0.5557569265365601, "std_delta_l": 0.08386765420436859, "std_weight": 0.0939638689160347, "weighted_loss": 0.17304767668247223 }, { "avg_delta_l": 0.04996879771351814, "avg_loss_unweighted": 0.31106165051460266, "avg_weight": 0.5332283973693848, "std_delta_l": 0.07673069089651108, "std_weight": 0.05107629671692848, "weighted_loss": 0.168688103556633 }, { "avg_delta_l": 0.03397570550441742, "avg_loss_unweighted": 0.3147435784339905, "avg_weight": 0.5624597668647766, "std_delta_l": 0.0914679691195488, "std_weight": 0.09284018725156784, "weighted_loss": 0.18090002238750458 }, { "avg_delta_l": 0.04134594649076462, "avg_loss_unweighted": 0.3148970305919647, "avg_weight": 0.5496701598167419, "std_delta_l": 0.08472533524036407, "std_weight": 0.08591895550489426, "weighted_loss": 0.17729321122169495 }, { "avg_delta_l": 0.02036041021347046, "avg_loss_unweighted": 0.33121976256370544, "avg_weight": 0.5754024386405945, "std_delta_l": 0.09373028576374054, "std_weight": 0.12013189494609833, "weighted_loss": 0.19365230202674866 }, { "avg_delta_l": 0.033839914947748184, "avg_loss_unweighted": 0.3063850700855255, "avg_weight": 0.5496431589126587, "std_delta_l": 0.0752810686826706, "std_weight": 0.07926821708679199, "weighted_loss": 0.1724427342414856 }, { "avg_delta_l": 0.023653531447052956, "avg_loss_unweighted": 0.3070662319660187, "avg_weight": 0.5631270408630371, "std_delta_l": 0.07953745126724243, "std_weight": 0.09251067787408829, "weighted_loss": 0.17598706483840942 }, { "avg_delta_l": 0.026783660054206848, "avg_loss_unweighted": 0.29498472809791565, "avg_weight": 0.5707302093505859, "std_delta_l": 0.09786196798086166, "std_weight": 0.10037996619939804, "weighted_loss": 0.17355512082576752 }, { "avg_delta_l": 0.030155034735798836, "avg_loss_unweighted": 0.30656811594963074, "avg_weight": 0.5547643899917603, "std_delta_l": 0.06950046122074127, "std_weight": 0.08355754613876343, "weighted_loss": 0.172822043299675 }, { "avg_delta_l": 0.04893834888935089, "avg_loss_unweighted": 0.30478206276893616, "avg_weight": 0.5378935933113098, "std_delta_l": 0.07386361807584763, "std_weight": 0.05912192910909653, "weighted_loss": 0.16665640473365784 }, { "avg_delta_l": 0.002523072063922882, "avg_loss_unweighted": 0.32033902406692505, "avg_weight": 0.5893202424049377, "std_delta_l": 0.08854945749044418, "std_weight": 0.11964622139930725, "weighted_loss": 0.19562746584415436 }, { "avg_delta_l": 0.016283400356769562, "avg_loss_unweighted": 0.3135753870010376, "avg_weight": 0.5783539414405823, "std_delta_l": 0.09906960278749466, "std_weight": 0.11819364130496979, "weighted_loss": 0.18765893578529358 }, { "avg_delta_l": 0.01624893955886364, "avg_loss_unweighted": 0.30847063660621643, "avg_weight": 0.58613520860672, "std_delta_l": 0.10183528810739517, "std_weight": 0.11213138699531555, "weighted_loss": 0.18732905387878418 }, { "avg_delta_l": 0.04997514933347702, "avg_loss_unweighted": 0.3068508207798004, "avg_weight": 0.5567362904548645, "std_delta_l": 0.0987430289387703, "std_weight": 0.07259588688611984, "weighted_loss": 0.17294426262378693 }, { "avg_delta_l": 0.025622988119721413, "avg_loss_unweighted": 0.30263668298721313, "avg_weight": 0.5658217668533325, "std_delta_l": 0.08593571186065674, "std_weight": 0.1008361205458641, "weighted_loss": 0.17575335502624512 }, { "avg_delta_l": 0.016104349866509438, "avg_loss_unweighted": 0.30504781007766724, "avg_weight": 0.5756675004959106, "std_delta_l": 0.08746235817670822, "std_weight": 0.10235615819692612, "weighted_loss": 0.1806108057498932 }, { "avg_delta_l": 0.04217356815934181, "avg_loss_unweighted": 0.3049801290035248, "avg_weight": 0.5702625513076782, "std_delta_l": 0.1103631928563118, "std_weight": 0.10617959499359131, "weighted_loss": 0.17905502021312714 }, { "epoch": 2.591304347826087, "grad_norm": 0.6297735532901086, "learning_rate": 9.975367430262289e-06, "loss": 0.3571, "step": 150 }, { "avg_delta_l": 0.03301140293478966, "avg_loss_unweighted": 0.30710068345069885, "avg_weight": 0.5854387283325195, "std_delta_l": 0.12137877196073532, "std_weight": 0.11745970696210861, "weighted_loss": 0.1897754967212677 }, { "avg_delta_l": 0.0034379600547254086, "avg_loss_unweighted": 0.3244893252849579, "avg_weight": 0.6013844013214111, "std_delta_l": 0.0962478443980217, "std_weight": 0.1299721896648407, "weighted_loss": 0.2013060599565506 }, { "avg_delta_l": 0.027068817988038063, "avg_loss_unweighted": 0.3116914927959442, "avg_weight": 0.5754093527793884, "std_delta_l": 0.0898495763540268, "std_weight": 0.09687294811010361, "weighted_loss": 0.18437202274799347 }, { "avg_delta_l": 0.031551294028759, "avg_loss_unweighted": 0.3066096305847168, "avg_weight": 0.5651648640632629, "std_delta_l": 0.08613172173500061, "std_weight": 0.08545635640621185, "weighted_loss": 0.17774376273155212 }, { "avg_delta_l": 0.018608860671520233, "avg_loss_unweighted": 0.29225921630859375, "avg_weight": 0.5808510184288025, "std_delta_l": 0.10172076523303986, "std_weight": 0.1285494565963745, "weighted_loss": 0.17546918988227844 }, { "avg_delta_l": 0.014098910614848137, "avg_loss_unweighted": 0.3124576508998871, "avg_weight": 0.5887715816497803, "std_delta_l": 0.09504952281713486, "std_weight": 0.11074794083833694, "weighted_loss": 0.19153709709644318 }, { "avg_delta_l": 0.056466177105903625, "avg_loss_unweighted": 0.29821911454200745, "avg_weight": 0.5317636728286743, "std_delta_l": 0.0847586840391159, "std_weight": 0.04776710644364357, "weighted_loss": 0.16240642964839935 }, { "avg_delta_l": 0.03153189271688461, "avg_loss_unweighted": 0.29723209142684937, "avg_weight": 0.5634925365447998, "std_delta_l": 0.08803154528141022, "std_weight": 0.08685842156410217, "weighted_loss": 0.17144371569156647 }, { "avg_delta_l": 0.03080788627266884, "avg_loss_unweighted": 0.2944837212562561, "avg_weight": 0.5716264843940735, "std_delta_l": 0.10693871974945068, "std_weight": 0.1151118353009224, "weighted_loss": 0.1741354614496231 }, { "avg_delta_l": 0.00784388929605484, "avg_loss_unweighted": 0.3161535859107971, "avg_weight": 0.5930792093276978, "std_delta_l": 0.10446744412183762, "std_weight": 0.13089542090892792, "weighted_loss": 0.19316674768924713 }, { "avg_delta_l": 0.02860248275101185, "avg_loss_unweighted": 0.3070221245288849, "avg_weight": 0.5738588571548462, "std_delta_l": 0.10407359898090363, "std_weight": 0.09948358684778214, "weighted_loss": 0.18186676502227783 }, { "avg_delta_l": 0.029494142159819603, "avg_loss_unweighted": 0.3232352137565613, "avg_weight": 0.5735418200492859, "std_delta_l": 0.10220284014940262, "std_weight": 0.10420560836791992, "weighted_loss": 0.18963587284088135 }, { "avg_delta_l": 0.035390499979257584, "avg_loss_unweighted": 0.29255223274230957, "avg_weight": 0.569622814655304, "std_delta_l": 0.10114005953073502, "std_weight": 0.10081636905670166, "weighted_loss": 0.17400114238262177 }, { "avg_delta_l": 0.030125832185149193, "avg_loss_unweighted": 0.3044716417789459, "avg_weight": 0.5584911108016968, "std_delta_l": 0.08231185376644135, "std_weight": 0.08511104434728622, "weighted_loss": 0.17516475915908813 }, { "avg_delta_l": 0.027014978229999542, "avg_loss_unweighted": 0.32523372769355774, "avg_weight": 0.5877562761306763, "std_delta_l": 0.10133005678653717, "std_weight": 0.104185089468956, "weighted_loss": 0.19704800844192505 }, { "avg_delta_l": 0.007397000212222338, "avg_loss_unweighted": 0.32078298926353455, "avg_weight": 0.5868707299232483, "std_delta_l": 0.08301066607236862, "std_weight": 0.10737638175487518, "weighted_loss": 0.19332164525985718 }, { "avg_delta_l": 0.023780254647135735, "avg_loss_unweighted": 0.30479973554611206, "avg_weight": 0.5988821983337402, "std_delta_l": 0.11930784583091736, "std_weight": 0.13558682799339294, "weighted_loss": 0.1892751306295395 }, { "avg_delta_l": 0.021201472729444504, "avg_loss_unweighted": 0.30217549204826355, "avg_weight": 0.5700823664665222, "std_delta_l": 0.08529983460903168, "std_weight": 0.11301208287477493, "weighted_loss": 0.17816109955310822 }, { "avg_delta_l": 0.010287296026945114, "avg_loss_unweighted": 0.3150200843811035, "avg_weight": 0.5930776596069336, "std_delta_l": 0.10333441197872162, "std_weight": 0.1177978366613388, "weighted_loss": 0.1930554360151291 }, { "avg_delta_l": 0.019959421828389168, "avg_loss_unweighted": 0.311036616563797, "avg_weight": 0.5716919898986816, "std_delta_l": 0.09121120721101761, "std_weight": 0.11396466940641403, "weighted_loss": 0.181712806224823 }, { "epoch": 2.765217391304348, "grad_norm": 0.5911868321932829, "learning_rate": 9.958200640155928e-06, "loss": 0.3675, "step": 160 }, { "avg_delta_l": 0.006133800372481346, "avg_loss_unweighted": 0.30233949422836304, "avg_weight": 0.6044599413871765, "std_delta_l": 0.10877612233161926, "std_weight": 0.13554833829402924, "weighted_loss": 0.19182926416397095 }, { "avg_delta_l": 0.029787875711917877, "avg_loss_unweighted": 0.2905745804309845, "avg_weight": 0.5581348538398743, "std_delta_l": 0.07467550784349442, "std_weight": 0.07703803479671478, "weighted_loss": 0.16577230393886566 }, { "avg_delta_l": 0.010300138965249062, "avg_loss_unweighted": 0.3044673800468445, "avg_weight": 0.5998170971870422, "std_delta_l": 0.09646614640951157, "std_weight": 0.11618566513061523, "weighted_loss": 0.1896713376045227 }, { "avg_delta_l": 0.014945215545594692, "avg_loss_unweighted": 0.3072667419910431, "avg_weight": 0.576383113861084, "std_delta_l": 0.08463585376739502, "std_weight": 0.09528454393148422, "weighted_loss": 0.1836763173341751 }, { "avg_delta_l": 0.027661751955747604, "avg_loss_unweighted": 0.3008608818054199, "avg_weight": 0.5745205879211426, "std_delta_l": 0.09408625215291977, "std_weight": 0.09791090339422226, "weighted_loss": 0.17689694464206696 }, { "avg_delta_l": 0.02925269491970539, "avg_loss_unweighted": 0.29600903391838074, "avg_weight": 0.5697059035301208, "std_delta_l": 0.08953654766082764, "std_weight": 0.09351524710655212, "weighted_loss": 0.17439004778862 }, { "avg_delta_l": -0.00976468250155449, "avg_loss_unweighted": 0.32178542017936707, "avg_weight": 0.6156356334686279, "std_delta_l": 0.10232347995042801, "std_weight": 0.14192931354045868, "weighted_loss": 0.20630653202533722 }, { "avg_delta_l": 0.0005801210645586252, "avg_loss_unweighted": 0.3064367175102234, "avg_weight": 0.5929704308509827, "std_delta_l": 0.08212850242853165, "std_weight": 0.11765653640031815, "weighted_loss": 0.1893693208694458 }, { "avg_delta_l": 0.007521163672208786, "avg_loss_unweighted": 0.3117219805717468, "avg_weight": 0.5997936129570007, "std_delta_l": 0.09427577257156372, "std_weight": 0.11357095092535019, "weighted_loss": 0.19375143945217133 }, { "avg_delta_l": 0.03169231489300728, "avg_loss_unweighted": 0.2885124981403351, "avg_weight": 0.5760589241981506, "std_delta_l": 0.1048140898346901, "std_weight": 0.11208866536617279, "weighted_loss": 0.17210403084754944 }, { "avg_delta_l": 0.026061411947011948, "avg_loss_unweighted": 0.29905691742897034, "avg_weight": 0.5848031044006348, "std_delta_l": 0.0994178056716919, "std_weight": 0.11337679624557495, "weighted_loss": 0.1797623485326767 }, { "avg_delta_l": 0.010462051257491112, "avg_loss_unweighted": 0.300773561000824, "avg_weight": 0.601003110408783, "std_delta_l": 0.11290518939495087, "std_weight": 0.13009704649448395, "weighted_loss": 0.19049593806266785 }, { "avg_delta_l": 0.021665651351213455, "avg_loss_unweighted": 0.2912117838859558, "avg_weight": 0.5867242217063904, "std_delta_l": 0.10487432032823563, "std_weight": 0.11372850835323334, "weighted_loss": 0.17812331020832062 }, { "avg_delta_l": 0.031328219920396805, "avg_loss_unweighted": 0.2952151894569397, "avg_weight": 0.5671963691711426, "std_delta_l": 0.09377192705869675, "std_weight": 0.08861123025417328, "weighted_loss": 0.17100149393081665 }, { "avg_delta_l": 0.020451296120882034, "avg_loss_unweighted": 0.30682289600372314, "avg_weight": 0.5919762253761292, "std_delta_l": 0.10494504868984222, "std_weight": 0.11025001108646393, "weighted_loss": 0.18998976051807404 }, { "avg_delta_l": 0.04762883856892586, "avg_loss_unweighted": 0.2852179706096649, "avg_weight": 0.5741288661956787, "std_delta_l": 0.11247563362121582, "std_weight": 0.11394599080085754, "weighted_loss": 0.17104972898960114 }, { "avg_delta_l": 0.016458239406347275, "avg_loss_unweighted": 0.30339136719703674, "avg_weight": 0.5815475583076477, "std_delta_l": 0.09113394469022751, "std_weight": 0.10275106877088547, "weighted_loss": 0.18029534816741943 }, { "avg_delta_l": 0.0269242525100708, "avg_loss_unweighted": 0.30112990736961365, "avg_weight": 0.5715745091438293, "std_delta_l": 0.08708041906356812, "std_weight": 0.09091410785913467, "weighted_loss": 0.1752837896347046 }, { "avg_delta_l": 0.0016909982077777386, "avg_loss_unweighted": 0.30830857157707214, "avg_weight": 0.6015232801437378, "std_delta_l": 0.10040076076984406, "std_weight": 0.13012221455574036, "weighted_loss": 0.19207945466041565 }, { "avg_delta_l": 0.027972033247351646, "avg_loss_unweighted": 0.2842269837856293, "avg_weight": 0.5918493270874023, "std_delta_l": 0.1051461398601532, "std_weight": 0.10639525204896927, "weighted_loss": 0.1750052273273468 }, { "epoch": 2.9391304347826086, "grad_norm": 0.550900987449537, "learning_rate": 9.936544431744839e-06, "loss": 0.3647, "step": 170 }, { "avg_delta_l": 0.026537645608186722, "avg_loss_unweighted": 0.29575273394584656, "avg_weight": 0.5725399851799011, "std_delta_l": 0.10136236995458603, "std_weight": 0.08476404845714569, "weighted_loss": 0.17417529225349426 }, { "avg_delta_l": 0.015571675263345242, "avg_loss_unweighted": 0.3029303252696991, "avg_weight": 0.5940877199172974, "std_delta_l": 0.09427326172590256, "std_weight": 0.11212538182735443, "weighted_loss": 0.18544739484786987 }, { "avg_delta_l": 0.02735673449933529, "avg_loss_unweighted": 0.2834835946559906, "avg_weight": 0.5692064166069031, "std_delta_l": 0.09609398245811462, "std_weight": 0.10921913385391235, "weighted_loss": 0.1653435230255127 }, { "avg_delta_l": 0.04144305735826492, "avg_loss_unweighted": 0.27916738390922546, "avg_weight": 0.5615522265434265, "std_delta_l": 0.09138951450586319, "std_weight": 0.07948474586009979, "weighted_loss": 0.16181601583957672 }, { "avg_delta_l": 0.004525887779891491, "avg_loss_unweighted": 0.3128912150859833, "avg_weight": 0.5945740342140198, "std_delta_l": 0.09748748689889908, "std_weight": 0.11242331564426422, "weighted_loss": 0.19213703274726868 }, { "avg_delta_l": -0.012920490466058254, "avg_loss_unweighted": 0.30981579422950745, "avg_weight": 0.6172437071800232, "std_delta_l": 0.08421831578016281, "std_weight": 0.12433348596096039, "weighted_loss": 0.19900144636631012 }, { "avg_delta_l": 0.017945248633623123, "avg_loss_unweighted": 0.29415470361709595, "avg_weight": 0.581298828125, "std_delta_l": 0.09475836157798767, "std_weight": 0.1075861006975174, "weighted_loss": 0.17723868787288666 }, { "avg_delta_l": 0.029713813215494156, "avg_loss_unweighted": 0.27310866117477417, "avg_weight": 0.5345670580863953, "std_delta_l": 0.06589744240045547, "std_weight": 0.0616687536239624, "weighted_loss": 0.14769524335861206 }, { "avg_delta_l": 0.04237682372331619, "avg_loss_unweighted": 0.26527562737464905, "avg_weight": 0.5329339504241943, "std_delta_l": 0.06957416981458664, "std_weight": 0.05989191308617592, "weighted_loss": 0.1440335065126419 }, { "avg_delta_l": 0.03786222264170647, "avg_loss_unweighted": 0.26556751132011414, "avg_weight": 0.5217189192771912, "std_delta_l": 0.05357944220304489, "std_weight": 0.040867697447538376, "weighted_loss": 0.14013144373893738 }, { "avg_delta_l": 0.036248259246349335, "avg_loss_unweighted": 0.26527419686317444, "avg_weight": 0.5294366478919983, "std_delta_l": 0.06442193686962128, "std_weight": 0.0521269291639328, "weighted_loss": 0.1423310488462448 }, { "avg_delta_l": 0.03267838433384895, "avg_loss_unweighted": 0.27717918157577515, "avg_weight": 0.5293540358543396, "std_delta_l": 0.05937223136425018, "std_weight": 0.037316106259822845, "weighted_loss": 0.14889837801456451 }, { "avg_delta_l": 0.0283013004809618, "avg_loss_unweighted": 0.25230517983436584, "avg_weight": 0.5434218049049377, "std_delta_l": 0.07414273172616959, "std_weight": 0.06050749123096466, "weighted_loss": 0.1400381326675415 }, { "avg_delta_l": 0.03623158484697342, "avg_loss_unweighted": 0.2636779248714447, "avg_weight": 0.5341254472732544, "std_delta_l": 0.061844851821660995, "std_weight": 0.05106770247220993, "weighted_loss": 0.14431150257587433 }, { "avg_delta_l": 0.032644517719745636, "avg_loss_unweighted": 0.27942872047424316, "avg_weight": 0.5395035743713379, "std_delta_l": 0.06685009598731995, "std_weight": 0.05300307646393776, "weighted_loss": 0.15450088679790497 }, { "avg_delta_l": 0.025517093017697334, "avg_loss_unweighted": 0.267502099275589, "avg_weight": 0.5421650409698486, "std_delta_l": 0.060364823788404465, "std_weight": 0.06017884239554405, "weighted_loss": 0.14824281632900238 }, { "avg_delta_l": 0.03424116224050522, "avg_loss_unweighted": 0.26837027072906494, "avg_weight": 0.5554147362709045, "std_delta_l": 0.08895385265350342, "std_weight": 0.10147908329963684, "weighted_loss": 0.1530066579580307 }, { "avg_delta_l": 0.04255843535065651, "avg_loss_unweighted": 0.2645798623561859, "avg_weight": 0.5407106876373291, "std_delta_l": 0.07232695817947388, "std_weight": 0.05501794070005417, "weighted_loss": 0.14610058069229126 }, { "avg_delta_l": 0.022036312147974968, "avg_loss_unweighted": 0.285745233297348, "avg_weight": 0.549157977104187, "std_delta_l": 0.06836108863353729, "std_weight": 0.07464267313480377, "weighted_loss": 0.16028320789337158 }, { "epoch": 3.1043478260869564, "grad_norm": 0.5975131377481779, "learning_rate": 9.91041841371078e-06, "loss": 0.3025, "step": 180 }, { "avg_delta_l": 0.03761916235089302, "avg_loss_unweighted": 0.2700180411338806, "avg_weight": 0.5319543480873108, "std_delta_l": 0.055756282061338425, "std_weight": 0.050807755440473557, "weighted_loss": 0.1459462195634842 }, { "avg_delta_l": 0.03890223428606987, "avg_loss_unweighted": 0.2549828886985779, "avg_weight": 0.5332978367805481, "std_delta_l": 0.07434648275375366, "std_weight": 0.066595658659935, "weighted_loss": 0.1394035518169403 }, { "avg_delta_l": 0.021774480119347572, "avg_loss_unweighted": 0.27746516466140747, "avg_weight": 0.5563706755638123, "std_delta_l": 0.07647085189819336, "std_weight": 0.09353747963905334, "weighted_loss": 0.15799842774868011 }, { "avg_delta_l": 0.03194907307624817, "avg_loss_unweighted": 0.2797035574913025, "avg_weight": 0.5357585549354553, "std_delta_l": 0.06083492562174797, "std_weight": 0.06659814715385437, "weighted_loss": 0.15331219136714935 }, { "avg_delta_l": 0.03079039976000786, "avg_loss_unweighted": 0.26267969608306885, "avg_weight": 0.5334467887878418, "std_delta_l": 0.06247498467564583, "std_weight": 0.060183942317962646, "weighted_loss": 0.14246521890163422 }, { "avg_delta_l": 0.0308360755443573, "avg_loss_unweighted": 0.2685239911079407, "avg_weight": 0.5592827200889587, "std_delta_l": 0.08168445527553558, "std_weight": 0.08603944629430771, "weighted_loss": 0.1550048291683197 }, { "avg_delta_l": 0.01728927530348301, "avg_loss_unweighted": 0.2832055687904358, "avg_weight": 0.5626421570777893, "std_delta_l": 0.06794313341379166, "std_weight": 0.08510570228099823, "weighted_loss": 0.16215652227401733 }, { "avg_delta_l": 0.0267656110227108, "avg_loss_unweighted": 0.271925687789917, "avg_weight": 0.5498865246772766, "std_delta_l": 0.0623871311545372, "std_weight": 0.07184014469385147, "weighted_loss": 0.15213583409786224 }, { "avg_delta_l": 0.016910182312130928, "avg_loss_unweighted": 0.2640504539012909, "avg_weight": 0.5659065842628479, "std_delta_l": 0.07316520810127258, "std_weight": 0.08576389402151108, "weighted_loss": 0.15372304618358612 }, { "avg_delta_l": 0.028933750465512276, "avg_loss_unweighted": 0.26188012957572937, "avg_weight": 0.5546417832374573, "std_delta_l": 0.07549473643302917, "std_weight": 0.08604920655488968, "weighted_loss": 0.1495038866996765 }, { "avg_delta_l": 0.01098401565104723, "avg_loss_unweighted": 0.2802900969982147, "avg_weight": 0.5611764192581177, "std_delta_l": 0.06443975120782852, "std_weight": 0.08670645952224731, "weighted_loss": 0.15937042236328125 }, { "avg_delta_l": 0.044881608337163925, "avg_loss_unweighted": 0.25602439045906067, "avg_weight": 0.538140058517456, "std_delta_l": 0.07776200026273727, "std_weight": 0.07072542607784271, "weighted_loss": 0.14056861400604248 }, { "avg_delta_l": 0.02864101156592369, "avg_loss_unweighted": 0.26810771226882935, "avg_weight": 0.5514994263648987, "std_delta_l": 0.06937252730131149, "std_weight": 0.05954289436340332, "weighted_loss": 0.15102918446063995 }, { "avg_delta_l": 0.026450572535395622, "avg_loss_unweighted": 0.2747378945350647, "avg_weight": 0.5567503571510315, "std_delta_l": 0.07669539004564285, "std_weight": 0.07940898090600967, "weighted_loss": 0.15652090311050415 }, { "avg_delta_l": 0.014574448578059673, "avg_loss_unweighted": 0.26708275079727173, "avg_weight": 0.5704107880592346, "std_delta_l": 0.08354394137859344, "std_weight": 0.11202509701251984, "weighted_loss": 0.1581411063671112 }, { "avg_delta_l": 0.04477774724364281, "avg_loss_unweighted": 0.2667483389377594, "avg_weight": 0.5319807529449463, "std_delta_l": 0.07311510294675827, "std_weight": 0.05533599480986595, "weighted_loss": 0.14470018446445465 }, { "avg_delta_l": 0.04209842532873154, "avg_loss_unweighted": 0.27328503131866455, "avg_weight": 0.5461872220039368, "std_delta_l": 0.08075668662786484, "std_weight": 0.06953100860118866, "weighted_loss": 0.15238316357135773 }, { "avg_delta_l": 0.02803366258740425, "avg_loss_unweighted": 0.26001548767089844, "avg_weight": 0.5475224256515503, "std_delta_l": 0.08061651140451431, "std_weight": 0.08176257461309433, "weighted_loss": 0.14756540954113007 }, { "avg_delta_l": 0.024758152663707733, "avg_loss_unweighted": 0.271634966135025, "avg_weight": 0.5466726422309875, "std_delta_l": 0.06420181691646576, "std_weight": 0.0707017332315445, "weighted_loss": 0.1518552154302597 }, { "avg_delta_l": 0.038986507803201675, "avg_loss_unweighted": 0.25531136989593506, "avg_weight": 0.5416052937507629, "std_delta_l": 0.07465767115354538, "std_weight": 0.06681425869464874, "weighted_loss": 0.1408923715353012 }, { "epoch": 3.2782608695652176, "grad_norm": 0.5444838226637001, "learning_rate": 9.87984624193864e-06, "loss": 0.3015, "step": 190 }, { "avg_delta_l": 0.04748285561800003, "avg_loss_unweighted": 0.24311663210391998, "avg_weight": 0.5399395227432251, "std_delta_l": 0.07821369916200638, "std_weight": 0.07156622409820557, "weighted_loss": 0.13498049974441528 }, { "avg_delta_l": 0.0394781157374382, "avg_loss_unweighted": 0.24808402359485626, "avg_weight": 0.5374687314033508, "std_delta_l": 0.07325731962919235, "std_weight": 0.060452938079833984, "weighted_loss": 0.1360354870557785 }, { "avg_delta_l": 0.029300089925527573, "avg_loss_unweighted": 0.27359509468078613, "avg_weight": 0.55894535779953, "std_delta_l": 0.0824345275759697, "std_weight": 0.0965067446231842, "weighted_loss": 0.15824751555919647 }, { "avg_delta_l": 0.027674928307533264, "avg_loss_unweighted": 0.26594337821006775, "avg_weight": 0.5668303966522217, "std_delta_l": 0.09527195245027542, "std_weight": 0.09922946244478226, "weighted_loss": 0.15599121153354645 }, { "avg_delta_l": 0.04025159776210785, "avg_loss_unweighted": 0.2620602250099182, "avg_weight": 0.5338308215141296, "std_delta_l": 0.06681255251169205, "std_weight": 0.06292210519313812, "weighted_loss": 0.14130015671253204 }, { "avg_delta_l": 0.024233030155301094, "avg_loss_unweighted": 0.26848798990249634, "avg_weight": 0.5800108313560486, "std_delta_l": 0.0886930599808693, "std_weight": 0.0927317813038826, "weighted_loss": 0.16177460551261902 }, { "avg_delta_l": 0.030315976589918137, "avg_loss_unweighted": 0.24957235157489777, "avg_weight": 0.5545258522033691, "std_delta_l": 0.07728360593318939, "std_weight": 0.08043602854013443, "weighted_loss": 0.14257551729679108 }, { "avg_delta_l": 0.005140320397913456, "avg_loss_unweighted": 0.2771289050579071, "avg_weight": 0.5703087449073792, "std_delta_l": 0.06486357748508453, "std_weight": 0.0949360579252243, "weighted_loss": 0.1630794256925583 }, { "avg_delta_l": 0.015953626483678818, "avg_loss_unweighted": 0.27373385429382324, "avg_weight": 0.5717732906341553, "std_delta_l": 0.07126154750585556, "std_weight": 0.08228856325149536, "weighted_loss": 0.16085200011730194 }, { "avg_delta_l": 0.03486095741391182, "avg_loss_unweighted": 0.25923311710357666, "avg_weight": 0.5440599918365479, "std_delta_l": 0.08259174227714539, "std_weight": 0.07502323389053345, "weighted_loss": 0.14410407841205597 }, { "avg_delta_l": 0.02094588801264763, "avg_loss_unweighted": 0.2617586851119995, "avg_weight": 0.5561248064041138, "std_delta_l": 0.07556591182947159, "std_weight": 0.09828191995620728, "weighted_loss": 0.14850351214408875 }, { "avg_delta_l": 0.006903247907757759, "avg_loss_unweighted": 0.26887547969818115, "avg_weight": 0.5771740078926086, "std_delta_l": 0.08988916128873825, "std_weight": 0.1194368451833725, "weighted_loss": 0.1621273010969162 }, { "avg_delta_l": 0.020819850265979767, "avg_loss_unweighted": 0.28017231822013855, "avg_weight": 0.5561807155609131, "std_delta_l": 0.0881006121635437, "std_weight": 0.09854961931705475, "weighted_loss": 0.16094925999641418 }, { "avg_delta_l": 0.014828715473413467, "avg_loss_unweighted": 0.2741553783416748, "avg_weight": 0.5770451426506042, "std_delta_l": 0.09025299549102783, "std_weight": 0.11214262247085571, "weighted_loss": 0.16250719130039215 }, { "avg_delta_l": 0.007021431811153889, "avg_loss_unweighted": 0.27177903056144714, "avg_weight": 0.5797998309135437, "std_delta_l": 0.09173190593719482, "std_weight": 0.12065385282039642, "weighted_loss": 0.16330985724925995 }, { "avg_delta_l": -0.007244094740599394, "avg_loss_unweighted": 0.28208956122398376, "avg_weight": 0.6018142104148865, "std_delta_l": 0.09983442723751068, "std_weight": 0.14567314088344574, "weighted_loss": 0.17638900876045227 }, { "avg_delta_l": 0.04183369502425194, "avg_loss_unweighted": 0.25601261854171753, "avg_weight": 0.5408201217651367, "std_delta_l": 0.07323620468378067, "std_weight": 0.06900502741336823, "weighted_loss": 0.14134769141674042 }, { "avg_delta_l": 0.025043215602636337, "avg_loss_unweighted": 0.26668912172317505, "avg_weight": 0.5622579455375671, "std_delta_l": 0.07888053357601166, "std_weight": 0.09473560750484467, "weighted_loss": 0.15493136644363403 }, { "avg_delta_l": 0.006335943005979061, "avg_loss_unweighted": 0.2825203835964203, "avg_weight": 0.5727135539054871, "std_delta_l": 0.07024237513542175, "std_weight": 0.1000305712223053, "weighted_loss": 0.16578590869903564 }, { "avg_delta_l": 0.006763104349374771, "avg_loss_unweighted": 0.2800004780292511, "avg_weight": 0.5856332778930664, "std_delta_l": 0.08605509996414185, "std_weight": 0.10309576243162155, "weighted_loss": 0.16892996430397034 }, { "epoch": 3.4521739130434783, "grad_norm": 0.6199137364194383, "learning_rate": 9.844855598097138e-06, "loss": 0.3104, "step": 200 }, { "avg_delta_l": 0.02743351459503174, "avg_loss_unweighted": 0.25980690121650696, "avg_weight": 0.5566321611404419, "std_delta_l": 0.0889536440372467, "std_weight": 0.09481742978096008, "weighted_loss": 0.14754575490951538 }, { "avg_delta_l": 0.02227971889078617, "avg_loss_unweighted": 0.2704847753047943, "avg_weight": 0.5620359778404236, "std_delta_l": 0.0814022645354271, "std_weight": 0.0913088396191597, "weighted_loss": 0.15620344877243042 }, { "avg_delta_l": 0.027944369241595268, "avg_loss_unweighted": 0.2631215453147888, "avg_weight": 0.5583649277687073, "std_delta_l": 0.07471009343862534, "std_weight": 0.08577363193035126, "weighted_loss": 0.14936389029026031 }, { "avg_delta_l": 0.029923090711236, "avg_loss_unweighted": 0.2568562626838684, "avg_weight": 0.558116614818573, "std_delta_l": 0.08692973852157593, "std_weight": 0.09835247695446014, "weighted_loss": 0.14706556499004364 }, { "avg_delta_l": 0.006727623753249645, "avg_loss_unweighted": 0.2688196003437042, "avg_weight": 0.5765517354011536, "std_delta_l": 0.0807337537407875, "std_weight": 0.11151419579982758, "weighted_loss": 0.1612635999917984 }, { "avg_delta_l": 0.019370175898075104, "avg_loss_unweighted": 0.26998016238212585, "avg_weight": 0.5602870583534241, "std_delta_l": 0.07556893676519394, "std_weight": 0.08765214681625366, "weighted_loss": 0.15619678795337677 }, { "avg_delta_l": 0.034576334059238434, "avg_loss_unweighted": 0.25530529022216797, "avg_weight": 0.5592368841171265, "std_delta_l": 0.08766478300094604, "std_weight": 0.08267111331224442, "weighted_loss": 0.14655888080596924 }, { "avg_delta_l": 0.038636937737464905, "avg_loss_unweighted": 0.26139718294143677, "avg_weight": 0.5571210384368896, "std_delta_l": 0.09104517102241516, "std_weight": 0.091854989528656, "weighted_loss": 0.14892128109931946 }, { "avg_delta_l": 0.019900884479284286, "avg_loss_unweighted": 0.2796992063522339, "avg_weight": 0.5665146112442017, "std_delta_l": 0.086662657558918, "std_weight": 0.10579211264848709, "weighted_loss": 0.1634603589773178 }, { "avg_delta_l": 0.003359459340572357, "avg_loss_unweighted": 0.27314409613609314, "avg_weight": 0.5905882120132446, "std_delta_l": 0.08845258504152298, "std_weight": 0.11012622714042664, "weighted_loss": 0.16758191585540771 }, { "avg_delta_l": 0.03165031597018242, "avg_loss_unweighted": 0.24568253755569458, "avg_weight": 0.5530843138694763, "std_delta_l": 0.08888628333806992, "std_weight": 0.08306212723255157, "weighted_loss": 0.1397993564605713 }, { "avg_delta_l": 0.024133790284395218, "avg_loss_unweighted": 0.25328460335731506, "avg_weight": 0.5488121509552002, "std_delta_l": 0.06682655960321426, "std_weight": 0.07789932191371918, "weighted_loss": 0.14394158124923706 }, { "avg_delta_l": 0.03920155018568039, "avg_loss_unweighted": 0.26694172620773315, "avg_weight": 0.532231867313385, "std_delta_l": 0.06098780035972595, "std_weight": 0.05945124477148056, "weighted_loss": 0.14327526092529297 }, { "avg_delta_l": -0.0037494113203138113, "avg_loss_unweighted": 0.2775689959526062, "avg_weight": 0.5958917737007141, "std_delta_l": 0.09654179960489273, "std_weight": 0.12711873650550842, "weighted_loss": 0.1730237603187561 }, { "avg_delta_l": 0.04365552216768265, "avg_loss_unweighted": 0.24986979365348816, "avg_weight": 0.5531938672065735, "std_delta_l": 0.09143228828907013, "std_weight": 0.09554613381624222, "weighted_loss": 0.14221560955047607 }, { "avg_delta_l": 0.01684064045548439, "avg_loss_unweighted": 0.2693435549736023, "avg_weight": 0.5638300180435181, "std_delta_l": 0.08186186105012894, "std_weight": 0.09169520437717438, "weighted_loss": 0.15542951226234436 }, { "avg_delta_l": 0.03930174559354782, "avg_loss_unweighted": 0.2504051923751831, "avg_weight": 0.5488225817680359, "std_delta_l": 0.08555278182029724, "std_weight": 0.08024322241544724, "weighted_loss": 0.14036111533641815 }, { "avg_delta_l": 0.028720704838633537, "avg_loss_unweighted": 0.25199586153030396, "avg_weight": 0.556788444519043, "std_delta_l": 0.08013488352298737, "std_weight": 0.08170044422149658, "weighted_loss": 0.14413601160049438 }, { "avg_delta_l": 0.008838748559355736, "avg_loss_unweighted": 0.2643531858921051, "avg_weight": 0.5718996524810791, "std_delta_l": 0.08169133216142654, "std_weight": 0.09623847156763077, "weighted_loss": 0.15698538720607758 }, { "avg_delta_l": 0.025094153359532356, "avg_loss_unweighted": 0.2637802064418793, "avg_weight": 0.559851884841919, "std_delta_l": 0.07926557213068008, "std_weight": 0.08687774091959, "weighted_loss": 0.15231063961982727 }, { "epoch": 3.626086956521739, "grad_norm": 0.5630064163453445, "learning_rate": 9.805478164574374e-06, "loss": 0.3036, "step": 210 }, { "avg_delta_l": -0.0061105042695999146, "avg_loss_unweighted": 0.27016907930374146, "avg_weight": 0.6102843880653381, "std_delta_l": 0.09819750487804413, "std_weight": 0.13487307727336884, "weighted_loss": 0.17110055685043335 }, { "avg_delta_l": 0.018742825835943222, "avg_loss_unweighted": 0.2712828814983368, "avg_weight": 0.5741217732429504, "std_delta_l": 0.07979816943407059, "std_weight": 0.09943455457687378, "weighted_loss": 0.15980255603790283 }, { "avg_delta_l": 0.013295512646436691, "avg_loss_unweighted": 0.26562461256980896, "avg_weight": 0.5766966342926025, "std_delta_l": 0.08235561847686768, "std_weight": 0.09700751304626465, "weighted_loss": 0.15718300640583038 }, { "avg_delta_l": -0.004819291643798351, "avg_loss_unweighted": 0.27312442660331726, "avg_weight": 0.5927308797836304, "std_delta_l": 0.08964505046606064, "std_weight": 0.1298186480998993, "weighted_loss": 0.16791760921478271 }, { "avg_delta_l": 0.02877928875386715, "avg_loss_unweighted": 0.2751935124397278, "avg_weight": 0.5629420280456543, "std_delta_l": 0.0791466161608696, "std_weight": 0.07807708531618118, "weighted_loss": 0.1588110476732254 }, { "avg_delta_l": 0.03598771616816521, "avg_loss_unweighted": 0.2569040358066559, "avg_weight": 0.5421684384346008, "std_delta_l": 0.06597773730754852, "std_weight": 0.06127109378576279, "weighted_loss": 0.14226682484149933 }, { "avg_delta_l": 0.00813376996666193, "avg_loss_unweighted": 0.2635095417499542, "avg_weight": 0.5915670394897461, "std_delta_l": 0.08338338881731033, "std_weight": 0.10447560250759125, "weighted_loss": 0.1608380377292633 }, { "avg_delta_l": 0.010829217731952667, "avg_loss_unweighted": 0.26646512746810913, "avg_weight": 0.580963134765625, "std_delta_l": 0.09376443922519684, "std_weight": 0.11566506326198578, "weighted_loss": 0.1599007099866867 }, { "avg_delta_l": 0.017830535769462585, "avg_loss_unweighted": 0.24825841188430786, "avg_weight": 0.5732647180557251, "std_delta_l": 0.08557199686765671, "std_weight": 0.09722540527582169, "weighted_loss": 0.14820018410682678 }, { "avg_delta_l": 0.0003112363629043102, "avg_loss_unweighted": 0.2759259343147278, "avg_weight": 0.5934813022613525, "std_delta_l": 0.09069332480430603, "std_weight": 0.11762817949056625, "weighted_loss": 0.1694624423980713 }, { "avg_delta_l": 0.012821967713534832, "avg_loss_unweighted": 0.2658672034740448, "avg_weight": 0.5722475051879883, "std_delta_l": 0.09216421842575073, "std_weight": 0.11684737354516983, "weighted_loss": 0.1578620970249176 }, { "avg_delta_l": 0.00979647971689701, "avg_loss_unweighted": 0.28396856784820557, "avg_weight": 0.5870572328567505, "std_delta_l": 0.10237212479114532, "std_weight": 0.11908897012472153, "weighted_loss": 0.1719394326210022 }, { "avg_delta_l": 0.008904391899704933, "avg_loss_unweighted": 0.2612650990486145, "avg_weight": 0.5826662182807922, "std_delta_l": 0.08247274905443192, "std_weight": 0.11560987681150436, "weighted_loss": 0.15643291175365448 }, { "avg_delta_l": 0.012132684700191021, "avg_loss_unweighted": 0.2554279863834381, "avg_weight": 0.595228374004364, "std_delta_l": 0.10602464526891708, "std_weight": 0.13764122128486633, "weighted_loss": 0.15804271399974823 }, { "avg_delta_l": 0.007750160992145538, "avg_loss_unweighted": 0.2635039985179901, "avg_weight": 0.579934298992157, "std_delta_l": 0.08669904619455338, "std_weight": 0.12302729487419128, "weighted_loss": 0.15922707319259644 }, { "avg_delta_l": 0.015283934772014618, "avg_loss_unweighted": 0.2539255917072296, "avg_weight": 0.5652536749839783, "std_delta_l": 0.06727532297372818, "std_weight": 0.08286447823047638, "weighted_loss": 0.14633134007453918 }, { "avg_delta_l": 0.023900877684354782, "avg_loss_unweighted": 0.2629268765449524, "avg_weight": 0.5690685510635376, "std_delta_l": 0.08739190548658371, "std_weight": 0.09678404033184052, "weighted_loss": 0.15269909799098969 }, { "avg_delta_l": 0.004215589724481106, "avg_loss_unweighted": 0.263738751411438, "avg_weight": 0.5865025520324707, "std_delta_l": 0.09170800447463989, "std_weight": 0.11969190835952759, "weighted_loss": 0.16165156662464142 }, { "avg_delta_l": 0.025843260809779167, "avg_loss_unweighted": 0.2753400206565857, "avg_weight": 0.5652322173118591, "std_delta_l": 0.0761726126074791, "std_weight": 0.07270815968513489, "weighted_loss": 0.1599368453025818 }, { "avg_delta_l": 0.00910801999270916, "avg_loss_unweighted": 0.25803858041763306, "avg_weight": 0.5840824842453003, "std_delta_l": 0.08871788531541824, "std_weight": 0.11199401319026947, "weighted_loss": 0.15538568794727325 }, { "epoch": 3.8, "grad_norm": 0.614880364236754, "learning_rate": 9.761749595790907e-06, "loss": 0.3175, "step": 220 }, { "avg_delta_l": 0.008949999697506428, "avg_loss_unweighted": 0.26361382007598877, "avg_weight": 0.5875194668769836, "std_delta_l": 0.09190472215414047, "std_weight": 0.10380861163139343, "weighted_loss": 0.16331037878990173 }, { "avg_delta_l": 0.008498532697558403, "avg_loss_unweighted": 0.24654719233512878, "avg_weight": 0.5825494527816772, "std_delta_l": 0.07699260115623474, "std_weight": 0.09040646255016327, "weighted_loss": 0.14861714839935303 }, { "avg_delta_l": 0.011849774047732353, "avg_loss_unweighted": 0.2643030881881714, "avg_weight": 0.5831953287124634, "std_delta_l": 0.07944957166910172, "std_weight": 0.09869883954524994, "weighted_loss": 0.1590207815170288 }, { "avg_delta_l": 0.007918551564216614, "avg_loss_unweighted": 0.25710752606391907, "avg_weight": 0.5931588411331177, "std_delta_l": 0.09022355079650879, "std_weight": 0.1161484494805336, "weighted_loss": 0.15898534655570984 }, { "avg_delta_l": -0.0022813421674072742, "avg_loss_unweighted": 0.25819969177246094, "avg_weight": 0.6160919070243835, "std_delta_l": 0.10317188501358032, "std_weight": 0.12395758926868439, "weighted_loss": 0.1668773889541626 }, { "avg_delta_l": 0.026993408799171448, "avg_loss_unweighted": 0.26526740193367004, "avg_weight": 0.5722802877426147, "std_delta_l": 0.09569071233272552, "std_weight": 0.10895846784114838, "weighted_loss": 0.1573610007762909 }, { "avg_delta_l": 0.004723793361335993, "avg_loss_unweighted": 0.27098050713539124, "avg_weight": 0.5910596251487732, "std_delta_l": 0.09028255194425583, "std_weight": 0.12171245366334915, "weighted_loss": 0.1637151837348938 }, { "avg_delta_l": 0.023722892627120018, "avg_loss_unweighted": 0.2570474445819855, "avg_weight": 0.5855447053909302, "std_delta_l": 0.10860302299261093, "std_weight": 0.11563372611999512, "weighted_loss": 0.15689143538475037 }, { "avg_delta_l": -0.007816582918167114, "avg_loss_unweighted": 0.27049896121025085, "avg_weight": 0.6122379302978516, "std_delta_l": 0.09289495646953583, "std_weight": 0.13153333961963654, "weighted_loss": 0.17229999601840973 }, { "avg_delta_l": 0.01153617911040783, "avg_loss_unweighted": 0.26334866881370544, "avg_weight": 0.5813209414482117, "std_delta_l": 0.08351358771324158, "std_weight": 0.10587695240974426, "weighted_loss": 0.1564939022064209 }, { "avg_delta_l": 0.014356577768921852, "avg_loss_unweighted": 0.2610740065574646, "avg_weight": 0.5838754177093506, "std_delta_l": 0.09857744723558426, "std_weight": 0.12072256207466125, "weighted_loss": 0.1579001247882843 }, { "avg_delta_l": 0.011129133403301239, "avg_loss_unweighted": 0.2771175503730774, "avg_weight": 0.5818697214126587, "std_delta_l": 0.08040887862443924, "std_weight": 0.09881656616926193, "weighted_loss": 0.1658954918384552 }, { "avg_delta_l": 0.0009276669006794691, "avg_loss_unweighted": 0.2582229673862457, "avg_weight": 0.595759928226471, "std_delta_l": 0.09338940680027008, "std_weight": 0.1241258755326271, "weighted_loss": 0.1607307493686676 }, { "avg_delta_l": 0.030937988311052322, "avg_loss_unweighted": 0.24836492538452148, "avg_weight": 0.5483681559562683, "std_delta_l": 0.07874396443367004, "std_weight": 0.08409389853477478, "weighted_loss": 0.1392272263765335 }, { "avg_delta_l": 0.0005769631825387478, "avg_loss_unweighted": 0.27628353238105774, "avg_weight": 0.6028632521629333, "std_delta_l": 0.1034807339310646, "std_weight": 0.12760531902313232, "weighted_loss": 0.1735897660255432 }, { "avg_delta_l": 0.0006299708038568497, "avg_loss_unweighted": 0.2565305233001709, "avg_weight": 0.599833607673645, "std_delta_l": 0.09453439712524414, "std_weight": 0.11403297632932663, "weighted_loss": 0.1596444696187973 }, { "avg_delta_l": 0.01924983784556389, "avg_loss_unweighted": 0.2533973455429077, "avg_weight": 0.590130090713501, "std_delta_l": 0.11038278043270111, "std_weight": 0.12527333199977875, "weighted_loss": 0.15524449944496155 }, { "avg_delta_l": 0.006133701652288437, "avg_loss_unweighted": 0.25738564133644104, "avg_weight": 0.5792673826217651, "std_delta_l": 0.08464352041482925, "std_weight": 0.10171469300985336, "weighted_loss": 0.15356121957302094 }, { "avg_delta_l": 0.0004960452206432819, "avg_loss_unweighted": 0.26023682951927185, "avg_weight": 0.5962468385696411, "std_delta_l": 0.09340096265077591, "std_weight": 0.10870984196662903, "weighted_loss": 0.15963903069496155 }, { "avg_delta_l": 0.01337285153567791, "avg_loss_unweighted": 0.25428295135498047, "avg_weight": 0.5762659311294556, "std_delta_l": 0.08913671225309372, "std_weight": 0.11733081936836243, "weighted_loss": 0.15200449526309967 }, { "epoch": 3.973913043478261, "grad_norm": 0.6127191710063332, "learning_rate": 9.713709485916357e-06, "loss": 0.3181, "step": 230 }, { "avg_delta_l": -0.016564300283789635, "avg_loss_unweighted": 0.27528488636016846, "avg_weight": 0.6302037239074707, "std_delta_l": 0.10786031186580658, "std_weight": 0.14663882553577423, "weighted_loss": 0.18153567612171173 }, { "avg_delta_l": 0.0003169747069478035, "avg_loss_unweighted": 0.2714025676250458, "avg_weight": 0.6020900011062622, "std_delta_l": 0.10153961926698685, "std_weight": 0.12183253467082977, "weighted_loss": 0.16887113451957703 }, { "avg_delta_l": 0.01802874356508255, "avg_loss_unweighted": 0.24253037571907043, "avg_weight": 0.5803259611129761, "std_delta_l": 0.08462043106555939, "std_weight": 0.09834115952253342, "weighted_loss": 0.1458134949207306 }, { "avg_delta_l": 0.023966655135154724, "avg_loss_unweighted": 0.22488529980182648, "avg_weight": 0.5461320877075195, "std_delta_l": 0.06514745205640793, "std_weight": 0.07466763257980347, "weighted_loss": 0.12598761916160583 }, { "avg_delta_l": 0.03633418679237366, "avg_loss_unweighted": 0.2343883216381073, "avg_weight": 0.5241269469261169, "std_delta_l": 0.05303064361214638, "std_weight": 0.042971398681402206, "weighted_loss": 0.12442727386951447 }, { "avg_delta_l": 0.02919413149356842, "avg_loss_unweighted": 0.24003329873085022, "avg_weight": 0.5439452528953552, "std_delta_l": 0.07682405412197113, "std_weight": 0.07593297213315964, "weighted_loss": 0.13368020951747894 }, { "avg_delta_l": 0.03365728259086609, "avg_loss_unweighted": 0.2255765199661255, "avg_weight": 0.5172070264816284, "std_delta_l": 0.04339213669300079, "std_weight": 0.03086302988231182, "weighted_loss": 0.11754897981882095 }, { "avg_delta_l": 0.02547474205493927, "avg_loss_unweighted": 0.2425059974193573, "avg_weight": 0.5384553074836731, "std_delta_l": 0.06088913604617119, "std_weight": 0.06291253864765167, "weighted_loss": 0.13202457129955292 }, { "avg_delta_l": 0.037365928292274475, "avg_loss_unweighted": 0.22104154527187347, "avg_weight": 0.544784665107727, "std_delta_l": 0.07368265092372894, "std_weight": 0.06945778429508209, "weighted_loss": 0.12417671829462051 }, { "avg_delta_l": 0.03174115717411041, "avg_loss_unweighted": 0.22816112637519836, "avg_weight": 0.5431941151618958, "std_delta_l": 0.07105766236782074, "std_weight": 0.0690654069185257, "weighted_loss": 0.1272515058517456 }, { "avg_delta_l": 0.029542364180088043, "avg_loss_unweighted": 0.21713568270206451, "avg_weight": 0.5364699363708496, "std_delta_l": 0.0675126239657402, "std_weight": 0.07157602161169052, "weighted_loss": 0.11974755674600601 }, { "avg_delta_l": 0.01785523071885109, "avg_loss_unweighted": 0.23234643042087555, "avg_weight": 0.5439033508300781, "std_delta_l": 0.05566810071468353, "std_weight": 0.06482897698879242, "weighted_loss": 0.12920667231082916 }, { "avg_delta_l": 0.013582100160419941, "avg_loss_unweighted": 0.24442394077777863, "avg_weight": 0.5675185322761536, "std_delta_l": 0.07162708044052124, "std_weight": 0.08630229532718658, "weighted_loss": 0.14302244782447815 }, { "avg_delta_l": 0.036240726709365845, "avg_loss_unweighted": 0.22108767926692963, "avg_weight": 0.5321891903877258, "std_delta_l": 0.06845993548631668, "std_weight": 0.05842169001698494, "weighted_loss": 0.12021315097808838 }, { "avg_delta_l": 0.04124891757965088, "avg_loss_unweighted": 0.22042405605316162, "avg_weight": 0.5170929431915283, "std_delta_l": 0.057853929698467255, "std_weight": 0.033392954617738724, "weighted_loss": 0.11511259526014328 }, { "avg_delta_l": 0.016883138567209244, "avg_loss_unweighted": 0.2451530247926712, "avg_weight": 0.5601122379302979, "std_delta_l": 0.07233192026615143, "std_weight": 0.08724617213010788, "weighted_loss": 0.14223267138004303 }, { "avg_delta_l": 0.013629773631691933, "avg_loss_unweighted": 0.24488896131515503, "avg_weight": 0.5633935332298279, "std_delta_l": 0.07135074585676193, "std_weight": 0.08958207815885544, "weighted_loss": 0.1416500210762024 }, { "avg_delta_l": 0.02598993107676506, "avg_loss_unweighted": 0.23688626289367676, "avg_weight": 0.5462181568145752, "std_delta_l": 0.06582408398389816, "std_weight": 0.07517820596694946, "weighted_loss": 0.13162392377853394 }, { "avg_delta_l": 0.03721656650304794, "avg_loss_unweighted": 0.2273084968328476, "avg_weight": 0.5324585437774658, "std_delta_l": 0.0630800798535347, "std_weight": 0.05765066295862198, "weighted_loss": 0.1237744688987732 }, { "epoch": 4.139130434782609, "grad_norm": 0.657804242328275, "learning_rate": 9.661401333018725e-06, "loss": 0.2548, "step": 240 }, { "avg_delta_l": 0.03044055588543415, "avg_loss_unweighted": 0.22376108169555664, "avg_weight": 0.5431849956512451, "std_delta_l": 0.06797395646572113, "std_weight": 0.06419277936220169, "weighted_loss": 0.12595143914222717 }, { "avg_delta_l": 0.03183881938457489, "avg_loss_unweighted": 0.23224949836730957, "avg_weight": 0.538821280002594, "std_delta_l": 0.058178775012493134, "std_weight": 0.06102651357650757, "weighted_loss": 0.12820228934288025 }, { "avg_delta_l": 0.014211181551218033, "avg_loss_unweighted": 0.23328745365142822, "avg_weight": 0.5479543209075928, "std_delta_l": 0.05823463574051857, "std_weight": 0.07587473839521408, "weighted_loss": 0.13079649209976196 }, { "avg_delta_l": 0.02710440196096897, "avg_loss_unweighted": 0.22358684241771698, "avg_weight": 0.5500825643539429, "std_delta_l": 0.0819384753704071, "std_weight": 0.08496131002902985, "weighted_loss": 0.1260986179113388 }, { "avg_delta_l": 0.032389216125011444, "avg_loss_unweighted": 0.21844598650932312, "avg_weight": 0.5484132766723633, "std_delta_l": 0.07391388714313507, "std_weight": 0.07915594428777695, "weighted_loss": 0.12307193130254745 }, { "avg_delta_l": 0.042446624487638474, "avg_loss_unweighted": 0.22569018602371216, "avg_weight": 0.5375620722770691, "std_delta_l": 0.07299796491861343, "std_weight": 0.05735534802079201, "weighted_loss": 0.12389422953128815 }, { "avg_delta_l": 0.032170362770557404, "avg_loss_unweighted": 0.21655979752540588, "avg_weight": 0.5344865322113037, "std_delta_l": 0.06460747867822647, "std_weight": 0.06078752875328064, "weighted_loss": 0.11790253221988678 }, { "avg_delta_l": 0.02001139707863331, "avg_loss_unweighted": 0.2414354383945465, "avg_weight": 0.548105001449585, "std_delta_l": 0.07259766757488251, "std_weight": 0.07384410500526428, "weighted_loss": 0.13420818746089935 }, { "avg_delta_l": 0.031178776174783707, "avg_loss_unweighted": 0.22820648550987244, "avg_weight": 0.5374515652656555, "std_delta_l": 0.05893474072217941, "std_weight": 0.06310848891735077, "weighted_loss": 0.12339750677347183 }, { "avg_delta_l": 0.03583267703652382, "avg_loss_unweighted": 0.22873422503471375, "avg_weight": 0.5265628099441528, "std_delta_l": 0.053480297327041626, "std_weight": 0.04946016147732735, "weighted_loss": 0.12260375916957855 }, { "avg_delta_l": 0.036242496222257614, "avg_loss_unweighted": 0.21983784437179565, "avg_weight": 0.5478429794311523, "std_delta_l": 0.0831243172287941, "std_weight": 0.07756184041500092, "weighted_loss": 0.12372009456157684 }, { "avg_delta_l": 0.04279624670743942, "avg_loss_unweighted": 0.21851271390914917, "avg_weight": 0.5277421474456787, "std_delta_l": 0.05745099484920502, "std_weight": 0.04340164735913277, "weighted_loss": 0.11694304645061493 }, { "avg_delta_l": 0.021921522915363312, "avg_loss_unweighted": 0.236055389046669, "avg_weight": 0.5502294898033142, "std_delta_l": 0.06524895876646042, "std_weight": 0.06930547952651978, "weighted_loss": 0.13301916420459747 }, { "avg_delta_l": 0.030607853084802628, "avg_loss_unweighted": 0.22635282576084137, "avg_weight": 0.5389941930770874, "std_delta_l": 0.064653180539608, "std_weight": 0.07055910676717758, "weighted_loss": 0.12476814538240433 }, { "avg_delta_l": 0.033500343561172485, "avg_loss_unweighted": 0.2319362759590149, "avg_weight": 0.551047682762146, "std_delta_l": 0.07917863130569458, "std_weight": 0.07750684767961502, "weighted_loss": 0.13077236711978912 }, { "avg_delta_l": 0.025331132113933563, "avg_loss_unweighted": 0.23364968597888947, "avg_weight": 0.5419639348983765, "std_delta_l": 0.06040961667895317, "std_weight": 0.06742171943187714, "weighted_loss": 0.12925440073013306 }, { "avg_delta_l": 0.021367041394114494, "avg_loss_unweighted": 0.2331531047821045, "avg_weight": 0.5554342865943909, "std_delta_l": 0.071442149579525, "std_weight": 0.07293003052473068, "weighted_loss": 0.1311119943857193 }, { "avg_delta_l": 0.00855722650885582, "avg_loss_unweighted": 0.23178595304489136, "avg_weight": 0.5645309686660767, "std_delta_l": 0.06672754138708115, "std_weight": 0.09142714738845825, "weighted_loss": 0.1345706284046173 }, { "avg_delta_l": 0.0244382843375206, "avg_loss_unweighted": 0.2276560366153717, "avg_weight": 0.5605854392051697, "std_delta_l": 0.0877842903137207, "std_weight": 0.09290502965450287, "weighted_loss": 0.13187973201274872 }, { "avg_delta_l": 0.029615789651870728, "avg_loss_unweighted": 0.22918511927127838, "avg_weight": 0.5481917262077332, "std_delta_l": 0.07050930708646774, "std_weight": 0.07148633152246475, "weighted_loss": 0.12868435680866241 }, { "epoch": 4.3130434782608695, "grad_norm": 0.581417251604484, "learning_rate": 9.604872499678947e-06, "loss": 0.2541, "step": 250 }, { "avg_delta_l": 0.02566026709973812, "avg_loss_unweighted": 0.23941628634929657, "avg_weight": 0.5353389978408813, "std_delta_l": 0.05006813257932663, "std_weight": 0.04608910530805588, "weighted_loss": 0.1297406107187271 }, { "avg_delta_l": 0.046444810926914215, "avg_loss_unweighted": 0.22242167592048645, "avg_weight": 0.5307565331459045, "std_delta_l": 0.07354804128408432, "std_weight": 0.05996858701109886, "weighted_loss": 0.1204724907875061 }, { "avg_delta_l": 0.03983630985021591, "avg_loss_unweighted": 0.21493962407112122, "avg_weight": 0.5460032820701599, "std_delta_l": 0.08568061888217926, "std_weight": 0.07345312833786011, "weighted_loss": 0.12069239467382431 }, { "avg_delta_l": 0.03974105790257454, "avg_loss_unweighted": 0.22725705802440643, "avg_weight": 0.543877124786377, "std_delta_l": 0.07537654787302017, "std_weight": 0.06616289913654327, "weighted_loss": 0.12672533094882965 }, { "avg_delta_l": 0.040102288126945496, "avg_loss_unweighted": 0.2225717008113861, "avg_weight": 0.5413161516189575, "std_delta_l": 0.07566631585359573, "std_weight": 0.06937819719314575, "weighted_loss": 0.12439332902431488 }, { "avg_delta_l": 0.031417086720466614, "avg_loss_unweighted": 0.21795690059661865, "avg_weight": 0.5465943217277527, "std_delta_l": 0.06913077086210251, "std_weight": 0.06786003708839417, "weighted_loss": 0.12363450974225998 }, { "avg_delta_l": 0.01689544878900051, "avg_loss_unweighted": 0.22645042836666107, "avg_weight": 0.5701426267623901, "std_delta_l": 0.08034346252679825, "std_weight": 0.09939645975828171, "weighted_loss": 0.13201448321342468 }, { "avg_delta_l": 0.040776487439870834, "avg_loss_unweighted": 0.21603260934352875, "avg_weight": 0.5388230681419373, "std_delta_l": 0.07347846776247025, "std_weight": 0.06615061312913895, "weighted_loss": 0.11882464587688446 }, { "avg_delta_l": -0.005334172397851944, "avg_loss_unweighted": 0.2611783742904663, "avg_weight": 0.5821146965026855, "std_delta_l": 0.08143040537834167, "std_weight": 0.12585079669952393, "weighted_loss": 0.15815378725528717 }, { "avg_delta_l": 0.011680684052407742, "avg_loss_unweighted": 0.2173726111650467, "avg_weight": 0.5740532875061035, "std_delta_l": 0.08064782619476318, "std_weight": 0.09590309113264084, "weighted_loss": 0.1299295723438263 }, { "avg_delta_l": 0.03404224291443825, "avg_loss_unweighted": 0.22922441363334656, "avg_weight": 0.5423085689544678, "std_delta_l": 0.07088226824998856, "std_weight": 0.07352916896343231, "weighted_loss": 0.1270100474357605 }, { "avg_delta_l": 0.039073020219802856, "avg_loss_unweighted": 0.2106626331806183, "avg_weight": 0.531536340713501, "std_delta_l": 0.07326702028512955, "std_weight": 0.05617419630289078, "weighted_loss": 0.11330605298280716 }, { "avg_delta_l": 0.03638143837451935, "avg_loss_unweighted": 0.2079980969429016, "avg_weight": 0.5352544188499451, "std_delta_l": 0.060803789645433426, "std_weight": 0.061234794557094574, "weighted_loss": 0.11405554413795471 }, { "avg_delta_l": 0.020493516698479652, "avg_loss_unweighted": 0.23174825310707092, "avg_weight": 0.5625099539756775, "std_delta_l": 0.07111599296331406, "std_weight": 0.09006655961275101, "weighted_loss": 0.13365139067173004 }, { "avg_delta_l": 0.019006218761205673, "avg_loss_unweighted": 0.22411271929740906, "avg_weight": 0.5766277313232422, "std_delta_l": 0.09617189317941666, "std_weight": 0.11505007743835449, "weighted_loss": 0.13519001007080078 }, { "avg_delta_l": 0.021012015640735626, "avg_loss_unweighted": 0.22149530053138733, "avg_weight": 0.5519355535507202, "std_delta_l": 0.06711502373218536, "std_weight": 0.07723359018564224, "weighted_loss": 0.12429332733154297 }, { "avg_delta_l": -0.00012038717977702618, "avg_loss_unweighted": 0.23767386376857758, "avg_weight": 0.5908476114273071, "std_delta_l": 0.07951042801141739, "std_weight": 0.11181667447090149, "weighted_loss": 0.14330540597438812 }, { "avg_delta_l": 0.01599942147731781, "avg_loss_unweighted": 0.23692941665649414, "avg_weight": 0.566291332244873, "std_delta_l": 0.08191832900047302, "std_weight": 0.10548770427703857, "weighted_loss": 0.13994628190994263 }, { "avg_delta_l": 0.025641242042183876, "avg_loss_unweighted": 0.22898393869400024, "avg_weight": 0.5558158755302429, "std_delta_l": 0.08110487461090088, "std_weight": 0.09106383472681046, "weighted_loss": 0.13210462033748627 }, { "avg_delta_l": 0.013831629417836666, "avg_loss_unweighted": 0.24158981442451477, "avg_weight": 0.5654975771903992, "std_delta_l": 0.07591420412063599, "std_weight": 0.09539204835891724, "weighted_loss": 0.14055681228637695 }, { "epoch": 4.48695652173913, "grad_norm": 0.5743919089542237, "learning_rate": 9.54417417010629e-06, "loss": 0.2588, "step": 260 }, { "avg_delta_l": 0.019746938720345497, "avg_loss_unweighted": 0.22765041887760162, "avg_weight": 0.5580691695213318, "std_delta_l": 0.07261300832033157, "std_weight": 0.0821821540594101, "weighted_loss": 0.13100557029247284 }, { "avg_delta_l": 0.02278286963701248, "avg_loss_unweighted": 0.23078730702400208, "avg_weight": 0.5620080828666687, "std_delta_l": 0.07505884766578674, "std_weight": 0.09207188338041306, "weighted_loss": 0.13300856947898865 }, { "avg_delta_l": 0.03149275854229927, "avg_loss_unweighted": 0.2229115217924118, "avg_weight": 0.5464797616004944, "std_delta_l": 0.08002986013889313, "std_weight": 0.07384485751390457, "weighted_loss": 0.12550285458564758 }, { "avg_delta_l": 0.03115096129477024, "avg_loss_unweighted": 0.2211015224456787, "avg_weight": 0.546120285987854, "std_delta_l": 0.08105447143316269, "std_weight": 0.07372032850980759, "weighted_loss": 0.12444008886814117 }, { "avg_delta_l": 0.008063612505793571, "avg_loss_unweighted": 0.22488628327846527, "avg_weight": 0.5691404342651367, "std_delta_l": 0.08205601572990417, "std_weight": 0.11371584981679916, "weighted_loss": 0.13122430443763733 }, { "avg_delta_l": 0.020603468641638756, "avg_loss_unweighted": 0.2369953989982605, "avg_weight": 0.5455553531646729, "std_delta_l": 0.07439298927783966, "std_weight": 0.0834798663854599, "weighted_loss": 0.1334947943687439 }, { "avg_delta_l": 0.021848849952220917, "avg_loss_unweighted": 0.23785154521465302, "avg_weight": 0.5581390857696533, "std_delta_l": 0.07379934191703796, "std_weight": 0.08522969484329224, "weighted_loss": 0.13504469394683838 }, { "avg_delta_l": 0.018772901967167854, "avg_loss_unweighted": 0.22274214029312134, "avg_weight": 0.5686567425727844, "std_delta_l": 0.0818013921380043, "std_weight": 0.08635079860687256, "weighted_loss": 0.1310768574476242 }, { "avg_delta_l": 0.011231089010834694, "avg_loss_unweighted": 0.2259463667869568, "avg_weight": 0.5743323564529419, "std_delta_l": 0.08014270663261414, "std_weight": 0.0978899747133255, "weighted_loss": 0.1343662440776825 }, { "avg_delta_l": 0.027979237958788872, "avg_loss_unweighted": 0.21875987946987152, "avg_weight": 0.5349370837211609, "std_delta_l": 0.05039745196700096, "std_weight": 0.05782818794250488, "weighted_loss": 0.11904681473970413 }, { "avg_delta_l": 0.012362666428089142, "avg_loss_unweighted": 0.2283363789319992, "avg_weight": 0.5727023482322693, "std_delta_l": 0.07722039520740509, "std_weight": 0.0885949581861496, "weighted_loss": 0.134966641664505 }, { "avg_delta_l": -0.00257724872790277, "avg_loss_unweighted": 0.2360500693321228, "avg_weight": 0.5902572870254517, "std_delta_l": 0.08223345130681992, "std_weight": 0.11793410032987595, "weighted_loss": 0.1447562575340271 }, { "avg_delta_l": 0.0054147616028785706, "avg_loss_unweighted": 0.21902792155742645, "avg_weight": 0.5716010928153992, "std_delta_l": 0.06283965706825256, "std_weight": 0.0894605815410614, "weighted_loss": 0.1291584074497223 }, { "avg_delta_l": 0.015942733734846115, "avg_loss_unweighted": 0.23931176960468292, "avg_weight": 0.5713256001472473, "std_delta_l": 0.07559506595134735, "std_weight": 0.08949881047010422, "weighted_loss": 0.14027343690395355 }, { "avg_delta_l": 0.007418196648359299, "avg_loss_unweighted": 0.23951886594295502, "avg_weight": 0.5717752575874329, "std_delta_l": 0.07421086728572845, "std_weight": 0.099332295358181, "weighted_loss": 0.14000476896762848 }, { "avg_delta_l": -0.003262812038883567, "avg_loss_unweighted": 0.23856639862060547, "avg_weight": 0.5947861075401306, "std_delta_l": 0.08454945683479309, "std_weight": 0.12117858231067657, "weighted_loss": 0.14596107602119446 }, { "avg_delta_l": 0.023670295253396034, "avg_loss_unweighted": 0.2114284187555313, "avg_weight": 0.5755243897438049, "std_delta_l": 0.09312345832586288, "std_weight": 0.10378521680831909, "weighted_loss": 0.12722167372703552 }, { "avg_delta_l": 0.017367444932460785, "avg_loss_unweighted": 0.22009938955307007, "avg_weight": 0.5703753232955933, "std_delta_l": 0.08982202410697937, "std_weight": 0.10140839219093323, "weighted_loss": 0.13011261820793152 }, { "avg_delta_l": 0.014180500991642475, "avg_loss_unweighted": 0.2403264343738556, "avg_weight": 0.5933457016944885, "std_delta_l": 0.10790781676769257, "std_weight": 0.12080790847539902, "weighted_loss": 0.14798538386821747 }, { "avg_delta_l": 0.012004596181213856, "avg_loss_unweighted": 0.22901684045791626, "avg_weight": 0.5716016888618469, "std_delta_l": 0.08265303075313568, "std_weight": 0.09320024400949478, "weighted_loss": 0.13465742766857147 }, { "epoch": 4.660869565217391, "grad_norm": 0.5976262716757978, "learning_rate": 9.479361303793441e-06, "loss": 0.2673, "step": 270 }, { "avg_delta_l": 0.016953295096755028, "avg_loss_unweighted": 0.2181272655725479, "avg_weight": 0.5662035942077637, "std_delta_l": 0.07584107667207718, "std_weight": 0.09902743995189667, "weighted_loss": 0.1263047307729721 }, { "avg_delta_l": -0.0024467248003929853, "avg_loss_unweighted": 0.2444465160369873, "avg_weight": 0.5950719714164734, "std_delta_l": 0.08460327237844467, "std_weight": 0.11444346606731415, "weighted_loss": 0.151201993227005 }, { "avg_delta_l": 0.016198059543967247, "avg_loss_unweighted": 0.22055289149284363, "avg_weight": 0.5740732550621033, "std_delta_l": 0.083397276699543, "std_weight": 0.0954396054148674, "weighted_loss": 0.13003093004226685 }, { "avg_delta_l": 0.017596114426851273, "avg_loss_unweighted": 0.22615046799182892, "avg_weight": 0.567724347114563, "std_delta_l": 0.07417179644107819, "std_weight": 0.08817291259765625, "weighted_loss": 0.13188578188419342 }, { "avg_delta_l": 0.0017661829479038715, "avg_loss_unweighted": 0.22966282069683075, "avg_weight": 0.588078498840332, "std_delta_l": 0.07417228072881699, "std_weight": 0.09639597684144974, "weighted_loss": 0.13711388409137726 }, { "avg_delta_l": -0.0015677199698984623, "avg_loss_unweighted": 0.22832287847995758, "avg_weight": 0.6094748377799988, "std_delta_l": 0.08775215595960617, "std_weight": 0.11270186305046082, "weighted_loss": 0.1467122733592987 }, { "avg_delta_l": 0.011042611673474312, "avg_loss_unweighted": 0.23354807496070862, "avg_weight": 0.5873245000839233, "std_delta_l": 0.10176162421703339, "std_weight": 0.12047713249921799, "weighted_loss": 0.14392751455307007 }, { "avg_delta_l": 0.01648951694369316, "avg_loss_unweighted": 0.21719443798065186, "avg_weight": 0.5649814009666443, "std_delta_l": 0.08493661135435104, "std_weight": 0.09060151129961014, "weighted_loss": 0.12671799957752228 }, { "avg_delta_l": 0.0026550847105681896, "avg_loss_unweighted": 0.23501668870449066, "avg_weight": 0.576397716999054, "std_delta_l": 0.076917365193367, "std_weight": 0.1105499416589737, "weighted_loss": 0.13772845268249512 }, { "avg_delta_l": 0.007084192708134651, "avg_loss_unweighted": 0.23665843904018402, "avg_weight": 0.5806531310081482, "std_delta_l": 0.0833190530538559, "std_weight": 0.10393043607473373, "weighted_loss": 0.1410636305809021 }, { "avg_delta_l": 0.006219428963959217, "avg_loss_unweighted": 0.23464007675647736, "avg_weight": 0.5936760306358337, "std_delta_l": 0.08607927709817886, "std_weight": 0.10785198956727982, "weighted_loss": 0.14389051496982574 }, { "avg_delta_l": -0.010981828905642033, "avg_loss_unweighted": 0.2260999232530594, "avg_weight": 0.620212733745575, "std_delta_l": 0.09785106778144836, "std_weight": 0.13541419804096222, "weighted_loss": 0.14691665768623352 }, { "avg_delta_l": 0.004964747000485659, "avg_loss_unweighted": 0.23100760579109192, "avg_weight": 0.5944772362709045, "std_delta_l": 0.08201621472835541, "std_weight": 0.10228541493415833, "weighted_loss": 0.14177531003952026 }, { "avg_delta_l": 0.015759656205773354, "avg_loss_unweighted": 0.23300470411777496, "avg_weight": 0.5713348388671875, "std_delta_l": 0.07839126884937286, "std_weight": 0.08826982229948044, "weighted_loss": 0.13748301565647125 }, { "avg_delta_l": -0.003935901448130608, "avg_loss_unweighted": 0.236049085855484, "avg_weight": 0.5885539054870605, "std_delta_l": 0.07073760032653809, "std_weight": 0.10581567883491516, "weighted_loss": 0.14235329627990723 }, { "avg_delta_l": 0.009314227849245071, "avg_loss_unweighted": 0.23364494740962982, "avg_weight": 0.5819597244262695, "std_delta_l": 0.08232598751783371, "std_weight": 0.10251844674348831, "weighted_loss": 0.13982117176055908 }, { "avg_delta_l": 0.021271515637636185, "avg_loss_unweighted": 0.2098718285560608, "avg_weight": 0.5663707256317139, "std_delta_l": 0.07631171494722366, "std_weight": 0.0845983549952507, "weighted_loss": 0.1226646900177002 }, { "avg_delta_l": 0.0258660726249218, "avg_loss_unweighted": 0.22088493406772614, "avg_weight": 0.5626281499862671, "std_delta_l": 0.08858534693717957, "std_weight": 0.09867088496685028, "weighted_loss": 0.1279594600200653 }, { "avg_delta_l": 0.013005932793021202, "avg_loss_unweighted": 0.22591201961040497, "avg_weight": 0.5868057012557983, "std_delta_l": 0.09309468418359756, "std_weight": 0.11036048084497452, "weighted_loss": 0.13806049525737762 }, { "avg_delta_l": 0.008537782356142998, "avg_loss_unweighted": 0.23795586824417114, "avg_weight": 0.5958568453788757, "std_delta_l": 0.09375917166471481, "std_weight": 0.11572928726673126, "weighted_loss": 0.1467548906803131 }, { "epoch": 4.834782608695652, "grad_norm": 0.6762214768404711, "learning_rate": 9.410492585753279e-06, "loss": 0.276, "step": 280 }, { "avg_delta_l": -0.0021364742424339056, "avg_loss_unweighted": 0.21344555914402008, "avg_weight": 0.6008198261260986, "std_delta_l": 0.10106101632118225, "std_weight": 0.13159452378749847, "weighted_loss": 0.13443665206432343 }, { "avg_delta_l": -0.0032808384858071804, "avg_loss_unweighted": 0.23071768879890442, "avg_weight": 0.5778655409812927, "std_delta_l": 0.061186935752630234, "std_weight": 0.08657439053058624, "weighted_loss": 0.13524079322814941 }, { "avg_delta_l": 0.02391616255044937, "avg_loss_unweighted": 0.21245728433132172, "avg_weight": 0.5846887826919556, "std_delta_l": 0.10775305330753326, "std_weight": 0.1223246306180954, "weighted_loss": 0.12932075560092926 }, { "avg_delta_l": 0.01099887490272522, "avg_loss_unweighted": 0.215638667345047, "avg_weight": 0.5659166574478149, "std_delta_l": 0.07096884399652481, "std_weight": 0.08946415036916733, "weighted_loss": 0.12462017685174942 }, { "avg_delta_l": 0.00510590523481369, "avg_loss_unweighted": 0.2371080070734024, "avg_weight": 0.5895024538040161, "std_delta_l": 0.08839292824268341, "std_weight": 0.11172930151224136, "weighted_loss": 0.14514589309692383 }, { "avg_delta_l": -0.011577753350138664, "avg_loss_unweighted": 0.22955787181854248, "avg_weight": 0.6138233542442322, "std_delta_l": 0.08711376786231995, "std_weight": 0.13160642981529236, "weighted_loss": 0.14725224673748016 }, { "avg_delta_l": -0.007905995473265648, "avg_loss_unweighted": 0.22962819039821625, "avg_weight": 0.5973848104476929, "std_delta_l": 0.07778982818126678, "std_weight": 0.10821892321109772, "weighted_loss": 0.14098069071769714 }, { "avg_delta_l": 0.009077277034521103, "avg_loss_unweighted": 0.22364875674247742, "avg_weight": 0.5798439979553223, "std_delta_l": 0.08169145882129669, "std_weight": 0.0980297178030014, "weighted_loss": 0.13376373052597046 }, { "avg_delta_l": -0.0005234354175627232, "avg_loss_unweighted": 0.23250380158424377, "avg_weight": 0.5922477841377258, "std_delta_l": 0.08274666965007782, "std_weight": 0.11158473789691925, "weighted_loss": 0.14068222045898438 }, { "avg_delta_l": 0.005115116946399212, "avg_loss_unweighted": 0.21589098870754242, "avg_weight": 0.5837323069572449, "std_delta_l": 0.0859031155705452, "std_weight": 0.10522980988025665, "weighted_loss": 0.1307857632637024 }, { "avg_delta_l": 0.008068966679275036, "avg_loss_unweighted": 0.23077528178691864, "avg_weight": 0.5806754231452942, "std_delta_l": 0.0694892555475235, "std_weight": 0.08473143726587296, "weighted_loss": 0.13839958608150482 }, { "avg_delta_l": 0.010232711210846901, "avg_loss_unweighted": 0.21656088531017303, "avg_weight": 0.5826488137245178, "std_delta_l": 0.08515461534261703, "std_weight": 0.10232909023761749, "weighted_loss": 0.13068713247776031 }, { "avg_delta_l": 0.006398229859769344, "avg_loss_unweighted": 0.23105491697788239, "avg_weight": 0.5885275602340698, "std_delta_l": 0.09704653173685074, "std_weight": 0.12449263036251068, "weighted_loss": 0.1419997364282608 }, { "avg_delta_l": 0.02149917371571064, "avg_loss_unweighted": 0.220382422208786, "avg_weight": 0.572694718837738, "std_delta_l": 0.09118252247571945, "std_weight": 0.10472739487886429, "weighted_loss": 0.13122646510601044 }, { "avg_delta_l": 0.002666510408744216, "avg_loss_unweighted": 0.23386898636817932, "avg_weight": 0.5868929624557495, "std_delta_l": 0.0913415178656578, "std_weight": 0.12384674698114395, "weighted_loss": 0.14111600816249847 }, { "avg_delta_l": 0.012347446754574776, "avg_loss_unweighted": 0.22033625841140747, "avg_weight": 0.5814957022666931, "std_delta_l": 0.09265228360891342, "std_weight": 0.10072387754917145, "weighted_loss": 0.1332709789276123 }, { "avg_delta_l": -0.0034135293681174517, "avg_loss_unweighted": 0.23141956329345703, "avg_weight": 0.5939611196517944, "std_delta_l": 0.0814204290509224, "std_weight": 0.11771488189697266, "weighted_loss": 0.14236809313297272 }, { "avg_delta_l": 0.000826922245323658, "avg_loss_unweighted": 0.22496463358402252, "avg_weight": 0.5904685854911804, "std_delta_l": 0.08643944561481476, "std_weight": 0.12415971606969833, "weighted_loss": 0.13835124671459198 }, { "avg_delta_l": 0.015015196986496449, "avg_loss_unweighted": 0.2080678939819336, "avg_weight": 0.5922461152076721, "std_delta_l": 0.10846180468797684, "std_weight": 0.12812046706676483, "weighted_loss": 0.12858419120311737 }, { "epoch": 5.0, "grad_norm": 0.5986530471801507, "learning_rate": 9.337630373382334e-06, "loss": 0.2588, "step": 290 }, { "avg_delta_l": 0.03462459146976471, "avg_loss_unweighted": 0.20333141088485718, "avg_weight": 0.5227078795433044, "std_delta_l": 0.058493711054325104, "std_weight": 0.04516444355249405, "weighted_loss": 0.10728234052658081 }, { "avg_delta_l": 0.03302193433046341, "avg_loss_unweighted": 0.18699698150157928, "avg_weight": 0.5289047956466675, "std_delta_l": 0.059018541127443314, "std_weight": 0.056753456592559814, "weighted_loss": 0.10049153864383698 }, { "avg_delta_l": 0.03101862221956253, "avg_loss_unweighted": 0.1942710131406784, "avg_weight": 0.5192471146583557, "std_delta_l": 0.043547362089157104, "std_weight": 0.03614124283194542, "weighted_loss": 0.102584607899189 }, { "avg_delta_l": 0.03498789295554161, "avg_loss_unweighted": 0.18547053635120392, "avg_weight": 0.5212494730949402, "std_delta_l": 0.04631999880075455, "std_weight": 0.03880223631858826, "weighted_loss": 0.09816304594278336 }, { "avg_delta_l": 0.03381470963358879, "avg_loss_unweighted": 0.19666524231433868, "avg_weight": 0.5322687029838562, "std_delta_l": 0.06347215175628662, "std_weight": 0.04449055716395378, "weighted_loss": 0.10720041394233704 }, { "avg_delta_l": 0.014319397509098053, "avg_loss_unweighted": 0.21031318604946136, "avg_weight": 0.5430040955543518, "std_delta_l": 0.04592891037464142, "std_weight": 0.060994990170001984, "weighted_loss": 0.11723816394805908 }, { "avg_delta_l": 0.03691967576742172, "avg_loss_unweighted": 0.19362647831439972, "avg_weight": 0.5330533981323242, "std_delta_l": 0.0677606388926506, "std_weight": 0.05018031224608421, "weighted_loss": 0.10607912391424179 }, { "avg_delta_l": 0.0325176864862442, "avg_loss_unweighted": 0.20017828047275543, "avg_weight": 0.5468651056289673, "std_delta_l": 0.08024387806653976, "std_weight": 0.08413305878639221, "weighted_loss": 0.11336055397987366 }, { "avg_delta_l": 0.029934927821159363, "avg_loss_unweighted": 0.190206378698349, "avg_weight": 0.5264229774475098, "std_delta_l": 0.05537328124046326, "std_weight": 0.049230851233005524, "weighted_loss": 0.10207878798246384 }, { "avg_delta_l": 0.021286487579345703, "avg_loss_unweighted": 0.2073691040277481, "avg_weight": 0.5429084300994873, "std_delta_l": 0.053411323577165604, "std_weight": 0.06894689053297043, "weighted_loss": 0.11470383405685425 }, { "avg_delta_l": 0.03576730936765671, "avg_loss_unweighted": 0.18734322488307953, "avg_weight": 0.5361723303794861, "std_delta_l": 0.06043379008769989, "std_weight": 0.05231642723083496, "weighted_loss": 0.10224631428718567 }, { "avg_delta_l": 0.040785375982522964, "avg_loss_unweighted": 0.18638306856155396, "avg_weight": 0.520983099937439, "std_delta_l": 0.05764177814126015, "std_weight": 0.0365857295691967, "weighted_loss": 0.09939208626747131 }, { "avg_delta_l": 0.018308384343981743, "avg_loss_unweighted": 0.21366721391677856, "avg_weight": 0.5507172346115112, "std_delta_l": 0.06969454884529114, "std_weight": 0.09367690980434418, "weighted_loss": 0.12054402381181717 }, { "avg_delta_l": 0.030259020626544952, "avg_loss_unweighted": 0.19542089104652405, "avg_weight": 0.544792890548706, "std_delta_l": 0.06754399836063385, "std_weight": 0.07163339853286743, "weighted_loss": 0.10973695665597916 }, { "avg_delta_l": 0.024695927277207375, "avg_loss_unweighted": 0.18438757956027985, "avg_weight": 0.5438575148582458, "std_delta_l": 0.06623794138431549, "std_weight": 0.07872093468904495, "weighted_loss": 0.10335668921470642 }, { "avg_delta_l": 0.011414040811359882, "avg_loss_unweighted": 0.20264695584774017, "avg_weight": 0.5569506287574768, "std_delta_l": 0.06456215679645538, "std_weight": 0.08490905910730362, "weighted_loss": 0.11667153984308243 }, { "avg_delta_l": 0.023779423907399178, "avg_loss_unweighted": 0.19183479249477386, "avg_weight": 0.5466909408569336, "std_delta_l": 0.06893142312765121, "std_weight": 0.08983741700649261, "weighted_loss": 0.10669803619384766 }, { "avg_delta_l": 0.046656444668769836, "avg_loss_unweighted": 0.17138813436031342, "avg_weight": 0.5325114130973816, "std_delta_l": 0.0832296833395958, "std_weight": 0.06502273678779602, "weighted_loss": 0.0935296043753624 }, { "avg_delta_l": 0.01541504729539156, "avg_loss_unweighted": 0.2011682689189911, "avg_weight": 0.54547518491745, "std_delta_l": 0.058475881814956665, "std_weight": 0.07644632458686829, "weighted_loss": 0.11187741160392761 }, { "avg_delta_l": 0.037394531071186066, "avg_loss_unweighted": 0.19613394141197205, "avg_weight": 0.522942304611206, "std_delta_l": 0.056347738951444626, "std_weight": 0.0369754359126091, "weighted_loss": 0.1038702055811882 }, { "epoch": 5.173913043478261, "grad_norm": 0.5959166431780275, "learning_rate": 9.26084063999909e-06, "loss": 0.2137, "step": 300 }, { "avg_delta_l": 0.04187878221273422, "avg_loss_unweighted": 0.18589958548545837, "avg_weight": 0.5280426740646362, "std_delta_l": 0.057649143040180206, "std_weight": 0.04630028456449509, "weighted_loss": 0.09917456656694412 }, { "avg_delta_l": 0.036620013415813446, "avg_loss_unweighted": 0.1917186826467514, "avg_weight": 0.5357245206832886, "std_delta_l": 0.06487005949020386, "std_weight": 0.060536619275808334, "weighted_loss": 0.10424718260765076 }, { "avg_delta_l": 0.016598718240857124, "avg_loss_unweighted": 0.18669570982456207, "avg_weight": 0.5496681332588196, "std_delta_l": 0.06289979815483093, "std_weight": 0.08688336610794067, "weighted_loss": 0.10627792030572891 }, { "avg_delta_l": 0.02606961317360401, "avg_loss_unweighted": 0.18974407017230988, "avg_weight": 0.5540347695350647, "std_delta_l": 0.07762141525745392, "std_weight": 0.09074453264474869, "weighted_loss": 0.10816861689090729 }, { "avg_delta_l": 0.01459505595266819, "avg_loss_unweighted": 0.19395354390144348, "avg_weight": 0.5484592914581299, "std_delta_l": 0.06352855265140533, "std_weight": 0.07696175575256348, "weighted_loss": 0.10936286300420761 }, { "avg_delta_l": 0.019323788583278656, "avg_loss_unweighted": 0.1955791413784027, "avg_weight": 0.5564508438110352, "std_delta_l": 0.06825312227010727, "std_weight": 0.09056174010038376, "weighted_loss": 0.11265324056148529 }, { "avg_delta_l": 0.01801386848092079, "avg_loss_unweighted": 0.1926630288362503, "avg_weight": 0.5482966303825378, "std_delta_l": 0.060403332114219666, "std_weight": 0.0811690092086792, "weighted_loss": 0.10914799571037292 }, { "avg_delta_l": 0.04446007311344147, "avg_loss_unweighted": 0.1878148913383484, "avg_weight": 0.5302553176879883, "std_delta_l": 0.06249367073178291, "std_weight": 0.048262789845466614, "weighted_loss": 0.10121200233697891 }, { "avg_delta_l": 0.02368367277085781, "avg_loss_unweighted": 0.20444133877754211, "avg_weight": 0.5412086248397827, "std_delta_l": 0.06500691175460815, "std_weight": 0.07344768941402435, "weighted_loss": 0.11333134770393372 }, { "avg_delta_l": 0.014128502458333969, "avg_loss_unweighted": 0.20948085188865662, "avg_weight": 0.5527945756912231, "std_delta_l": 0.05258464813232422, "std_weight": 0.06378041952848434, "weighted_loss": 0.11790521442890167 }, { "avg_delta_l": 0.03429758921265602, "avg_loss_unweighted": 0.18567295372486115, "avg_weight": 0.5426346063613892, "std_delta_l": 0.06660904735326767, "std_weight": 0.05957417190074921, "weighted_loss": 0.10225099325180054 }, { "avg_delta_l": 0.033411044627428055, "avg_loss_unweighted": 0.19633929431438446, "avg_weight": 0.534481942653656, "std_delta_l": 0.06430642306804657, "std_weight": 0.06447380781173706, "weighted_loss": 0.10710765421390533 }, { "avg_delta_l": 0.04756559059023857, "avg_loss_unweighted": 0.18625399470329285, "avg_weight": 0.5341474413871765, "std_delta_l": 0.07739444077014923, "std_weight": 0.05963265895843506, "weighted_loss": 0.1012820154428482 }, { "avg_delta_l": 0.028105173259973526, "avg_loss_unweighted": 0.1880675107240677, "avg_weight": 0.546288251876831, "std_delta_l": 0.07123297452926636, "std_weight": 0.07937611639499664, "weighted_loss": 0.10541587322950363 }, { "avg_delta_l": 0.008371362462639809, "avg_loss_unweighted": 0.20633500814437866, "avg_weight": 0.5599603652954102, "std_delta_l": 0.06348759680986404, "std_weight": 0.09192018955945969, "weighted_loss": 0.11776740103960037 }, { "avg_delta_l": 0.02875460684299469, "avg_loss_unweighted": 0.19207100570201874, "avg_weight": 0.5320910811424255, "std_delta_l": 0.0557713583111763, "std_weight": 0.04595941677689552, "weighted_loss": 0.10357677191495895 }, { "avg_delta_l": 0.03623118996620178, "avg_loss_unweighted": 0.18519984185695648, "avg_weight": 0.5477147102355957, "std_delta_l": 0.08305199444293976, "std_weight": 0.08043771237134933, "weighted_loss": 0.10499957948923111 }, { "avg_delta_l": 0.03099915385246277, "avg_loss_unweighted": 0.17674048244953156, "avg_weight": 0.5510647296905518, "std_delta_l": 0.06603118032217026, "std_weight": 0.07154979556798935, "weighted_loss": 0.09973568469285965 }, { "avg_delta_l": 0.017130672931671143, "avg_loss_unweighted": 0.1915680319070816, "avg_weight": 0.5640143156051636, "std_delta_l": 0.07481499016284943, "std_weight": 0.09254856407642365, "weighted_loss": 0.11160173267126083 }, { "avg_delta_l": 0.0007905708625912666, "avg_loss_unweighted": 0.2004246711730957, "avg_weight": 0.582212507724762, "std_delta_l": 0.06298336386680603, "std_weight": 0.08767011016607285, "weighted_loss": 0.12013916671276093 }, { "epoch": 5.3478260869565215, "grad_norm": 0.6404782536587312, "learning_rate": 9.180192915108235e-06, "loss": 0.2155, "step": 310 }, { "avg_delta_l": 0.025977956131100655, "avg_loss_unweighted": 0.19521911442279816, "avg_weight": 0.5335798859596252, "std_delta_l": 0.04993497207760811, "std_weight": 0.056174278259277344, "weighted_loss": 0.10595540702342987 }, { "avg_delta_l": 0.01580747775733471, "avg_loss_unweighted": 0.19319087266921997, "avg_weight": 0.5626088976860046, "std_delta_l": 0.0764622688293457, "std_weight": 0.09554696083068848, "weighted_loss": 0.11209210753440857 }, { "avg_delta_l": 0.03586338460445404, "avg_loss_unweighted": 0.19397687911987305, "avg_weight": 0.5365889668464661, "std_delta_l": 0.06032468006014824, "std_weight": 0.058288365602493286, "weighted_loss": 0.10580730438232422 }, { "avg_delta_l": 0.02990768291056156, "avg_loss_unweighted": 0.1816290318965912, "avg_weight": 0.5216947197914124, "std_delta_l": 0.04014422371983528, "std_weight": 0.037183087319135666, "weighted_loss": 0.09563979506492615 }, { "avg_delta_l": 0.02023037150502205, "avg_loss_unweighted": 0.18672898411750793, "avg_weight": 0.5433197021484375, "std_delta_l": 0.05252467468380928, "std_weight": 0.06707484275102615, "weighted_loss": 0.1028275266289711 }, { "avg_delta_l": 0.012091935612261295, "avg_loss_unweighted": 0.19367903470993042, "avg_weight": 0.5597488880157471, "std_delta_l": 0.0642288327217102, "std_weight": 0.08656038343906403, "weighted_loss": 0.11123187839984894 }, { "avg_delta_l": 0.03034065291285515, "avg_loss_unweighted": 0.18519330024719238, "avg_weight": 0.553448498249054, "std_delta_l": 0.07342693954706192, "std_weight": 0.0829262062907219, "weighted_loss": 0.10432307422161102 }, { "avg_delta_l": 0.011342501267790794, "avg_loss_unweighted": 0.20000813901424408, "avg_weight": 0.5718806982040405, "std_delta_l": 0.07316314429044724, "std_weight": 0.09662634134292603, "weighted_loss": 0.11766599118709564 }, { "avg_delta_l": 0.01660611480474472, "avg_loss_unweighted": 0.17833706736564636, "avg_weight": 0.5557650923728943, "std_delta_l": 0.07226254045963287, "std_weight": 0.08663439750671387, "weighted_loss": 0.10328228026628494 }, { "avg_delta_l": 0.018871374428272247, "avg_loss_unweighted": 0.200156107544899, "avg_weight": 0.5587958693504333, "std_delta_l": 0.07497601956129074, "std_weight": 0.09259922057390213, "weighted_loss": 0.11583149433135986 }, { "avg_delta_l": 0.025172455236315727, "avg_loss_unweighted": 0.18739992380142212, "avg_weight": 0.5425636172294617, "std_delta_l": 0.06351108849048615, "std_weight": 0.0704534575343132, "weighted_loss": 0.10365600883960724 }, { "avg_delta_l": 0.02417467162013054, "avg_loss_unweighted": 0.18159152567386627, "avg_weight": 0.5546954870223999, "std_delta_l": 0.07565408200025558, "std_weight": 0.08922260254621506, "weighted_loss": 0.10370298475027084 }, { "avg_delta_l": 0.00974220596253872, "avg_loss_unweighted": 0.18924595415592194, "avg_weight": 0.5727484822273254, "std_delta_l": 0.07580224424600601, "std_weight": 0.10304480791091919, "weighted_loss": 0.11520221084356308 }, { "avg_delta_l": 0.0178651325404644, "avg_loss_unweighted": 0.1904747486114502, "avg_weight": 0.5690783858299255, "std_delta_l": 0.08022714406251907, "std_weight": 0.087052121758461, "weighted_loss": 0.11211887747049332 }, { "avg_delta_l": 0.009135901927947998, "avg_loss_unweighted": 0.1927739977836609, "avg_weight": 0.5675010681152344, "std_delta_l": 0.06959860026836395, "std_weight": 0.08160152286291122, "weighted_loss": 0.11326727271080017 }, { "avg_delta_l": 0.011980496346950531, "avg_loss_unweighted": 0.20291754603385925, "avg_weight": 0.5573128461837769, "std_delta_l": 0.0669669434428215, "std_weight": 0.08449157327413559, "weighted_loss": 0.11680873483419418 }, { "avg_delta_l": 0.0049095298163592815, "avg_loss_unweighted": 0.19425304234027863, "avg_weight": 0.5748769044876099, "std_delta_l": 0.07194854319095612, "std_weight": 0.09750252962112427, "weighted_loss": 0.11624281108379364 }, { "avg_delta_l": 0.04121111333370209, "avg_loss_unweighted": 0.17736424505710602, "avg_weight": 0.5451043844223022, "std_delta_l": 0.08624506741762161, "std_weight": 0.08569249510765076, "weighted_loss": 0.09876720607280731 }, { "avg_delta_l": 0.02179957553744316, "avg_loss_unweighted": 0.19569697976112366, "avg_weight": 0.5573817491531372, "std_delta_l": 0.08015128970146179, "std_weight": 0.09519403427839279, "weighted_loss": 0.1124926432967186 }, { "avg_delta_l": 0.006612643599510193, "avg_loss_unweighted": 0.19564485549926758, "avg_weight": 0.5601025223731995, "std_delta_l": 0.05631612613797188, "std_weight": 0.08416526019573212, "weighted_loss": 0.11226743459701538 }, { "epoch": 5.521739130434782, "grad_norm": 0.5543032901416276, "learning_rate": 9.09576022144496e-06, "loss": 0.2179, "step": 320 }, { "avg_delta_l": 0.015333874151110649, "avg_loss_unweighted": 0.20022718608379364, "avg_weight": 0.572783887386322, "std_delta_l": 0.07747070491313934, "std_weight": 0.09254425019025803, "weighted_loss": 0.11883658915758133 }, { "avg_delta_l": 0.014435271732509136, "avg_loss_unweighted": 0.1873914897441864, "avg_weight": 0.5649715662002563, "std_delta_l": 0.07814667373895645, "std_weight": 0.0939125344157219, "weighted_loss": 0.11060185730457306 }, { "avg_delta_l": 0.031774863600730896, "avg_loss_unweighted": 0.18632391095161438, "avg_weight": 0.5591152906417847, "std_delta_l": 0.08817140758037567, "std_weight": 0.08115749806165695, "weighted_loss": 0.1091010719537735 }, { "avg_delta_l": 0.014411297626793385, "avg_loss_unweighted": 0.19650185108184814, "avg_weight": 0.5532315969467163, "std_delta_l": 0.06373697519302368, "std_weight": 0.08337216824293137, "weighted_loss": 0.11140087991952896 }, { "avg_delta_l": 0.024981359019875526, "avg_loss_unweighted": 0.20454151928424835, "avg_weight": 0.5656985640525818, "std_delta_l": 0.0877051129937172, "std_weight": 0.09079609811306, "weighted_loss": 0.12094482034444809 }, { "avg_delta_l": -0.0043851756490767, "avg_loss_unweighted": 0.20510023832321167, "avg_weight": 0.5961422920227051, "std_delta_l": 0.08664466440677643, "std_weight": 0.12640570104122162, "weighted_loss": 0.12794575095176697 }, { "avg_delta_l": 0.017872611060738564, "avg_loss_unweighted": 0.18952184915542603, "avg_weight": 0.5498156547546387, "std_delta_l": 0.06230190396308899, "std_weight": 0.07317095994949341, "weighted_loss": 0.10674962401390076 }, { "avg_delta_l": -0.0014542543794959784, "avg_loss_unweighted": 0.20125125348567963, "avg_weight": 0.5811693668365479, "std_delta_l": 0.07908681780099869, "std_weight": 0.11584621667861938, "weighted_loss": 0.1211593970656395 }, { "avg_delta_l": 0.022841308265924454, "avg_loss_unweighted": 0.1838388741016388, "avg_weight": 0.5530864596366882, "std_delta_l": 0.06541671603918076, "std_weight": 0.08157795667648315, "weighted_loss": 0.10490184277296066 }, { "avg_delta_l": 8.852663449943066e-05, "avg_loss_unweighted": 0.2019837647676468, "avg_weight": 0.578583300113678, "std_delta_l": 0.07102290540933609, "std_weight": 0.09832970798015594, "weighted_loss": 0.12052813917398453 }, { "avg_delta_l": 0.023906007409095764, "avg_loss_unweighted": 0.183289036154747, "avg_weight": 0.5531662702560425, "std_delta_l": 0.0627862960100174, "std_weight": 0.06787527352571487, "weighted_loss": 0.10302332788705826 }, { "avg_delta_l": 0.028922511264681816, "avg_loss_unweighted": 0.18734589219093323, "avg_weight": 0.5475125312805176, "std_delta_l": 0.06556626409292221, "std_weight": 0.08023788034915924, "weighted_loss": 0.10539838671684265 }, { "avg_delta_l": 0.029532840475440025, "avg_loss_unweighted": 0.1826191395521164, "avg_weight": 0.5441337823867798, "std_delta_l": 0.06509462743997574, "std_weight": 0.06265177577733994, "weighted_loss": 0.10092435777187347 }, { "avg_delta_l": 0.005448257550597191, "avg_loss_unweighted": 0.20344239473342896, "avg_weight": 0.5800712704658508, "std_delta_l": 0.0802454724907875, "std_weight": 0.10184674710035324, "weighted_loss": 0.12052305042743683 }, { "avg_delta_l": 0.0005344850942492485, "avg_loss_unweighted": 0.191356360912323, "avg_weight": 0.5819393992424011, "std_delta_l": 0.0707051232457161, "std_weight": 0.10688848048448563, "weighted_loss": 0.11513225734233856 }, { "avg_delta_l": -0.002280543791130185, "avg_loss_unweighted": 0.18097540736198425, "avg_weight": 0.593444287776947, "std_delta_l": 0.06742781400680542, "std_weight": 0.10261574387550354, "weighted_loss": 0.11355051398277283 }, { "avg_delta_l": 0.00897723063826561, "avg_loss_unweighted": 0.2005264312028885, "avg_weight": 0.5639069676399231, "std_delta_l": 0.06374335289001465, "std_weight": 0.08278076350688934, "weighted_loss": 0.11673399060964584 }, { "avg_delta_l": 0.02882581390440464, "avg_loss_unweighted": 0.1876099705696106, "avg_weight": 0.5427809357643127, "std_delta_l": 0.06757033616304398, "std_weight": 0.07240349799394608, "weighted_loss": 0.10372351109981537 }, { "avg_delta_l": 0.03805078938603401, "avg_loss_unweighted": 0.1912371814250946, "avg_weight": 0.5448547601699829, "std_delta_l": 0.07816608250141144, "std_weight": 0.0799359530210495, "weighted_loss": 0.1055750921368599 }, { "avg_delta_l": 0.008777984417974949, "avg_loss_unweighted": 0.1924445927143097, "avg_weight": 0.5823230743408203, "std_delta_l": 0.08113076537847519, "std_weight": 0.09993220120668411, "weighted_loss": 0.11634770780801773 }, { "epoch": 5.695652173913043, "grad_norm": 0.7624021871763095, "learning_rate": 9.007619008856287e-06, "loss": 0.2253, "step": 330 }, { "avg_delta_l": 0.007895178161561489, "avg_loss_unweighted": 0.19730018079280853, "avg_weight": 0.5681050419807434, "std_delta_l": 0.07044252753257751, "std_weight": 0.08720939606428146, "weighted_loss": 0.11614811420440674 }, { "avg_delta_l": 0.012338920496404171, "avg_loss_unweighted": 0.19448991119861603, "avg_weight": 0.5674214363098145, "std_delta_l": 0.0793067067861557, "std_weight": 0.10273486375808716, "weighted_loss": 0.11487401276826859 }, { "avg_delta_l": -0.015908798202872276, "avg_loss_unweighted": 0.20359304547309875, "avg_weight": 0.5971986055374146, "std_delta_l": 0.0752418041229248, "std_weight": 0.12148066610097885, "weighted_loss": 0.12650564312934875 }, { "avg_delta_l": 0.0171468835324049, "avg_loss_unweighted": 0.19299998879432678, "avg_weight": 0.5759909152984619, "std_delta_l": 0.08870866894721985, "std_weight": 0.10691752284765244, "weighted_loss": 0.1154666319489479 }, { "avg_delta_l": 0.019572898745536804, "avg_loss_unweighted": 0.1837642341852188, "avg_weight": 0.5653352737426758, "std_delta_l": 0.08653020858764648, "std_weight": 0.09237493574619293, "weighted_loss": 0.10791576653718948 }, { "avg_delta_l": 0.00937496405094862, "avg_loss_unweighted": 0.1960010826587677, "avg_weight": 0.5631929636001587, "std_delta_l": 0.05871815234422684, "std_weight": 0.07646767795085907, "weighted_loss": 0.113338403403759 }, { "avg_delta_l": 0.008123200386762619, "avg_loss_unweighted": 0.19780634343624115, "avg_weight": 0.5605422854423523, "std_delta_l": 0.06800650805234909, "std_weight": 0.09568499773740768, "weighted_loss": 0.11441171914339066 }, { "avg_delta_l": 0.01866491138935089, "avg_loss_unweighted": 0.19897063076496124, "avg_weight": 0.5633763074874878, "std_delta_l": 0.06904826313257217, "std_weight": 0.0801272988319397, "weighted_loss": 0.11486342549324036 }, { "avg_delta_l": -0.0008549368940293789, "avg_loss_unweighted": 0.20450235903263092, "avg_weight": 0.5864177346229553, "std_delta_l": 0.07817330211400986, "std_weight": 0.1187928318977356, "weighted_loss": 0.12404781579971313 }, { "avg_delta_l": -0.005366788245737553, "avg_loss_unweighted": 0.21519756317138672, "avg_weight": 0.5838557481765747, "std_delta_l": 0.06617799401283264, "std_weight": 0.09171366691589355, "weighted_loss": 0.12780264019966125 }, { "avg_delta_l": 0.014765736646950245, "avg_loss_unweighted": 0.18555088341236115, "avg_weight": 0.573917806148529, "std_delta_l": 0.07838568836450577, "std_weight": 0.10736502707004547, "weighted_loss": 0.10913217812776566 }, { "avg_delta_l": 0.007230023853480816, "avg_loss_unweighted": 0.20163799822330475, "avg_weight": 0.5780101418495178, "std_delta_l": 0.09013266116380692, "std_weight": 0.11292768269777298, "weighted_loss": 0.12134326249361038 }, { "avg_delta_l": 0.0004873485304415226, "avg_loss_unweighted": 0.197861909866333, "avg_weight": 0.5776052474975586, "std_delta_l": 0.06016232445836067, "std_weight": 0.08066296577453613, "weighted_loss": 0.11723864823579788 }, { "avg_delta_l": 0.0194527767598629, "avg_loss_unweighted": 0.19402587413787842, "avg_weight": 0.5617424249649048, "std_delta_l": 0.07332532852888107, "std_weight": 0.08868788182735443, "weighted_loss": 0.11164732277393341 }, { "avg_delta_l": 0.002047949004918337, "avg_loss_unweighted": 0.2002086043357849, "avg_weight": 0.5962346792221069, "std_delta_l": 0.08998730778694153, "std_weight": 0.1200900673866272, "weighted_loss": 0.12410144507884979 }, { "avg_delta_l": -0.018515735864639282, "avg_loss_unweighted": 0.21164408326148987, "avg_weight": 0.6248553395271301, "std_delta_l": 0.09595558792352676, "std_weight": 0.14175434410572052, "weighted_loss": 0.13947460055351257 }, { "avg_delta_l": -0.0006171402055770159, "avg_loss_unweighted": 0.19660109281539917, "avg_weight": 0.582534670829773, "std_delta_l": 0.08473310619592667, "std_weight": 0.123488649725914, "weighted_loss": 0.11916687339544296 }, { "avg_delta_l": -0.0007557353237643838, "avg_loss_unweighted": 0.1915411502122879, "avg_weight": 0.5952067375183105, "std_delta_l": 0.08265569806098938, "std_weight": 0.1110040694475174, "weighted_loss": 0.11764577031135559 }, { "avg_delta_l": -0.014219921082258224, "avg_loss_unweighted": 0.21136079728603363, "avg_weight": 0.6069086790084839, "std_delta_l": 0.0921110287308693, "std_weight": 0.11993330717086792, "weighted_loss": 0.13381701707839966 }, { "avg_delta_l": 0.005394175183027983, "avg_loss_unweighted": 0.19061078131198883, "avg_weight": 0.5956056118011475, "std_delta_l": 0.0923338383436203, "std_weight": 0.1240137368440628, "weighted_loss": 0.11915092170238495 }, { "epoch": 5.869565217391305, "grad_norm": 0.7115788153868419, "learning_rate": 8.915849085079322e-06, "loss": 0.2388, "step": 340 }, { "avg_delta_l": 0.0010841903276741505, "avg_loss_unweighted": 0.196523979306221, "avg_weight": 0.5848484635353088, "std_delta_l": 0.07653883099555969, "std_weight": 0.10623539984226227, "weighted_loss": 0.1181194931268692 }, { "avg_delta_l": -0.0023295111022889614, "avg_loss_unweighted": 0.19699175655841827, "avg_weight": 0.5873188376426697, "std_delta_l": 0.09223401546478271, "std_weight": 0.1230640783905983, "weighted_loss": 0.12043967843055725 }, { "avg_delta_l": 0.007701213471591473, "avg_loss_unweighted": 0.19252926111221313, "avg_weight": 0.5795022249221802, "std_delta_l": 0.0784793570637703, "std_weight": 0.10303943604230881, "weighted_loss": 0.11597444862127304 }, { "avg_delta_l": -0.013568209484219551, "avg_loss_unweighted": 0.2080659419298172, "avg_weight": 0.6019551753997803, "std_delta_l": 0.08151772618293762, "std_weight": 0.12729106843471527, "weighted_loss": 0.1297912895679474 }, { "avg_delta_l": 0.006641966290771961, "avg_loss_unweighted": 0.19893380999565125, "avg_weight": 0.576260507106781, "std_delta_l": 0.0861942395567894, "std_weight": 0.11272212117910385, "weighted_loss": 0.11853770166635513 }, { "avg_delta_l": -0.0036897023674100637, "avg_loss_unweighted": 0.18640708923339844, "avg_weight": 0.5904529690742493, "std_delta_l": 0.06661362946033478, "std_weight": 0.09312436729669571, "weighted_loss": 0.11289612948894501 }, { "avg_delta_l": -8.738413453102112e-05, "avg_loss_unweighted": 0.19342167675495148, "avg_weight": 0.5837435126304626, "std_delta_l": 0.0680369958281517, "std_weight": 0.09863834828138351, "weighted_loss": 0.11823024600744247 }, { "avg_delta_l": 0.016728419810533524, "avg_loss_unweighted": 0.18368683755397797, "avg_weight": 0.5567083954811096, "std_delta_l": 0.05541493743658066, "std_weight": 0.06368225067853928, "weighted_loss": 0.10432559996843338 }, { "avg_delta_l": 0.006511412560939789, "avg_loss_unweighted": 0.18936336040496826, "avg_weight": 0.5873347520828247, "std_delta_l": 0.09096114337444305, "std_weight": 0.11668577790260315, "weighted_loss": 0.11529404670000076 }, { "avg_delta_l": -0.0026382477954030037, "avg_loss_unweighted": 0.18428325653076172, "avg_weight": 0.6015142798423767, "std_delta_l": 0.09677864611148834, "std_weight": 0.1313624531030655, "weighted_loss": 0.1169511079788208 }, { "avg_delta_l": 0.0016714859521016479, "avg_loss_unweighted": 0.19401732087135315, "avg_weight": 0.593279242515564, "std_delta_l": 0.08480220288038254, "std_weight": 0.1094028502702713, "weighted_loss": 0.11971302330493927 }, { "avg_delta_l": -0.020742937922477722, "avg_loss_unweighted": 0.2179749310016632, "avg_weight": 0.6150237917900085, "std_delta_l": 0.06781923025846481, "std_weight": 0.11141278594732285, "weighted_loss": 0.13604040443897247 }, { "avg_delta_l": 0.03081609308719635, "avg_loss_unweighted": 0.18268372118473053, "avg_weight": 0.5558804869651794, "std_delta_l": 0.08500520884990692, "std_weight": 0.07407210022211075, "weighted_loss": 0.10408174246549606 }, { "avg_delta_l": -0.005344700999557972, "avg_loss_unweighted": 0.2006254345178604, "avg_weight": 0.5976359248161316, "std_delta_l": 0.08298031985759735, "std_weight": 0.11859247833490372, "weighted_loss": 0.1250494122505188 }, { "avg_delta_l": 0.016094239428639412, "avg_loss_unweighted": 0.1865849643945694, "avg_weight": 0.5699697136878967, "std_delta_l": 0.08263058215379715, "std_weight": 0.09758555144071579, "weighted_loss": 0.10992622375488281 }, { "avg_delta_l": 0.027780737727880478, "avg_loss_unweighted": 0.16376946866512299, "avg_weight": 0.5252218246459961, "std_delta_l": 0.05085879936814308, "std_weight": 0.04398263245820999, "weighted_loss": 0.08719006180763245 }, { "avg_delta_l": 0.03985077887773514, "avg_loss_unweighted": 0.1616857349872589, "avg_weight": 0.5154445171356201, "std_delta_l": 0.05070720985531807, "std_weight": 0.030889034271240234, "weighted_loss": 0.08386015892028809 }, { "avg_delta_l": 0.03590575233101845, "avg_loss_unweighted": 0.15398360788822174, "avg_weight": 0.529410183429718, "std_delta_l": 0.06228005513548851, "std_weight": 0.055298689752817154, "weighted_loss": 0.08333896100521088 }, { "avg_delta_l": 0.022574814036488533, "avg_loss_unweighted": 0.17118382453918457, "avg_weight": 0.5394666194915771, "std_delta_l": 0.05598306655883789, "std_weight": 0.05677827075123787, "weighted_loss": 0.09436169266700745 }, { "epoch": 6.034782608695652, "grad_norm": 0.5610637872852641, "learning_rate": 8.820533543479082e-06, "loss": 0.2114, "step": 350 }, { "avg_delta_l": 0.036304719746112823, "avg_loss_unweighted": 0.1660812497138977, "avg_weight": 0.5332018136978149, "std_delta_l": 0.0658281147480011, "std_weight": 0.05797513574361801, "weighted_loss": 0.09069176763296127 }, { "avg_delta_l": 0.02860870212316513, "avg_loss_unweighted": 0.1503060907125473, "avg_weight": 0.5268133282661438, "std_delta_l": 0.04626796394586563, "std_weight": 0.04001821205019951, "weighted_loss": 0.07981467992067337 }, { "avg_delta_l": 0.023317677900195122, "avg_loss_unweighted": 0.1628732681274414, "avg_weight": 0.5398280620574951, "std_delta_l": 0.06120661273598671, "std_weight": 0.06649298220872879, "weighted_loss": 0.08975313603878021 }, { "avg_delta_l": 0.016703668981790543, "avg_loss_unweighted": 0.16445595026016235, "avg_weight": 0.5623446702957153, "std_delta_l": 0.0754438042640686, "std_weight": 0.09184218943119049, "weighted_loss": 0.09721045196056366 }, { "avg_delta_l": 0.03852124139666557, "avg_loss_unweighted": 0.16225184500217438, "avg_weight": 0.5211113095283508, "std_delta_l": 0.0550529919564724, "std_weight": 0.04130353033542633, "weighted_loss": 0.08585379272699356 }, { "avg_delta_l": 0.03222885727882385, "avg_loss_unweighted": 0.15484938025474548, "avg_weight": 0.5186646580696106, "std_delta_l": 0.04114093631505966, "std_weight": 0.03406080976128578, "weighted_loss": 0.08107255399227142 }, { "avg_delta_l": 0.030267486348748207, "avg_loss_unweighted": 0.16209638118743896, "avg_weight": 0.5310333371162415, "std_delta_l": 0.05630816891789436, "std_weight": 0.053091492503881454, "weighted_loss": 0.08768747746944427 }, { "avg_delta_l": 0.023561207577586174, "avg_loss_unweighted": 0.1562909483909607, "avg_weight": 0.5429314374923706, "std_delta_l": 0.05906714126467705, "std_weight": 0.06228972598910332, "weighted_loss": 0.08717375248670578 }, { "avg_delta_l": 0.022190826013684273, "avg_loss_unweighted": 0.17290155589580536, "avg_weight": 0.5388760566711426, "std_delta_l": 0.05789538100361824, "std_weight": 0.06512521207332611, "weighted_loss": 0.09499958902597427 }, { "avg_delta_l": 0.01419832929968834, "avg_loss_unweighted": 0.169648215174675, "avg_weight": 0.5541292428970337, "std_delta_l": 0.06076875701546669, "std_weight": 0.07499650120735168, "weighted_loss": 0.09653469175100327 }, { "avg_delta_l": 0.03447703272104263, "avg_loss_unweighted": 0.15744680166244507, "avg_weight": 0.5338870286941528, "std_delta_l": 0.06081821024417877, "std_weight": 0.0531044602394104, "weighted_loss": 0.08523577451705933 }, { "avg_delta_l": 0.03224974125623703, "avg_loss_unweighted": 0.15680937469005585, "avg_weight": 0.5374976396560669, "std_delta_l": 0.061313796788454056, "std_weight": 0.0586368590593338, "weighted_loss": 0.08672033250331879 }, { "avg_delta_l": 0.024318180978298187, "avg_loss_unweighted": 0.16629691421985626, "avg_weight": 0.5391719937324524, "std_delta_l": 0.05990965664386749, "std_weight": 0.06956794857978821, "weighted_loss": 0.09179898351430893 }, { "avg_delta_l": 0.0348152220249176, "avg_loss_unweighted": 0.1590774804353714, "avg_weight": 0.5318779945373535, "std_delta_l": 0.0537673756480217, "std_weight": 0.04757345840334892, "weighted_loss": 0.08715356141328812 }, { "avg_delta_l": 0.0203531663864851, "avg_loss_unweighted": 0.1677587926387787, "avg_weight": 0.5475032329559326, "std_delta_l": 0.07176239788532257, "std_weight": 0.08720666170120239, "weighted_loss": 0.095569908618927 }, { "avg_delta_l": 0.008483096025884151, "avg_loss_unweighted": 0.16340544819831848, "avg_weight": 0.5603906512260437, "std_delta_l": 0.06037623807787895, "std_weight": 0.08832064270973206, "weighted_loss": 0.09411931782960892 }, { "avg_delta_l": 0.042490892112255096, "avg_loss_unweighted": 0.16168060898780823, "avg_weight": 0.5272088050842285, "std_delta_l": 0.06312643736600876, "std_weight": 0.043369192630052567, "weighted_loss": 0.08703551441431046 }, { "avg_delta_l": 0.039354972541332245, "avg_loss_unweighted": 0.1553604006767273, "avg_weight": 0.5307909846305847, "std_delta_l": 0.06150350719690323, "std_weight": 0.048225972801446915, "weighted_loss": 0.08400344103574753 }, { "avg_delta_l": 0.026215096935629845, "avg_loss_unweighted": 0.1629120260477066, "avg_weight": 0.5328804850578308, "std_delta_l": 0.050248779356479645, "std_weight": 0.059512387961149216, "weighted_loss": 0.0884554386138916 }, { "avg_delta_l": 0.037324510514736176, "avg_loss_unweighted": 0.16050568222999573, "avg_weight": 0.5258163213729858, "std_delta_l": 0.056647397577762604, "std_weight": 0.050165239721536636, "weighted_loss": 0.08513493835926056 }, { "epoch": 6.208695652173913, "grad_norm": 0.6090490759750596, "learning_rate": 8.721758687811353e-06, "loss": 0.1776, "step": 360 }, { "avg_delta_l": 0.043062880635261536, "avg_loss_unweighted": 0.1437038630247116, "avg_weight": 0.5228575468063354, "std_delta_l": 0.05297078192234039, "std_weight": 0.035464610904455185, "weighted_loss": 0.07632750272750854 }, { "avg_delta_l": 0.023649893701076508, "avg_loss_unweighted": 0.16086530685424805, "avg_weight": 0.5582073330879211, "std_delta_l": 0.07879013568162918, "std_weight": 0.09271430969238281, "weighted_loss": 0.09245248138904572 }, { "avg_delta_l": 0.01893262192606926, "avg_loss_unweighted": 0.16507588326931, "avg_weight": 0.5418493151664734, "std_delta_l": 0.06264107674360275, "std_weight": 0.0691085010766983, "weighted_loss": 0.09214841574430466 }, { "avg_delta_l": 0.011841537430882454, "avg_loss_unweighted": 0.165127232670784, "avg_weight": 0.5501499176025391, "std_delta_l": 0.0527651272714138, "std_weight": 0.06971286237239838, "weighted_loss": 0.09264390170574188 }, { "avg_delta_l": 0.011548560112714767, "avg_loss_unweighted": 0.16105438768863678, "avg_weight": 0.5490608215332031, "std_delta_l": 0.05745014548301697, "std_weight": 0.08158428966999054, "weighted_loss": 0.09064507484436035 }, { "avg_delta_l": 0.0358392558991909, "avg_loss_unweighted": 0.14876842498779297, "avg_weight": 0.5272306203842163, "std_delta_l": 0.057248443365097046, "std_weight": 0.043891824781894684, "weighted_loss": 0.08059398084878922 }, { "avg_delta_l": 0.024532098323106766, "avg_loss_unweighted": 0.16493961215019226, "avg_weight": 0.5337454080581665, "std_delta_l": 0.05005824565887451, "std_weight": 0.05267021059989929, "weighted_loss": 0.08940117806196213 }, { "avg_delta_l": 0.0344090573489666, "avg_loss_unweighted": 0.1573682725429535, "avg_weight": 0.5251339673995972, "std_delta_l": 0.05567064881324768, "std_weight": 0.039149995893239975, "weighted_loss": 0.08420778065919876 }, { "avg_delta_l": 0.03359810635447502, "avg_loss_unweighted": 0.16960643231868744, "avg_weight": 0.5238117575645447, "std_delta_l": 0.05422256141901016, "std_weight": 0.04691746458411217, "weighted_loss": 0.08911498636007309 }, { "avg_delta_l": 0.03489895910024643, "avg_loss_unweighted": 0.14439049363136292, "avg_weight": 0.5315151214599609, "std_delta_l": 0.06406518816947937, "std_weight": 0.05519518256187439, "weighted_loss": 0.07773265242576599 }, { "avg_delta_l": 0.02021654322743416, "avg_loss_unweighted": 0.15357166528701782, "avg_weight": 0.5583229660987854, "std_delta_l": 0.07118632644414902, "std_weight": 0.08775531500577927, "weighted_loss": 0.08921051770448685 }, { "avg_delta_l": 0.038186345249414444, "avg_loss_unweighted": 0.14145758748054504, "avg_weight": 0.5306152105331421, "std_delta_l": 0.06122756376862526, "std_weight": 0.04862202703952789, "weighted_loss": 0.07695484161376953 }, { "avg_delta_l": 0.02947559393942356, "avg_loss_unweighted": 0.16337880492210388, "avg_weight": 0.548286497592926, "std_delta_l": 0.07012872397899628, "std_weight": 0.07712328433990479, "weighted_loss": 0.09111874550580978 }, { "avg_delta_l": 0.03669004514813423, "avg_loss_unweighted": 0.161650151014328, "avg_weight": 0.5353915691375732, "std_delta_l": 0.07090125977993011, "std_weight": 0.06005263328552246, "weighted_loss": 0.08779493719339371 }, { "avg_delta_l": 0.02292964793741703, "avg_loss_unweighted": 0.15773281455039978, "avg_weight": 0.5483147501945496, "std_delta_l": 0.06477487087249756, "std_weight": 0.07647126913070679, "weighted_loss": 0.08986569941043854 }, { "avg_delta_l": 0.030767563730478287, "avg_loss_unweighted": 0.16146408021450043, "avg_weight": 0.5385851263999939, "std_delta_l": 0.05499255657196045, "std_weight": 0.052125394344329834, "weighted_loss": 0.08849098533391953 }, { "avg_delta_l": 0.035390086472034454, "avg_loss_unweighted": 0.15099377930164337, "avg_weight": 0.5366199612617493, "std_delta_l": 0.060876477509737015, "std_weight": 0.05828988924622536, "weighted_loss": 0.08322659134864807 }, { "avg_delta_l": 0.03852558881044388, "avg_loss_unweighted": 0.15718629956245422, "avg_weight": 0.536138117313385, "std_delta_l": 0.06291402876377106, "std_weight": 0.0633280798792839, "weighted_loss": 0.08555136620998383 }, { "avg_delta_l": 0.011692997999489307, "avg_loss_unweighted": 0.1628696769475937, "avg_weight": 0.5515860319137573, "std_delta_l": 0.05228447541594505, "std_weight": 0.0691496953368187, "weighted_loss": 0.09163925796747208 }, { "avg_delta_l": 0.018870526924729347, "avg_loss_unweighted": 0.16773229837417603, "avg_weight": 0.5530588626861572, "std_delta_l": 0.05838392674922943, "std_weight": 0.07733669131994247, "weighted_loss": 0.09566932916641235 }, { "epoch": 6.3826086956521735, "grad_norm": 0.7101666072890872, "learning_rate": 8.619613954078681e-06, "loss": 0.1745, "step": 370 }, { "avg_delta_l": 0.03373560309410095, "avg_loss_unweighted": 0.1507887840270996, "avg_weight": 0.557068407535553, "std_delta_l": 0.0871211588382721, "std_weight": 0.08057838678359985, "weighted_loss": 0.08839846402406693 }, { "avg_delta_l": 0.018250880762934685, "avg_loss_unweighted": 0.16012905538082123, "avg_weight": 0.5505179762840271, "std_delta_l": 0.06210945546627045, "std_weight": 0.07664507627487183, "weighted_loss": 0.08942428231239319 }, { "avg_delta_l": 0.029589716345071793, "avg_loss_unweighted": 0.1605612337589264, "avg_weight": 0.5425030589103699, "std_delta_l": 0.0570203997194767, "std_weight": 0.05277738347649574, "weighted_loss": 0.0889836996793747 }, { "avg_delta_l": 0.023952718824148178, "avg_loss_unweighted": 0.17419598996639252, "avg_weight": 0.5364450812339783, "std_delta_l": 0.06209466978907585, "std_weight": 0.06441882997751236, "weighted_loss": 0.09557827562093735 }, { "avg_delta_l": 0.026792213320732117, "avg_loss_unweighted": 0.1587594449520111, "avg_weight": 0.554678201675415, "std_delta_l": 0.0732438862323761, "std_weight": 0.06993594765663147, "weighted_loss": 0.09116785228252411 }, { "avg_delta_l": 0.013901183381676674, "avg_loss_unweighted": 0.16522058844566345, "avg_weight": 0.5527389049530029, "std_delta_l": 0.05698860064148903, "std_weight": 0.07496719807386398, "weighted_loss": 0.09299445897340775 }, { "avg_delta_l": 0.023202206939458847, "avg_loss_unweighted": 0.14358338713645935, "avg_weight": 0.5433206558227539, "std_delta_l": 0.0607219897210598, "std_weight": 0.06832507997751236, "weighted_loss": 0.08078684657812119 }, { "avg_delta_l": 0.01960369572043419, "avg_loss_unweighted": 0.1594921499490738, "avg_weight": 0.553935706615448, "std_delta_l": 0.07213889062404633, "std_weight": 0.08892621845006943, "weighted_loss": 0.0928337574005127 }, { "avg_delta_l": 0.016241025179624557, "avg_loss_unweighted": 0.1722337007522583, "avg_weight": 0.5505360960960388, "std_delta_l": 0.06094249710440636, "std_weight": 0.07045763731002808, "weighted_loss": 0.09804010391235352 }, { "avg_delta_l": 0.005349940154701471, "avg_loss_unweighted": 0.17695394158363342, "avg_weight": 0.5720739364624023, "std_delta_l": 0.07207995653152466, "std_weight": 0.10899167507886887, "weighted_loss": 0.10482323169708252 }, { "avg_delta_l": 0.02599812112748623, "avg_loss_unweighted": 0.1540282815694809, "avg_weight": 0.5545412302017212, "std_delta_l": 0.06584179401397705, "std_weight": 0.0741889625787735, "weighted_loss": 0.08774836361408234 }, { "avg_delta_l": 0.023946255445480347, "avg_loss_unweighted": 0.17250747978687286, "avg_weight": 0.5431427955627441, "std_delta_l": 0.05531582236289978, "std_weight": 0.05381859838962555, "weighted_loss": 0.09640708565711975 }, { "avg_delta_l": 0.009537833742797375, "avg_loss_unweighted": 0.16204112768173218, "avg_weight": 0.5603387355804443, "std_delta_l": 0.06405755877494812, "std_weight": 0.09353582561016083, "weighted_loss": 0.09478883445262909 }, { "avg_delta_l": 0.014259629882872105, "avg_loss_unweighted": 0.16320139169692993, "avg_weight": 0.5586378574371338, "std_delta_l": 0.06700735539197922, "std_weight": 0.0881577730178833, "weighted_loss": 0.09230807423591614 }, { "avg_delta_l": 0.01945844106376171, "avg_loss_unweighted": 0.173689603805542, "avg_weight": 0.5590063333511353, "std_delta_l": 0.07476502656936646, "std_weight": 0.09655298292636871, "weighted_loss": 0.10004616528749466 }, { "avg_delta_l": 0.015165631659328938, "avg_loss_unweighted": 0.13948313891887665, "avg_weight": 0.5548504590988159, "std_delta_l": 0.06566950678825378, "std_weight": 0.07629723846912384, "weighted_loss": 0.08018382638692856 }, { "avg_delta_l": 0.002397578675299883, "avg_loss_unweighted": 0.17974725365638733, "avg_weight": 0.5770857930183411, "std_delta_l": 0.07374946773052216, "std_weight": 0.1087651476264, "weighted_loss": 0.1067347601056099 }, { "avg_delta_l": 0.014964379370212555, "avg_loss_unweighted": 0.17395184934139252, "avg_weight": 0.5629411339759827, "std_delta_l": 0.07475418597459793, "std_weight": 0.09377302974462509, "weighted_loss": 0.10222455114126205 }, { "avg_delta_l": 0.004426570609211922, "avg_loss_unweighted": 0.15835878252983093, "avg_weight": 0.5808630585670471, "std_delta_l": 0.07574250549077988, "std_weight": 0.08974011242389679, "weighted_loss": 0.0976942628622055 }, { "avg_delta_l": 0.017420493066310883, "avg_loss_unweighted": 0.1651545912027359, "avg_weight": 0.5504087805747986, "std_delta_l": 0.06122888997197151, "std_weight": 0.077219158411026, "weighted_loss": 0.09285342693328857 }, { "epoch": 6.556521739130435, "grad_norm": 0.6401679037854245, "learning_rate": 8.514191829550264e-06, "loss": 0.1874, "step": 380 }, { "avg_delta_l": 0.013761603273451328, "avg_loss_unweighted": 0.1642816811800003, "avg_weight": 0.548249363899231, "std_delta_l": 0.05325727164745331, "std_weight": 0.07089182734489441, "weighted_loss": 0.09272457659244537 }, { "avg_delta_l": -0.007270365953445435, "avg_loss_unweighted": 0.16818495094776154, "avg_weight": 0.5802996754646301, "std_delta_l": 0.070262111723423, "std_weight": 0.11300235241651535, "weighted_loss": 0.10036952048540115 }, { "avg_delta_l": 0.0025288329925388098, "avg_loss_unweighted": 0.1742187887430191, "avg_weight": 0.5730654001235962, "std_delta_l": 0.07067830115556717, "std_weight": 0.1034511849284172, "weighted_loss": 0.10446241497993469 }, { "avg_delta_l": 0.015253969468176365, "avg_loss_unweighted": 0.17741838097572327, "avg_weight": 0.5587475299835205, "std_delta_l": 0.07447171211242676, "std_weight": 0.09695109724998474, "weighted_loss": 0.10228033363819122 }, { "avg_delta_l": 0.0027754749171435833, "avg_loss_unweighted": 0.16336233913898468, "avg_weight": 0.5807636380195618, "std_delta_l": 0.08328769356012344, "std_weight": 0.11544035375118256, "weighted_loss": 0.09900899976491928 }, { "avg_delta_l": 0.014754367992281914, "avg_loss_unweighted": 0.158174529671669, "avg_weight": 0.553555965423584, "std_delta_l": 0.06017402186989784, "std_weight": 0.0766366720199585, "weighted_loss": 0.09007517248392105 }, { "avg_delta_l": 0.011593649163842201, "avg_loss_unweighted": 0.1755593717098236, "avg_weight": 0.5721234083175659, "std_delta_l": 0.07692418992519379, "std_weight": 0.1109660193324089, "weighted_loss": 0.10457098484039307 }, { "avg_delta_l": 0.013670380227267742, "avg_loss_unweighted": 0.15767082571983337, "avg_weight": 0.5476328134536743, "std_delta_l": 0.051140185445547104, "std_weight": 0.060457147657871246, "weighted_loss": 0.08883675932884216 }, { "avg_delta_l": 0.006774057634174824, "avg_loss_unweighted": 0.1608394831418991, "avg_weight": 0.5684486627578735, "std_delta_l": 0.06713949888944626, "std_weight": 0.10205676406621933, "weighted_loss": 0.09539350867271423 }, { "avg_delta_l": -0.010092564858496189, "avg_loss_unweighted": 0.17233219742774963, "avg_weight": 0.5944679975509644, "std_delta_l": 0.07023513317108154, "std_weight": 0.11462309956550598, "weighted_loss": 0.1057373583316803 }, { "avg_delta_l": -0.014209370128810406, "avg_loss_unweighted": 0.16458500921726227, "avg_weight": 0.6029735803604126, "std_delta_l": 0.07285521924495697, "std_weight": 0.11982208490371704, "weighted_loss": 0.10284353792667389 }, { "avg_delta_l": 0.006711836904287338, "avg_loss_unweighted": 0.1617935299873352, "avg_weight": 0.5766006708145142, "std_delta_l": 0.07895278930664062, "std_weight": 0.10377341508865356, "weighted_loss": 0.09677910804748535 }, { "avg_delta_l": -0.006691123358905315, "avg_loss_unweighted": 0.17161841690540314, "avg_weight": 0.5898037552833557, "std_delta_l": 0.06736832112073898, "std_weight": 0.09610901772975922, "weighted_loss": 0.10424134880304337 }, { "avg_delta_l": 0.031002167612314224, "avg_loss_unweighted": 0.15219910442829132, "avg_weight": 0.5345308184623718, "std_delta_l": 0.061085931956768036, "std_weight": 0.0571356825530529, "weighted_loss": 0.08253710716962814 }, { "avg_delta_l": 0.0055652582086622715, "avg_loss_unweighted": 0.1626279652118683, "avg_weight": 0.5800094604492188, "std_delta_l": 0.06882040202617645, "std_weight": 0.08635399490594864, "weighted_loss": 0.09745583683252335 }, { "avg_delta_l": 0.007325612008571625, "avg_loss_unweighted": 0.1736491322517395, "avg_weight": 0.5772096514701843, "std_delta_l": 0.07553189992904663, "std_weight": 0.09673303365707397, "weighted_loss": 0.10229230672121048 }, { "avg_delta_l": 0.01445713359862566, "avg_loss_unweighted": 0.1559392511844635, "avg_weight": 0.556296706199646, "std_delta_l": 0.06707087904214859, "std_weight": 0.09052077680826187, "weighted_loss": 0.0890030711889267 }, { "avg_delta_l": 0.01563710905611515, "avg_loss_unweighted": 0.1609640121459961, "avg_weight": 0.5648635625839233, "std_delta_l": 0.07000359147787094, "std_weight": 0.08464062213897705, "weighted_loss": 0.09492214769124985 }, { "avg_delta_l": -0.010178716853260994, "avg_loss_unweighted": 0.1767815351486206, "avg_weight": 0.5966178774833679, "std_delta_l": 0.07290597259998322, "std_weight": 0.10934557020664215, "weighted_loss": 0.10939489305019379 }, { "avg_delta_l": -0.008246629498898983, "avg_loss_unweighted": 0.16269254684448242, "avg_weight": 0.596792995929718, "std_delta_l": 0.07906557619571686, "std_weight": 0.11062958836555481, "weighted_loss": 0.10184500366449356 }, { "epoch": 6.730434782608696, "grad_norm": 0.8900361132314417, "learning_rate": 8.405587769019072e-06, "loss": 0.1965, "step": 390 }, { "avg_delta_l": 0.006795146036893129, "avg_loss_unweighted": 0.1650816649198532, "avg_weight": 0.5678070783615112, "std_delta_l": 0.07083665579557419, "std_weight": 0.09369989484548569, "weighted_loss": 0.09627894312143326 }, { "avg_delta_l": 0.0032067662104964256, "avg_loss_unweighted": 0.1672046184539795, "avg_weight": 0.5768343210220337, "std_delta_l": 0.07662269473075867, "std_weight": 0.09649302065372467, "weighted_loss": 0.09938394278287888 }, { "avg_delta_l": 0.00017269491218030453, "avg_loss_unweighted": 0.16446591913700104, "avg_weight": 0.5762970447540283, "std_delta_l": 0.06248787045478821, "std_weight": 0.09109386056661606, "weighted_loss": 0.09673094749450684 }, { "avg_delta_l": 1.7028767615556717e-05, "avg_loss_unweighted": 0.16431909799575806, "avg_weight": 0.5852195024490356, "std_delta_l": 0.0698702484369278, "std_weight": 0.09756772965192795, "weighted_loss": 0.10040340572595596 }, { "avg_delta_l": 0.009203702211380005, "avg_loss_unweighted": 0.1632360816001892, "avg_weight": 0.5798183679580688, "std_delta_l": 0.07886692881584167, "std_weight": 0.09837306290864944, "weighted_loss": 0.09807202965021133 }, { "avg_delta_l": 0.012232599779963493, "avg_loss_unweighted": 0.16307567059993744, "avg_weight": 0.5666024684906006, "std_delta_l": 0.08018516004085541, "std_weight": 0.09937861561775208, "weighted_loss": 0.096177838742733 }, { "avg_delta_l": 0.0005073418724350631, "avg_loss_unweighted": 0.16785788536071777, "avg_weight": 0.5886992812156677, "std_delta_l": 0.07967959344387054, "std_weight": 0.100527822971344, "weighted_loss": 0.10230407118797302 }, { "avg_delta_l": 0.0072313291020691395, "avg_loss_unweighted": 0.15678389370441437, "avg_weight": 0.5796807408332825, "std_delta_l": 0.08613111823797226, "std_weight": 0.11607949435710907, "weighted_loss": 0.09556320309638977 }, { "avg_delta_l": 0.02242298796772957, "avg_loss_unweighted": 0.16375264525413513, "avg_weight": 0.563149631023407, "std_delta_l": 0.0733383521437645, "std_weight": 0.07821322977542877, "weighted_loss": 0.09730452299118042 }, { "avg_delta_l": 0.0119522949680686, "avg_loss_unweighted": 0.16670621931552887, "avg_weight": 0.5689558386802673, "std_delta_l": 0.07141474634408951, "std_weight": 0.08765606582164764, "weighted_loss": 0.09741758555173874 }, { "avg_delta_l": -0.007109478116035461, "avg_loss_unweighted": 0.16778069734573364, "avg_weight": 0.593454122543335, "std_delta_l": 0.07367901504039764, "std_weight": 0.11066718399524689, "weighted_loss": 0.10304879397153854 }, { "avg_delta_l": -0.002523935865610838, "avg_loss_unweighted": 0.17331837117671967, "avg_weight": 0.5794817209243774, "std_delta_l": 0.06597062945365906, "std_weight": 0.09909071773290634, "weighted_loss": 0.10412769019603729 }, { "avg_delta_l": 0.0033373096957802773, "avg_loss_unweighted": 0.16114117205142975, "avg_weight": 0.5858809351921082, "std_delta_l": 0.08753734081983566, "std_weight": 0.11617349088191986, "weighted_loss": 0.09843713045120239 }, { "avg_delta_l": 0.006085114553570747, "avg_loss_unweighted": 0.1630183756351471, "avg_weight": 0.5782232880592346, "std_delta_l": 0.07565804570913315, "std_weight": 0.1025250256061554, "weighted_loss": 0.09736952930688858 }, { "avg_delta_l": 0.013332558795809746, "avg_loss_unweighted": 0.14581477642059326, "avg_weight": 0.568508505821228, "std_delta_l": 0.07464617490768433, "std_weight": 0.08541735261678696, "weighted_loss": 0.08622189611196518 }, { "avg_delta_l": 0.005053448490798473, "avg_loss_unweighted": 0.1570214033126831, "avg_weight": 0.5727083086967468, "std_delta_l": 0.07276859134435654, "std_weight": 0.09874266386032104, "weighted_loss": 0.09337349236011505 }, { "avg_delta_l": -0.0068907602690160275, "avg_loss_unweighted": 0.1738646924495697, "avg_weight": 0.5858877897262573, "std_delta_l": 0.07324618101119995, "std_weight": 0.10758432745933533, "weighted_loss": 0.10485798120498657 }, { "avg_delta_l": 0.0084316935390234, "avg_loss_unweighted": 0.16290295124053955, "avg_weight": 0.5878986716270447, "std_delta_l": 0.08416283875703812, "std_weight": 0.10728392004966736, "weighted_loss": 0.10168282687664032 }, { "avg_delta_l": 0.00045787496492266655, "avg_loss_unweighted": 0.16190120577812195, "avg_weight": 0.593386709690094, "std_delta_l": 0.07875724881887436, "std_weight": 0.10726027190685272, "weighted_loss": 0.100656658411026 }, { "avg_delta_l": 0.010936356149613857, "avg_loss_unweighted": 0.1556427776813507, "avg_weight": 0.5828024744987488, "std_delta_l": 0.08557583391666412, "std_weight": 0.11205681413412094, "weighted_loss": 0.09466616064310074 }, { "epoch": 6.904347826086957, "grad_norm": 0.7240477639489437, "learning_rate": 8.293900108371988e-06, "loss": 0.1964, "step": 400 }, { "avg_delta_l": -0.009523330256342888, "avg_loss_unweighted": 0.16649514436721802, "avg_weight": 0.5943861603736877, "std_delta_l": 0.07353769987821579, "std_weight": 0.11529026180505753, "weighted_loss": 0.10261718183755875 }, { "avg_delta_l": 0.0031246112193912268, "avg_loss_unweighted": 0.15355412662029266, "avg_weight": 0.5879231691360474, "std_delta_l": 0.06682413071393967, "std_weight": 0.0937737375497818, "weighted_loss": 0.09445737302303314 }, { "avg_delta_l": 0.009395395405590534, "avg_loss_unweighted": 0.158554807305336, "avg_weight": 0.5607360005378723, "std_delta_l": 0.06222153827548027, "std_weight": 0.07853405922651291, "weighted_loss": 0.09117157012224197 }, { "avg_delta_l": 0.01160229742527008, "avg_loss_unweighted": 0.15770699083805084, "avg_weight": 0.5712028741836548, "std_delta_l": 0.07891538739204407, "std_weight": 0.09939723461866379, "weighted_loss": 0.0937541052699089 }, { "avg_delta_l": -0.005407131277024746, "avg_loss_unweighted": 0.16826559603214264, "avg_weight": 0.5902302265167236, "std_delta_l": 0.08192677050828934, "std_weight": 0.1183190643787384, "weighted_loss": 0.10378123074769974 }, { "avg_delta_l": -0.0002703567733988166, "avg_loss_unweighted": 0.15578041970729828, "avg_weight": 0.5708234906196594, "std_delta_l": 0.06472621113061905, "std_weight": 0.09317924082279205, "weighted_loss": 0.0919535756111145 }, { "avg_delta_l": 0.011942538432776928, "avg_loss_unweighted": 0.16118507087230682, "avg_weight": 0.5789998769760132, "std_delta_l": 0.0722949206829071, "std_weight": 0.08386804163455963, "weighted_loss": 0.0984267145395279 }, { "avg_delta_l": 0.009043366648256779, "avg_loss_unweighted": 0.16004028916358948, "avg_weight": 0.5718486905097961, "std_delta_l": 0.07039569318294525, "std_weight": 0.08820080757141113, "weighted_loss": 0.09483341872692108 }, { "avg_delta_l": -0.005156791303306818, "avg_loss_unweighted": 0.16034933924674988, "avg_weight": 0.5960103869438171, "std_delta_l": 0.08054545521736145, "std_weight": 0.11148133873939514, "weighted_loss": 0.09934712201356888 }, { "avg_delta_l": -0.009344159625470638, "avg_loss_unweighted": 0.16442784667015076, "avg_weight": 0.6029487252235413, "std_delta_l": 0.08204014599323273, "std_weight": 0.11933555454015732, "weighted_loss": 0.10262739658355713 }, { "avg_delta_l": 0.019892247393727303, "avg_loss_unweighted": 0.14393381774425507, "avg_weight": 0.5600284337997437, "std_delta_l": 0.07619714736938477, "std_weight": 0.08503115177154541, "weighted_loss": 0.0835261195898056 }, { "avg_delta_l": 0.031774863600730896, "avg_loss_unweighted": 0.13487312197685242, "avg_weight": 0.5321801900863647, "std_delta_l": 0.05333097279071808, "std_weight": 0.0466102734208107, "weighted_loss": 0.0737069770693779 }, { "avg_delta_l": 0.030289430171251297, "avg_loss_unweighted": 0.1306995004415512, "avg_weight": 0.5292291641235352, "std_delta_l": 0.05839293450117111, "std_weight": 0.050226226449012756, "weighted_loss": 0.07055070996284485 }, { "avg_delta_l": 0.02475684881210327, "avg_loss_unweighted": 0.1406899392604828, "avg_weight": 0.5204410552978516, "std_delta_l": 0.04053470119833946, "std_weight": 0.03253750875592232, "weighted_loss": 0.07394514977931976 }, { "avg_delta_l": 0.012141837738454342, "avg_loss_unweighted": 0.15369649231433868, "avg_weight": 0.5479903221130371, "std_delta_l": 0.04916955903172493, "std_weight": 0.07429156452417374, "weighted_loss": 0.08604803681373596 }, { "avg_delta_l": 0.034738991409540176, "avg_loss_unweighted": 0.14850780367851257, "avg_weight": 0.5146805047988892, "std_delta_l": 0.04452575370669365, "std_weight": 0.029361087828874588, "weighted_loss": 0.07706183195114136 }, { "avg_delta_l": 0.04252197593450546, "avg_loss_unweighted": 0.12768015265464783, "avg_weight": 0.521367073059082, "std_delta_l": 0.05136394128203392, "std_weight": 0.031764306128025055, "weighted_loss": 0.06723472476005554 }, { "avg_delta_l": 0.035951629281044006, "avg_loss_unweighted": 0.13097506761550903, "avg_weight": 0.5155403017997742, "std_delta_l": 0.04209107533097267, "std_weight": 0.02783689647912979, "weighted_loss": 0.06830036640167236 }, { "avg_delta_l": 0.03008589707314968, "avg_loss_unweighted": 0.12882108986377716, "avg_weight": 0.5384203791618347, "std_delta_l": 0.0605778694152832, "std_weight": 0.055486030876636505, "weighted_loss": 0.07075570523738861 }, { "epoch": 7.069565217391304, "grad_norm": 0.7490547148405121, "learning_rate": 8.179229975551288e-06, "loss": 0.1644, "step": 410 }, { "avg_delta_l": 0.02494729682803154, "avg_loss_unweighted": 0.13020920753479004, "avg_weight": 0.5243968367576599, "std_delta_l": 0.04242921620607376, "std_weight": 0.04457148164510727, "weighted_loss": 0.06926973164081573 }, { "avg_delta_l": 0.01709507778286934, "avg_loss_unweighted": 0.13024751842021942, "avg_weight": 0.5410402417182922, "std_delta_l": 0.050164371728897095, "std_weight": 0.0655798465013504, "weighted_loss": 0.07301820069551468 }, { "avg_delta_l": 0.027759628370404243, "avg_loss_unweighted": 0.13262693583965302, "avg_weight": 0.5245223641395569, "std_delta_l": 0.04355595260858536, "std_weight": 0.04217914119362831, "weighted_loss": 0.07016448676586151 }, { "avg_delta_l": 0.02991524338722229, "avg_loss_unweighted": 0.1397465169429779, "avg_weight": 0.541969895362854, "std_delta_l": 0.07009410113096237, "std_weight": 0.07494699954986572, "weighted_loss": 0.07794735580682755 }, { "avg_delta_l": 0.026802631095051765, "avg_loss_unweighted": 0.1447068750858307, "avg_weight": 0.5381745100021362, "std_delta_l": 0.0682494267821312, "std_weight": 0.06219696253538132, "weighted_loss": 0.08028366416692734 }, { "avg_delta_l": 0.022722434252500534, "avg_loss_unweighted": 0.1378956437110901, "avg_weight": 0.5365728139877319, "std_delta_l": 0.05186032876372337, "std_weight": 0.06561707705259323, "weighted_loss": 0.07592165470123291 }, { "avg_delta_l": 0.026900924742221832, "avg_loss_unweighted": 0.13757234811782837, "avg_weight": 0.5349971055984497, "std_delta_l": 0.05469810217618942, "std_weight": 0.05598612502217293, "weighted_loss": 0.07632039487361908 }, { "avg_delta_l": 0.034570422023534775, "avg_loss_unweighted": 0.11840182542800903, "avg_weight": 0.527704656124115, "std_delta_l": 0.05556800961494446, "std_weight": 0.0490858219563961, "weighted_loss": 0.06394396722316742 }, { "avg_delta_l": 0.026400115340948105, "avg_loss_unweighted": 0.12085241079330444, "avg_weight": 0.5371307730674744, "std_delta_l": 0.06583657115697861, "std_weight": 0.06914281845092773, "weighted_loss": 0.06706707179546356 }, { "avg_delta_l": 0.017905088141560555, "avg_loss_unweighted": 0.1388920396566391, "avg_weight": 0.5444962382316589, "std_delta_l": 0.056468091905117035, "std_weight": 0.07328815758228302, "weighted_loss": 0.07763536274433136 }, { "avg_delta_l": 0.026774941012263298, "avg_loss_unweighted": 0.1160021424293518, "avg_weight": 0.5348947048187256, "std_delta_l": 0.0566573403775692, "std_weight": 0.04816228151321411, "weighted_loss": 0.06385281682014465 }, { "avg_delta_l": 0.030375462025403976, "avg_loss_unweighted": 0.13547371327877045, "avg_weight": 0.5225087404251099, "std_delta_l": 0.04593419283628464, "std_weight": 0.03662140667438507, "weighted_loss": 0.07167576253414154 }, { "avg_delta_l": 0.030287599191069603, "avg_loss_unweighted": 0.13247492909431458, "avg_weight": 0.5283920764923096, "std_delta_l": 0.05492432415485382, "std_weight": 0.04975416138768196, "weighted_loss": 0.07163267582654953 }, { "avg_delta_l": 0.008759683929383755, "avg_loss_unweighted": 0.14047320187091827, "avg_weight": 0.5611911416053772, "std_delta_l": 0.062406010925769806, "std_weight": 0.08210565149784088, "weighted_loss": 0.08080485463142395 }, { "avg_delta_l": 0.027112262323498726, "avg_loss_unweighted": 0.1299310177564621, "avg_weight": 0.5365809798240662, "std_delta_l": 0.05635964497923851, "std_weight": 0.05163456127047539, "weighted_loss": 0.0715741366147995 }, { "avg_delta_l": 0.03181925415992737, "avg_loss_unweighted": 0.12098509818315506, "avg_weight": 0.5216790437698364, "std_delta_l": 0.04363148286938667, "std_weight": 0.039725616574287415, "weighted_loss": 0.06433870643377304 }, { "avg_delta_l": 0.029270507395267487, "avg_loss_unweighted": 0.1296357661485672, "avg_weight": 0.5431939959526062, "std_delta_l": 0.058726947754621506, "std_weight": 0.060362812131643295, "weighted_loss": 0.07233013212680817 }, { "avg_delta_l": 0.03218299150466919, "avg_loss_unweighted": 0.12562909722328186, "avg_weight": 0.5362892150878906, "std_delta_l": 0.06138734146952629, "std_weight": 0.055168602615594864, "weighted_loss": 0.0693613812327385 }, { "avg_delta_l": 0.03126091510057449, "avg_loss_unweighted": 0.125433087348938, "avg_weight": 0.5243082046508789, "std_delta_l": 0.04123827815055847, "std_weight": 0.04025173559784889, "weighted_loss": 0.06666218489408493 }, { "avg_delta_l": 0.030860960483551025, "avg_loss_unweighted": 0.13393917679786682, "avg_weight": 0.5274571776390076, "std_delta_l": 0.046613313257694244, "std_weight": 0.046924855560064316, "weighted_loss": 0.07211796194314957 }, { "epoch": 7.243478260869566, "grad_norm": 0.5387181787162598, "learning_rate": 8.06168119898802e-06, "loss": 0.1436, "step": 420 }, { "avg_delta_l": 0.015962500125169754, "avg_loss_unweighted": 0.12905436754226685, "avg_weight": 0.5543168187141418, "std_delta_l": 0.06294754147529602, "std_weight": 0.07766295224428177, "weighted_loss": 0.0738121047616005 }, { "avg_delta_l": 0.026875974610447884, "avg_loss_unweighted": 0.1256580352783203, "avg_weight": 0.5449163317680359, "std_delta_l": 0.06185498088598251, "std_weight": 0.07196792960166931, "weighted_loss": 0.07051683217287064 }, { "avg_delta_l": 0.0009186367969959974, "avg_loss_unweighted": 0.1467229723930359, "avg_weight": 0.576331377029419, "std_delta_l": 0.06552301347255707, "std_weight": 0.10737422108650208, "weighted_loss": 0.08678293228149414 }, { "avg_delta_l": 0.018280766904354095, "avg_loss_unweighted": 0.1449650526046753, "avg_weight": 0.5424960851669312, "std_delta_l": 0.05152864381670952, "std_weight": 0.0671306699514389, "weighted_loss": 0.08048368245363235 }, { "avg_delta_l": 0.020743247121572495, "avg_loss_unweighted": 0.13629740476608276, "avg_weight": 0.531622588634491, "std_delta_l": 0.043492190539836884, "std_weight": 0.04696264863014221, "weighted_loss": 0.07405781000852585 }, { "avg_delta_l": 0.007292138412594795, "avg_loss_unweighted": 0.1307334154844284, "avg_weight": 0.5606210827827454, "std_delta_l": 0.06041805073618889, "std_weight": 0.09208836406469345, "weighted_loss": 0.0757632702589035 }, { "avg_delta_l": 0.027775321155786514, "avg_loss_unweighted": 0.10975233465433121, "avg_weight": 0.5304939150810242, "std_delta_l": 0.05289187282323837, "std_weight": 0.0442335419356823, "weighted_loss": 0.05951748788356781 }, { "avg_delta_l": 0.022648988291621208, "avg_loss_unweighted": 0.1266893446445465, "avg_weight": 0.5351762771606445, "std_delta_l": 0.05352449417114258, "std_weight": 0.06052124872803688, "weighted_loss": 0.0687720775604248 }, { "avg_delta_l": 0.016047753393650055, "avg_loss_unweighted": 0.14293506741523743, "avg_weight": 0.5538417100906372, "std_delta_l": 0.06505847722291946, "std_weight": 0.08178842812776566, "weighted_loss": 0.08051203191280365 }, { "avg_delta_l": 0.018941137939691544, "avg_loss_unweighted": 0.1354338526725769, "avg_weight": 0.5532921552658081, "std_delta_l": 0.060907233506441116, "std_weight": 0.07755409926176071, "weighted_loss": 0.07811111211776733 }, { "avg_delta_l": 0.02383958175778389, "avg_loss_unweighted": 0.1391938328742981, "avg_weight": 0.5408105254173279, "std_delta_l": 0.053928591310977936, "std_weight": 0.07091425359249115, "weighted_loss": 0.07639217376708984 }, { "avg_delta_l": 0.020260760560631752, "avg_loss_unweighted": 0.1294076293706894, "avg_weight": 0.5381743311882019, "std_delta_l": 0.053041063249111176, "std_weight": 0.06733547896146774, "weighted_loss": 0.07224853336811066 }, { "avg_delta_l": 0.01992974616587162, "avg_loss_unweighted": 0.13328735530376434, "avg_weight": 0.5387576818466187, "std_delta_l": 0.04875605180859566, "std_weight": 0.052805256098508835, "weighted_loss": 0.07329221069812775 }, { "avg_delta_l": 0.019403286278247833, "avg_loss_unweighted": 0.13666687905788422, "avg_weight": 0.533003568649292, "std_delta_l": 0.0446355901658535, "std_weight": 0.05220106989145279, "weighted_loss": 0.07394490391016006 }, { "avg_delta_l": 0.02150299772620201, "avg_loss_unweighted": 0.13029710948467255, "avg_weight": 0.5546482801437378, "std_delta_l": 0.056964848190546036, "std_weight": 0.060367852449417114, "weighted_loss": 0.07501418888568878 }, { "avg_delta_l": 0.006506141275167465, "avg_loss_unweighted": 0.1411820352077484, "avg_weight": 0.5645853877067566, "std_delta_l": 0.057314593344926834, "std_weight": 0.0819176509976387, "weighted_loss": 0.08173363655805588 }, { "avg_delta_l": 0.028953157365322113, "avg_loss_unweighted": 0.13289272785186768, "avg_weight": 0.5454887747764587, "std_delta_l": 0.0652879998087883, "std_weight": 0.07262454926967621, "weighted_loss": 0.07584904879331589 }, { "avg_delta_l": 0.008514531888067722, "avg_loss_unweighted": 0.13554808497428894, "avg_weight": 0.562244176864624, "std_delta_l": 0.058807626366615295, "std_weight": 0.07310940325260162, "weighted_loss": 0.07872126996517181 }, { "avg_delta_l": 0.022812051698565483, "avg_loss_unweighted": 0.14304284751415253, "avg_weight": 0.5532628297805786, "std_delta_l": 0.0672152042388916, "std_weight": 0.08756186813116074, "weighted_loss": 0.08185716718435287 }, { "avg_delta_l": 0.037935659289360046, "avg_loss_unweighted": 0.1264592558145523, "avg_weight": 0.539679765701294, "std_delta_l": 0.07482864707708359, "std_weight": 0.056209202855825424, "weighted_loss": 0.06966943293809891 }, { "epoch": 7.417391304347826, "grad_norm": 0.6787151061807887, "learning_rate": 7.941360213590227e-06, "loss": 0.1507, "step": 430 }, { "avg_delta_l": 0.02171705849468708, "avg_loss_unweighted": 0.1424122303724289, "avg_weight": 0.5550550222396851, "std_delta_l": 0.06799021363258362, "std_weight": 0.07947882264852524, "weighted_loss": 0.0811237022280693 }, { "avg_delta_l": 0.010799011215567589, "avg_loss_unweighted": 0.14273712038993835, "avg_weight": 0.561170756816864, "std_delta_l": 0.06130601838231087, "std_weight": 0.08255741000175476, "weighted_loss": 0.0826043114066124 }, { "avg_delta_l": 0.004914775025099516, "avg_loss_unweighted": 0.1280168741941452, "avg_weight": 0.559808075428009, "std_delta_l": 0.05546281859278679, "std_weight": 0.07793392241001129, "weighted_loss": 0.07371796667575836 }, { "avg_delta_l": 0.005536318756639957, "avg_loss_unweighted": 0.13876552879810333, "avg_weight": 0.5675148367881775, "std_delta_l": 0.07061506062746048, "std_weight": 0.10071034729480743, "weighted_loss": 0.0819343850016594 }, { "avg_delta_l": 0.008034715428948402, "avg_loss_unweighted": 0.13622403144836426, "avg_weight": 0.5577333569526672, "std_delta_l": 0.05668250471353531, "std_weight": 0.07962063699960709, "weighted_loss": 0.07858489453792572 }, { "avg_delta_l": 0.012186960317194462, "avg_loss_unweighted": 0.1351487934589386, "avg_weight": 0.5603768229484558, "std_delta_l": 0.0608966164290905, "std_weight": 0.07924973964691162, "weighted_loss": 0.07875560969114304 }, { "avg_delta_l": 0.015260478481650352, "avg_loss_unweighted": 0.12838146090507507, "avg_weight": 0.5632750391960144, "std_delta_l": 0.07533632963895798, "std_weight": 0.1049981415271759, "weighted_loss": 0.07615072280168533 }, { "avg_delta_l": 0.021091101691126823, "avg_loss_unweighted": 0.13514599204063416, "avg_weight": 0.5493209362030029, "std_delta_l": 0.06359902769327164, "std_weight": 0.07122017443180084, "weighted_loss": 0.07647208869457245 }, { "avg_delta_l": 0.005768825300037861, "avg_loss_unweighted": 0.1393977403640747, "avg_weight": 0.5573598146438599, "std_delta_l": 0.05988599359989166, "std_weight": 0.08976710587739944, "weighted_loss": 0.07972899079322815 }, { "avg_delta_l": 0.018244873732328415, "avg_loss_unweighted": 0.12249334156513214, "avg_weight": 0.5488390326499939, "std_delta_l": 0.057104241102933884, "std_weight": 0.06452593952417374, "weighted_loss": 0.06951935589313507 }, { "avg_delta_l": 0.015129162929952145, "avg_loss_unweighted": 0.14299319684505463, "avg_weight": 0.5635589957237244, "std_delta_l": 0.06725267320871353, "std_weight": 0.08189665526151657, "weighted_loss": 0.08330385386943817 }, { "avg_delta_l": 0.0031361954752355814, "avg_loss_unweighted": 0.13654546439647675, "avg_weight": 0.562802255153656, "std_delta_l": 0.05589437112212181, "std_weight": 0.08919238299131393, "weighted_loss": 0.07945205271244049 }, { "avg_delta_l": 0.020424123853445053, "avg_loss_unweighted": 0.1369575560092926, "avg_weight": 0.5494465231895447, "std_delta_l": 0.05927437171339989, "std_weight": 0.07849840819835663, "weighted_loss": 0.07716159522533417 }, { "avg_delta_l": 0.006674997508525848, "avg_loss_unweighted": 0.13071154057979584, "avg_weight": 0.5568132400512695, "std_delta_l": 0.05784768611192703, "std_weight": 0.07552315294742584, "weighted_loss": 0.0755402073264122 }, { "avg_delta_l": 0.017785103991627693, "avg_loss_unweighted": 0.13325844705104828, "avg_weight": 0.5583164691925049, "std_delta_l": 0.06578122079372406, "std_weight": 0.08262814581394196, "weighted_loss": 0.07632764428853989 }, { "avg_delta_l": 0.0036897393874824047, "avg_loss_unweighted": 0.1425970494747162, "avg_weight": 0.5643807053565979, "std_delta_l": 0.057977091521024704, "std_weight": 0.09287985414266586, "weighted_loss": 0.08237764239311218 }, { "avg_delta_l": 0.01863783970475197, "avg_loss_unweighted": 0.12473791092634201, "avg_weight": 0.5476850867271423, "std_delta_l": 0.05941595137119293, "std_weight": 0.06774292886257172, "weighted_loss": 0.07053302973508835 }, { "avg_delta_l": 0.018139297142624855, "avg_loss_unweighted": 0.12695173919200897, "avg_weight": 0.543497622013092, "std_delta_l": 0.06060028076171875, "std_weight": 0.06517358124256134, "weighted_loss": 0.07088268548250198 }, { "avg_delta_l": 0.021770551800727844, "avg_loss_unweighted": 0.1305021345615387, "avg_weight": 0.5529831051826477, "std_delta_l": 0.0741787776350975, "std_weight": 0.091402068734169, "weighted_loss": 0.07446921616792679 }, { "avg_delta_l": 0.021359046921133995, "avg_loss_unweighted": 0.12727361917495728, "avg_weight": 0.5432214140892029, "std_delta_l": 0.05978413671255112, "std_weight": 0.07145452499389648, "weighted_loss": 0.07156552374362946 }, { "epoch": 7.591304347826087, "grad_norm": 0.580247863670325, "learning_rate": 7.818375964371132e-06, "loss": 0.154, "step": 440 }, { "avg_delta_l": 0.01727438159286976, "avg_loss_unweighted": 0.14193788170814514, "avg_weight": 0.557389497756958, "std_delta_l": 0.06416050344705582, "std_weight": 0.07124043256044388, "weighted_loss": 0.08145745098590851 }, { "avg_delta_l": 0.020719606429338455, "avg_loss_unweighted": 0.11912104487419128, "avg_weight": 0.5490430593490601, "std_delta_l": 0.05674558877944946, "std_weight": 0.06857990473508835, "weighted_loss": 0.06763313710689545 }, { "avg_delta_l": 0.02181415446102619, "avg_loss_unweighted": 0.12405417859554291, "avg_weight": 0.5483590960502625, "std_delta_l": 0.06423909217119217, "std_weight": 0.07175296545028687, "weighted_loss": 0.07004553824663162 }, { "avg_delta_l": 0.004938068334013224, "avg_loss_unweighted": 0.13228407502174377, "avg_weight": 0.5636654496192932, "std_delta_l": 0.06582720577716827, "std_weight": 0.08320992439985275, "weighted_loss": 0.07721560448408127 }, { "avg_delta_l": 0.009152732789516449, "avg_loss_unweighted": 0.1313195824623108, "avg_weight": 0.5673959851264954, "std_delta_l": 0.07261321693658829, "std_weight": 0.0894123911857605, "weighted_loss": 0.07862358540296555 }, { "avg_delta_l": 0.018745746463537216, "avg_loss_unweighted": 0.12691809237003326, "avg_weight": 0.5508409142494202, "std_delta_l": 0.07056795060634613, "std_weight": 0.07542316615581512, "weighted_loss": 0.07256332784891129 }, { "avg_delta_l": 0.0028365920297801495, "avg_loss_unweighted": 0.13352318108081818, "avg_weight": 0.5769119262695312, "std_delta_l": 0.07424765825271606, "std_weight": 0.11046671122312546, "weighted_loss": 0.0793863907456398 }, { "avg_delta_l": 0.015189852565526962, "avg_loss_unweighted": 0.12471558153629303, "avg_weight": 0.552949845790863, "std_delta_l": 0.05642645061016083, "std_weight": 0.07352743297815323, "weighted_loss": 0.07134521752595901 }, { "avg_delta_l": 0.003708215430378914, "avg_loss_unweighted": 0.12843948602676392, "avg_weight": 0.5661636590957642, "std_delta_l": 0.06587465107440948, "std_weight": 0.0938652828335762, "weighted_loss": 0.07615915685892105 }, { "avg_delta_l": 0.004702695645391941, "avg_loss_unweighted": 0.12776808440685272, "avg_weight": 0.5677977204322815, "std_delta_l": 0.06429985910654068, "std_weight": 0.09417243301868439, "weighted_loss": 0.07535112649202347 }, { "avg_delta_l": 0.007570988032966852, "avg_loss_unweighted": 0.12231381982564926, "avg_weight": 0.5750409364700317, "std_delta_l": 0.0673782154917717, "std_weight": 0.08732123672962189, "weighted_loss": 0.07314958423376083 }, { "avg_delta_l": 0.020817330107092857, "avg_loss_unweighted": 0.1134706363081932, "avg_weight": 0.5506293177604675, "std_delta_l": 0.06456787884235382, "std_weight": 0.06965503096580505, "weighted_loss": 0.06503168493509293 }, { "avg_delta_l": 0.009104805998504162, "avg_loss_unweighted": 0.14984723925590515, "avg_weight": 0.5693150162696838, "std_delta_l": 0.07553108036518097, "std_weight": 0.09623578935861588, "weighted_loss": 0.08751595765352249 }, { "avg_delta_l": -0.004056934267282486, "avg_loss_unweighted": 0.13847218453884125, "avg_weight": 0.5838955640792847, "std_delta_l": 0.07332678139209747, "std_weight": 0.10571406781673431, "weighted_loss": 0.08569276332855225 }, { "avg_delta_l": -0.011770937591791153, "avg_loss_unweighted": 0.1347850263118744, "avg_weight": 0.5881163477897644, "std_delta_l": 0.06300423294305801, "std_weight": 0.10232193022966385, "weighted_loss": 0.08383312821388245 }, { "avg_delta_l": 0.012465611100196838, "avg_loss_unweighted": 0.13717550039291382, "avg_weight": 0.5654704570770264, "std_delta_l": 0.06652370095252991, "std_weight": 0.08352479338645935, "weighted_loss": 0.0805349051952362 }, { "avg_delta_l": 0.0035288268700242043, "avg_loss_unweighted": 0.13752220571041107, "avg_weight": 0.5636508464813232, "std_delta_l": 0.05832124873995781, "std_weight": 0.08045662194490433, "weighted_loss": 0.07928914576768875 }, { "avg_delta_l": 0.012675780802965164, "avg_loss_unweighted": 0.12124550342559814, "avg_weight": 0.5556567311286926, "std_delta_l": 0.060268744826316833, "std_weight": 0.07323100417852402, "weighted_loss": 0.0697520449757576 }, { "avg_delta_l": -0.003985787741839886, "avg_loss_unweighted": 0.1349395364522934, "avg_weight": 0.586904764175415, "std_delta_l": 0.06953128427267075, "std_weight": 0.10621663182973862, "weighted_loss": 0.08257520198822021 }, { "avg_delta_l": 0.03464851900935173, "avg_loss_unweighted": 0.12366721779108047, "avg_weight": 0.5251407027244568, "std_delta_l": 0.045265018939971924, "std_weight": 0.03316252678632736, "weighted_loss": 0.06557202339172363 }, { "epoch": 7.765217391304348, "grad_norm": 0.6414826401193034, "learning_rate": 7.692839807804522e-06, "loss": 0.1523, "step": 450 }, { "avg_delta_l": 0.010570072568953037, "avg_loss_unweighted": 0.1263161301612854, "avg_weight": 0.5620128512382507, "std_delta_l": 0.0677076131105423, "std_weight": 0.08192526549100876, "weighted_loss": 0.07387000322341919 }, { "avg_delta_l": 0.000689418928232044, "avg_loss_unweighted": 0.13356253504753113, "avg_weight": 0.5791705846786499, "std_delta_l": 0.071337029337883, "std_weight": 0.09917944669723511, "weighted_loss": 0.08093539625406265 }, { "avg_delta_l": -0.001896645873785019, "avg_loss_unweighted": 0.14083576202392578, "avg_weight": 0.5768675208091736, "std_delta_l": 0.0609835609793663, "std_weight": 0.08845651149749756, "weighted_loss": 0.08322089910507202 }, { "avg_delta_l": 0.012722203508019447, "avg_loss_unweighted": 0.13063447177410126, "avg_weight": 0.5690644979476929, "std_delta_l": 0.07035917788743973, "std_weight": 0.08486757427453995, "weighted_loss": 0.07816760987043381 }, { "avg_delta_l": 0.00596989830955863, "avg_loss_unweighted": 0.13177822530269623, "avg_weight": 0.568569540977478, "std_delta_l": 0.06312090158462524, "std_weight": 0.08433092385530472, "weighted_loss": 0.07772479206323624 }, { "avg_delta_l": 0.00981688778847456, "avg_loss_unweighted": 0.12785758078098297, "avg_weight": 0.5600061416625977, "std_delta_l": 0.059549298137426376, "std_weight": 0.07217484712600708, "weighted_loss": 0.07341104000806808 }, { "avg_delta_l": 0.0065369619987905025, "avg_loss_unweighted": 0.1385820209980011, "avg_weight": 0.5621060729026794, "std_delta_l": 0.06178991124033928, "std_weight": 0.08181367814540863, "weighted_loss": 0.08069512248039246 }, { "avg_delta_l": 0.008525742217898369, "avg_loss_unweighted": 0.1350378692150116, "avg_weight": 0.5748069882392883, "std_delta_l": 0.07679642736911774, "std_weight": 0.09360072761774063, "weighted_loss": 0.08111037313938141 }, { "avg_delta_l": 0.008719151839613914, "avg_loss_unweighted": 0.1292824000120163, "avg_weight": 0.5723801851272583, "std_delta_l": 0.0720829889178276, "std_weight": 0.10326037555932999, "weighted_loss": 0.07644803076982498 }, { "avg_delta_l": 0.0066354586742818356, "avg_loss_unweighted": 0.13557329773902893, "avg_weight": 0.5835235714912415, "std_delta_l": 0.08696206659078598, "std_weight": 0.11567459255456924, "weighted_loss": 0.08354784548282623 }, { "avg_delta_l": -0.018753305077552795, "avg_loss_unweighted": 0.1330774873495102, "avg_weight": 0.6019282937049866, "std_delta_l": 0.07183503359556198, "std_weight": 0.1307636797428131, "weighted_loss": 0.08446303755044937 }, { "avg_delta_l": 0.006782238837331533, "avg_loss_unweighted": 0.13110807538032532, "avg_weight": 0.5655631422996521, "std_delta_l": 0.060940198600292206, "std_weight": 0.07905177772045135, "weighted_loss": 0.07682555168867111 }, { "avg_delta_l": -0.000988472020253539, "avg_loss_unweighted": 0.13870735466480255, "avg_weight": 0.5804022550582886, "std_delta_l": 0.07227376103401184, "std_weight": 0.10589173436164856, "weighted_loss": 0.08431515842676163 }, { "avg_delta_l": 0.004876396618783474, "avg_loss_unweighted": 0.12177228182554245, "avg_weight": 0.5808682441711426, "std_delta_l": 0.07221581786870956, "std_weight": 0.09829026460647583, "weighted_loss": 0.07355652004480362 }, { "avg_delta_l": 0.018585093319416046, "avg_loss_unweighted": 0.12184585630893707, "avg_weight": 0.5631401538848877, "std_delta_l": 0.06388220191001892, "std_weight": 0.08266401290893555, "weighted_loss": 0.07147999107837677 }, { "avg_delta_l": 0.014540961012244225, "avg_loss_unweighted": 0.13797500729560852, "avg_weight": 0.5615429282188416, "std_delta_l": 0.07002323865890503, "std_weight": 0.08429968357086182, "weighted_loss": 0.07963180541992188 }, { "avg_delta_l": 0.0015568860108032823, "avg_loss_unweighted": 0.12474370747804642, "avg_weight": 0.5798891186714172, "std_delta_l": 0.06208339333534241, "std_weight": 0.08010952919721603, "weighted_loss": 0.07528399676084518 }, { "avg_delta_l": 0.012391048483550549, "avg_loss_unweighted": 0.13069051504135132, "avg_weight": 0.5726934671401978, "std_delta_l": 0.07224328815937042, "std_weight": 0.08914019167423248, "weighted_loss": 0.07788269221782684 }, { "avg_delta_l": -0.002128953579813242, "avg_loss_unweighted": 0.13063055276870728, "avg_weight": 0.5746902227401733, "std_delta_l": 0.062480952590703964, "std_weight": 0.08375661075115204, "weighted_loss": 0.07702283561229706 }, { "avg_delta_l": 0.004493320360779762, "avg_loss_unweighted": 0.128777414560318, "avg_weight": 0.5763220191001892, "std_delta_l": 0.06675784289836884, "std_weight": 0.08872049301862717, "weighted_loss": 0.07792685925960541 }, { "epoch": 7.939130434782609, "grad_norm": 0.723149185808678, "learning_rate": 7.564865410996696e-06, "loss": 0.1568, "step": 460 }, { "avg_delta_l": -0.004646904766559601, "avg_loss_unweighted": 0.1363130658864975, "avg_weight": 0.5733515620231628, "std_delta_l": 0.053284652531147, "std_weight": 0.0765598937869072, "weighted_loss": 0.08089803159236908 }, { "avg_delta_l": -0.020143095403909683, "avg_loss_unweighted": 0.14655645191669464, "avg_weight": 0.606909990310669, "std_delta_l": 0.05735275149345398, "std_weight": 0.09457167237997055, "weighted_loss": 0.09225644171237946 }, { "avg_delta_l": -0.007658139802515507, "avg_loss_unweighted": 0.13660556077957153, "avg_weight": 0.5931398272514343, "std_delta_l": 0.06568354368209839, "std_weight": 0.10343540459871292, "weighted_loss": 0.08554453402757645 }, { "avg_delta_l": -0.005182798020541668, "avg_loss_unweighted": 0.13743866980075836, "avg_weight": 0.580508828163147, "std_delta_l": 0.06355209648609161, "std_weight": 0.10014573484659195, "weighted_loss": 0.08323101699352264 }, { "avg_delta_l": -0.0023978701792657375, "avg_loss_unweighted": 0.13171952962875366, "avg_weight": 0.5882174968719482, "std_delta_l": 0.06779923290014267, "std_weight": 0.09528691321611404, "weighted_loss": 0.0804801657795906 }, { "avg_delta_l": -0.015745539218187332, "avg_loss_unweighted": 0.14179594814777374, "avg_weight": 0.6135311126708984, "std_delta_l": 0.07385563105344772, "std_weight": 0.11029622703790665, "weighted_loss": 0.09104671329259872 }, { "avg_delta_l": 0.005104186479002237, "avg_loss_unweighted": 0.11918879300355911, "avg_weight": 0.5856052041053772, "std_delta_l": 0.07830242067575455, "std_weight": 0.10369928181171417, "weighted_loss": 0.0745447650551796 }, { "avg_delta_l": 0.025906376540660858, "avg_loss_unweighted": 0.10417144000530243, "avg_weight": 0.5261543393135071, "std_delta_l": 0.041859716176986694, "std_weight": 0.04677007719874382, "weighted_loss": 0.05546689033508301 }, { "avg_delta_l": 0.019016368314623833, "avg_loss_unweighted": 0.11079923808574677, "avg_weight": 0.5427213907241821, "std_delta_l": 0.05067481845617294, "std_weight": 0.07006736099720001, "weighted_loss": 0.06158077344298363 }, { "avg_delta_l": 0.03226272389292717, "avg_loss_unweighted": 0.11581115424633026, "avg_weight": 0.5327638983726501, "std_delta_l": 0.057679858058691025, "std_weight": 0.05995725467801094, "weighted_loss": 0.06285297870635986 }, { "avg_delta_l": 0.030952945351600647, "avg_loss_unweighted": 0.10038268566131592, "avg_weight": 0.5153931379318237, "std_delta_l": 0.037735987454652786, "std_weight": 0.028792010620236397, "weighted_loss": 0.052391260862350464 }, { "avg_delta_l": 0.02829352207481861, "avg_loss_unweighted": 0.10858127474784851, "avg_weight": 0.5176575183868408, "std_delta_l": 0.04279370233416557, "std_weight": 0.03507028520107269, "weighted_loss": 0.057172127068042755 }, { "avg_delta_l": 0.016416046768426895, "avg_loss_unweighted": 0.1101507693529129, "avg_weight": 0.5324910879135132, "std_delta_l": 0.04100189357995987, "std_weight": 0.05750541761517525, "weighted_loss": 0.06025038659572601 }, { "avg_delta_l": 0.03000103496015072, "avg_loss_unweighted": 0.11258123815059662, "avg_weight": 0.5256080031394958, "std_delta_l": 0.050245676189661026, "std_weight": 0.04435226693749428, "weighted_loss": 0.05984283611178398 }, { "avg_delta_l": 0.027173589915037155, "avg_loss_unweighted": 0.10703837871551514, "avg_weight": 0.5351132154464722, "std_delta_l": 0.05175235867500305, "std_weight": 0.05254615098237991, "weighted_loss": 0.05878612399101257 }, { "avg_delta_l": 0.00807180441915989, "avg_loss_unweighted": 0.11106422543525696, "avg_weight": 0.5500223636627197, "std_delta_l": 0.05606765300035477, "std_weight": 0.07672249525785446, "weighted_loss": 0.06324896961450577 }, { "avg_delta_l": 0.01510828547179699, "avg_loss_unweighted": 0.10316958278417587, "avg_weight": 0.5426549315452576, "std_delta_l": 0.05376703664660454, "std_weight": 0.07544325292110443, "weighted_loss": 0.05818777158856392 }, { "avg_delta_l": 0.03146156296133995, "avg_loss_unweighted": 0.10399128496646881, "avg_weight": 0.535428524017334, "std_delta_l": 0.055573076009750366, "std_weight": 0.046264924108982086, "weighted_loss": 0.05716005340218544 }, { "avg_delta_l": 0.026351628825068474, "avg_loss_unweighted": 0.10661149770021439, "avg_weight": 0.5241749286651611, "std_delta_l": 0.048308759927749634, "std_weight": 0.03940889611840248, "weighted_loss": 0.05660098046064377 }, { "epoch": 8.104347826086956, "grad_norm": 0.6642128915353179, "learning_rate": 7.434568648766211e-06, "loss": 0.1292, "step": 470 }, { "avg_delta_l": 0.02613714709877968, "avg_loss_unweighted": 0.09991399198770523, "avg_weight": 0.5325031876564026, "std_delta_l": 0.051386408507823944, "std_weight": 0.04975869879126549, "weighted_loss": 0.05451743304729462 }, { "avg_delta_l": 0.01577538438141346, "avg_loss_unweighted": 0.10662169754505157, "avg_weight": 0.5410759449005127, "std_delta_l": 0.051187388598918915, "std_weight": 0.06730636209249496, "weighted_loss": 0.05950365588068962 }, { "avg_delta_l": 0.0261496901512146, "avg_loss_unweighted": 0.09384238719940186, "avg_weight": 0.5208449363708496, "std_delta_l": 0.036783043295145035, "std_weight": 0.03395130857825279, "weighted_loss": 0.04950984567403793 }, { "avg_delta_l": 0.010967548936605453, "avg_loss_unweighted": 0.11442078649997711, "avg_weight": 0.5538625717163086, "std_delta_l": 0.056343186646699905, "std_weight": 0.07891642302274704, "weighted_loss": 0.06523733586072922 }, { "avg_delta_l": 0.019917458295822144, "avg_loss_unweighted": 0.1148720234632492, "avg_weight": 0.5385156869888306, "std_delta_l": 0.04891646280884743, "std_weight": 0.06264837831258774, "weighted_loss": 0.0634184181690216 }, { "avg_delta_l": 0.02750198170542717, "avg_loss_unweighted": 0.108611561357975, "avg_weight": 0.5262150764465332, "std_delta_l": 0.043037138879299164, "std_weight": 0.03762253373861313, "weighted_loss": 0.058037206530570984 }, { "avg_delta_l": 0.02978595159947872, "avg_loss_unweighted": 0.09952159225940704, "avg_weight": 0.5186375379562378, "std_delta_l": 0.040633976459503174, "std_weight": 0.03500483185052872, "weighted_loss": 0.051993515342473984 }, { "avg_delta_l": 0.015848433598876, "avg_loss_unweighted": 0.10634656995534897, "avg_weight": 0.530707836151123, "std_delta_l": 0.039063964039087296, "std_weight": 0.048775240778923035, "weighted_loss": 0.05759900435805321 }, { "avg_delta_l": 0.03637208417057991, "avg_loss_unweighted": 0.10983305424451828, "avg_weight": 0.5191561579704285, "std_delta_l": 0.05105477571487427, "std_weight": 0.034213822335004807, "weighted_loss": 0.058082666248083115 }, { "avg_delta_l": 0.0369139201939106, "avg_loss_unweighted": 0.0930577740073204, "avg_weight": 0.5123628973960876, "std_delta_l": 0.03868775814771652, "std_weight": 0.02215145155787468, "weighted_loss": 0.047905001789331436 }, { "avg_delta_l": 0.027040500193834305, "avg_loss_unweighted": 0.10452274233102798, "avg_weight": 0.5255968570709229, "std_delta_l": 0.04400111734867096, "std_weight": 0.04566124454140663, "weighted_loss": 0.05564623698592186 }, { "avg_delta_l": 0.01986701786518097, "avg_loss_unweighted": 0.11743969470262527, "avg_weight": 0.5342820882797241, "std_delta_l": 0.04724254459142685, "std_weight": 0.055000048130750656, "weighted_loss": 0.06372372061014175 }, { "avg_delta_l": 0.02242649532854557, "avg_loss_unweighted": 0.10170163214206696, "avg_weight": 0.5308541655540466, "std_delta_l": 0.04595629498362541, "std_weight": 0.04921155422925949, "weighted_loss": 0.0547587051987648 }, { "avg_delta_l": 0.009202934801578522, "avg_loss_unweighted": 0.11251437664031982, "avg_weight": 0.5450397729873657, "std_delta_l": 0.04244726896286011, "std_weight": 0.05140022933483124, "weighted_loss": 0.06244286149740219 }, { "avg_delta_l": 0.02055710181593895, "avg_loss_unweighted": 0.11471162736415863, "avg_weight": 0.5337332487106323, "std_delta_l": 0.05027509108185768, "std_weight": 0.05270445719361305, "weighted_loss": 0.06134457513689995 }, { "avg_delta_l": 0.017419207841157913, "avg_loss_unweighted": 0.11222060024738312, "avg_weight": 0.5423530340194702, "std_delta_l": 0.05004812404513359, "std_weight": 0.06820376217365265, "weighted_loss": 0.06277360022068024 }, { "avg_delta_l": 0.027708038687705994, "avg_loss_unweighted": 0.10444091260433197, "avg_weight": 0.5284454226493835, "std_delta_l": 0.04462745785713196, "std_weight": 0.040507715195417404, "weighted_loss": 0.056049272418022156 }, { "avg_delta_l": 0.020696362480521202, "avg_loss_unweighted": 0.11433861404657364, "avg_weight": 0.5430089235305786, "std_delta_l": 0.05162699148058891, "std_weight": 0.0648370087146759, "weighted_loss": 0.06408233940601349 }, { "avg_delta_l": 0.021014045923948288, "avg_loss_unweighted": 0.0970311313867569, "avg_weight": 0.5415319800376892, "std_delta_l": 0.060776688158512115, "std_weight": 0.06458556652069092, "weighted_loss": 0.054348621517419815 }, { "avg_delta_l": 0.029200471937656403, "avg_loss_unweighted": 0.10568279027938843, "avg_weight": 0.5318065285682678, "std_delta_l": 0.049592383205890656, "std_weight": 0.05083288252353668, "weighted_loss": 0.057232730090618134 }, { "epoch": 8.278260869565218, "grad_norm": 0.6607949960044979, "learning_rate": 7.302067498724681e-06, "loss": 0.1158, "step": 480 }, { "avg_delta_l": 0.005277095828205347, "avg_loss_unweighted": 0.10760121047496796, "avg_weight": 0.5639588236808777, "std_delta_l": 0.06570078432559967, "std_weight": 0.09182160347700119, "weighted_loss": 0.06363138556480408 }, { "avg_delta_l": 0.030426550656557083, "avg_loss_unweighted": 0.10167864710092545, "avg_weight": 0.52894526720047, "std_delta_l": 0.05277223885059357, "std_weight": 0.050532016903162, "weighted_loss": 0.055524155497550964 }, { "avg_delta_l": 0.017804285511374474, "avg_loss_unweighted": 0.10044807940721512, "avg_weight": 0.538941502571106, "std_delta_l": 0.044845614582300186, "std_weight": 0.05911697447299957, "weighted_loss": 0.055346082895994186 }, { "avg_delta_l": 0.021055011078715324, "avg_loss_unweighted": 0.10263319313526154, "avg_weight": 0.5450600385665894, "std_delta_l": 0.05652375519275665, "std_weight": 0.06399551779031754, "weighted_loss": 0.05798722803592682 }, { "avg_delta_l": 0.015605387277901173, "avg_loss_unweighted": 0.11607402563095093, "avg_weight": 0.5373063683509827, "std_delta_l": 0.04613791033625603, "std_weight": 0.05478287115693092, "weighted_loss": 0.06341134756803513 }, { "avg_delta_l": 0.021238984540104866, "avg_loss_unweighted": 0.10136698186397552, "avg_weight": 0.5332139134407043, "std_delta_l": 0.043113358318805695, "std_weight": 0.0400947667658329, "weighted_loss": 0.05612239986658096 }, { "avg_delta_l": 0.02248922735452652, "avg_loss_unweighted": 0.09914685040712357, "avg_weight": 0.5206320285797119, "std_delta_l": 0.03574265167117119, "std_weight": 0.03110455349087715, "weighted_loss": 0.052010469138622284 }, { "avg_delta_l": 0.01106533594429493, "avg_loss_unweighted": 0.1115320548415184, "avg_weight": 0.5464885234832764, "std_delta_l": 0.05170585215091705, "std_weight": 0.07012388855218887, "weighted_loss": 0.06250286102294922 }, { "avg_delta_l": 0.029032794758677483, "avg_loss_unweighted": 0.1047649160027504, "avg_weight": 0.5247571468353271, "std_delta_l": 0.04729805886745453, "std_weight": 0.030604520812630653, "weighted_loss": 0.05525803938508034 }, { "avg_delta_l": 0.018616117537021637, "avg_loss_unweighted": 0.10908513516187668, "avg_weight": 0.539698600769043, "std_delta_l": 0.04916922003030777, "std_weight": 0.05229034274816513, "weighted_loss": 0.06066707894206047 }, { "avg_delta_l": 0.006769158877432346, "avg_loss_unweighted": 0.10145117342472076, "avg_weight": 0.5584323406219482, "std_delta_l": 0.0573156364262104, "std_weight": 0.07360821217298508, "weighted_loss": 0.05839146673679352 }, { "avg_delta_l": 0.03180341050028801, "avg_loss_unweighted": 0.0976879671216011, "avg_weight": 0.5311712026596069, "std_delta_l": 0.05608392879366875, "std_weight": 0.04816499352455139, "weighted_loss": 0.053263191133737564 }, { "avg_delta_l": 0.016082553192973137, "avg_loss_unweighted": 0.103159599006176, "avg_weight": 0.54901522397995, "std_delta_l": 0.04916202649474144, "std_weight": 0.06461367011070251, "weighted_loss": 0.05835481360554695 }, { "avg_delta_l": 0.02280580997467041, "avg_loss_unweighted": 0.10827659070491791, "avg_weight": 0.5368667840957642, "std_delta_l": 0.055788617581129074, "std_weight": 0.06244128197431564, "weighted_loss": 0.060036346316337585 }, { "avg_delta_l": 0.016235269606113434, "avg_loss_unweighted": 0.09949155151844025, "avg_weight": 0.5406641364097595, "std_delta_l": 0.049521710723638535, "std_weight": 0.06276795268058777, "weighted_loss": 0.055171795189380646 }, { "avg_delta_l": 0.012521803379058838, "avg_loss_unweighted": 0.10306117683649063, "avg_weight": 0.5602532625198364, "std_delta_l": 0.0638774111866951, "std_weight": 0.08476528525352478, "weighted_loss": 0.060837846249341965 }, { "avg_delta_l": 0.015898820012807846, "avg_loss_unweighted": 0.1025116890668869, "avg_weight": 0.5443226099014282, "std_delta_l": 0.0516357496380806, "std_weight": 0.06371165812015533, "weighted_loss": 0.058017369359731674 }, { "avg_delta_l": 0.008989071473479271, "avg_loss_unweighted": 0.10777247697114944, "avg_weight": 0.5620471835136414, "std_delta_l": 0.06188110262155533, "std_weight": 0.08690772950649261, "weighted_loss": 0.06382674723863602 }, { "avg_delta_l": 0.016914451494812965, "avg_loss_unweighted": 0.1092049777507782, "avg_weight": 0.5540239214897156, "std_delta_l": 0.06563359498977661, "std_weight": 0.08366365730762482, "weighted_loss": 0.06229786202311516 }, { "avg_delta_l": 0.0027798102237284184, "avg_loss_unweighted": 0.11158845573663712, "avg_weight": 0.56492018699646, "std_delta_l": 0.057350143790245056, "std_weight": 0.09263268113136292, "weighted_loss": 0.06535539776086807 }, { "epoch": 8.452173913043477, "grad_norm": 0.6431600567298024, "learning_rate": 7.167481934453561e-06, "loss": 0.1178, "step": 490 }, { "avg_delta_l": 0.012493741698563099, "avg_loss_unweighted": 0.10952743887901306, "avg_weight": 0.5473500490188599, "std_delta_l": 0.04835996776819229, "std_weight": 0.06479023396968842, "weighted_loss": 0.06193779036402702 }, { "avg_delta_l": 0.008448466658592224, "avg_loss_unweighted": 0.10262709856033325, "avg_weight": 0.5547577738761902, "std_delta_l": 0.05548461899161339, "std_weight": 0.07507392019033432, "weighted_loss": 0.05877489969134331 }, { "avg_delta_l": 0.022598963230848312, "avg_loss_unweighted": 0.09881946444511414, "avg_weight": 0.5440632104873657, "std_delta_l": 0.05819965526461601, "std_weight": 0.061332814395427704, "weighted_loss": 0.055694203823804855 }, { "avg_delta_l": 0.013052679598331451, "avg_loss_unweighted": 0.10596594214439392, "avg_weight": 0.5511512756347656, "std_delta_l": 0.05177270993590355, "std_weight": 0.06681574881076813, "weighted_loss": 0.060061000287532806 }, { "avg_delta_l": 0.0012976002180948853, "avg_loss_unweighted": 0.11006573587656021, "avg_weight": 0.563387930393219, "std_delta_l": 0.05099286884069443, "std_weight": 0.08078858256340027, "weighted_loss": 0.06453125923871994 }, { "avg_delta_l": 0.007376436144113541, "avg_loss_unweighted": 0.11193180084228516, "avg_weight": 0.5505145192146301, "std_delta_l": 0.04720921814441681, "std_weight": 0.07504479587078094, "weighted_loss": 0.06365156918764114 }, { "avg_delta_l": 0.005033821798861027, "avg_loss_unweighted": 0.10700105130672455, "avg_weight": 0.5609780550003052, "std_delta_l": 0.06110982596874237, "std_weight": 0.08044539391994476, "weighted_loss": 0.06250443309545517 }, { "avg_delta_l": 0.0201334860175848, "avg_loss_unweighted": 0.096547931432724, "avg_weight": 0.5406399369239807, "std_delta_l": 0.056584905833005905, "std_weight": 0.06854868680238724, "weighted_loss": 0.05457431450486183 }, { "avg_delta_l": 0.009726200252771378, "avg_loss_unweighted": 0.10887693613767624, "avg_weight": 0.5505093336105347, "std_delta_l": 0.05129369720816612, "std_weight": 0.07185594737529755, "weighted_loss": 0.06109890341758728 }, { "avg_delta_l": 0.017573516815900803, "avg_loss_unweighted": 0.10260926932096481, "avg_weight": 0.5485047698020935, "std_delta_l": 0.05582386627793312, "std_weight": 0.07198566943407059, "weighted_loss": 0.05793709307909012 }, { "avg_delta_l": 0.010262702591717243, "avg_loss_unweighted": 0.11426982283592224, "avg_weight": 0.5654537081718445, "std_delta_l": 0.07103844732046127, "std_weight": 0.10231982916593552, "weighted_loss": 0.06794648617506027 }, { "avg_delta_l": 0.0026531261391937733, "avg_loss_unweighted": 0.11356547474861145, "avg_weight": 0.5660914778709412, "std_delta_l": 0.06321118772029877, "std_weight": 0.08882057666778564, "weighted_loss": 0.06676148623228073 }, { "avg_delta_l": 0.02021791785955429, "avg_loss_unweighted": 0.09799940139055252, "avg_weight": 0.5427203178405762, "std_delta_l": 0.05478411540389061, "std_weight": 0.061617154628038406, "weighted_loss": 0.055746179074048996 }, { "avg_delta_l": 0.014585420489311218, "avg_loss_unweighted": 0.10334665328264236, "avg_weight": 0.5501746535301208, "std_delta_l": 0.05449051037430763, "std_weight": 0.07029673457145691, "weighted_loss": 0.058462582528591156 }, { "avg_delta_l": 0.02650522254407406, "avg_loss_unweighted": 0.10454942286014557, "avg_weight": 0.5481464862823486, "std_delta_l": 0.06224631890654564, "std_weight": 0.06505776196718216, "weighted_loss": 0.060001276433467865 }, { "avg_delta_l": 0.015545647591352463, "avg_loss_unweighted": 0.09783819317817688, "avg_weight": 0.540031373500824, "std_delta_l": 0.050969623029232025, "std_weight": 0.0631251260638237, "weighted_loss": 0.0545809306204319 }, { "avg_delta_l": 0.008661996573209763, "avg_loss_unweighted": 0.10312268882989883, "avg_weight": 0.5534354448318481, "std_delta_l": 0.05444706976413727, "std_weight": 0.0785006731748581, "weighted_loss": 0.059157975018024445 }, { "avg_delta_l": 0.004423323553055525, "avg_loss_unweighted": 0.10987992584705353, "avg_weight": 0.5668765902519226, "std_delta_l": 0.053237784653902054, "std_weight": 0.07835618406534195, "weighted_loss": 0.06460445374250412 }, { "avg_delta_l": 0.017415430396795273, "avg_loss_unweighted": 0.1074891984462738, "avg_weight": 0.5336142778396606, "std_delta_l": 0.043860696256160736, "std_weight": 0.04744657501578331, "weighted_loss": 0.058372389525175095 }, { "avg_delta_l": 0.0059975930489599705, "avg_loss_unweighted": 0.10951381176710129, "avg_weight": 0.545983076095581, "std_delta_l": 0.044426329433918, "std_weight": 0.06655324250459671, "weighted_loss": 0.06090306490659714 }, { "epoch": 8.626086956521739, "grad_norm": 0.592105057042269, "learning_rate": 7.030933816873707e-06, "loss": 0.1207, "step": 500 }, { "avg_delta_l": 0.012844279408454895, "avg_loss_unweighted": 0.10690643638372421, "avg_weight": 0.5461716651916504, "std_delta_l": 0.0560435950756073, "std_weight": 0.06659366935491562, "weighted_loss": 0.0608908012509346 }, { "avg_delta_l": 0.009754398837685585, "avg_loss_unweighted": 0.10097967088222504, "avg_weight": 0.566287636756897, "std_delta_l": 0.06653331220149994, "std_weight": 0.07519745081663132, "weighted_loss": 0.06045887619256973 }, { "avg_delta_l": 0.014600603841245174, "avg_loss_unweighted": 0.11721890419721603, "avg_weight": 0.5543450117111206, "std_delta_l": 0.061837926506996155, "std_weight": 0.08400524407625198, "weighted_loss": 0.06747566908597946 }, { "avg_delta_l": 0.0075129675678908825, "avg_loss_unweighted": 0.10615946352481842, "avg_weight": 0.5537748336791992, "std_delta_l": 0.05482267215847969, "std_weight": 0.07501225918531418, "weighted_loss": 0.06063096225261688 }, { "avg_delta_l": -0.007514193654060364, "avg_loss_unweighted": 0.1112699955701828, "avg_weight": 0.580481231212616, "std_delta_l": 0.06536708027124405, "std_weight": 0.10516997426748276, "weighted_loss": 0.06691691279411316 }, { "avg_delta_l": 0.0168556347489357, "avg_loss_unweighted": 0.09750679135322571, "avg_weight": 0.5413818955421448, "std_delta_l": 0.048840947449207306, "std_weight": 0.06419365853071213, "weighted_loss": 0.054067231714725494 }, { "avg_delta_l": -0.003367471508681774, "avg_loss_unweighted": 0.10254273563623428, "avg_weight": 0.5702336430549622, "std_delta_l": 0.060085929930210114, "std_weight": 0.09767594188451767, "weighted_loss": 0.06212526187300682 }, { "avg_delta_l": 0.010372920893132687, "avg_loss_unweighted": 0.09684871882200241, "avg_weight": 0.5501589775085449, "std_delta_l": 0.04746868833899498, "std_weight": 0.05587010458111763, "weighted_loss": 0.055299028754234314 }, { "avg_delta_l": 0.016399599611759186, "avg_loss_unweighted": 0.0999491885304451, "avg_weight": 0.5532742738723755, "std_delta_l": 0.06651055067777634, "std_weight": 0.07694096863269806, "weighted_loss": 0.05726739764213562 }, { "avg_delta_l": 0.019335784018039703, "avg_loss_unweighted": 0.08911387622356415, "avg_weight": 0.5397242903709412, "std_delta_l": 0.05101621150970459, "std_weight": 0.05902494117617607, "weighted_loss": 0.04944375902414322 }, { "avg_delta_l": 0.0048985593020915985, "avg_loss_unweighted": 0.11903451383113861, "avg_weight": 0.5677234530448914, "std_delta_l": 0.06275421380996704, "std_weight": 0.08298827707767487, "weighted_loss": 0.06927964091300964 }, { "avg_delta_l": 0.025324983522295952, "avg_loss_unweighted": 0.1007220447063446, "avg_weight": 0.5504086017608643, "std_delta_l": 0.06842927634716034, "std_weight": 0.07736994326114655, "weighted_loss": 0.0581076517701149 }, { "avg_delta_l": 0.009076839312911034, "avg_loss_unweighted": 0.109233058989048, "avg_weight": 0.5607020854949951, "std_delta_l": 0.06381268054246902, "std_weight": 0.08794239163398743, "weighted_loss": 0.0642637386918068 }, { "avg_delta_l": 0.0075384932570159435, "avg_loss_unweighted": 0.10989794135093689, "avg_weight": 0.5551047921180725, "std_delta_l": 0.05471495911478996, "std_weight": 0.07028069347143173, "weighted_loss": 0.06252752244472504 }, { "avg_delta_l": 0.022937988862395287, "avg_loss_unweighted": 0.1050325483083725, "avg_weight": 0.5430395007133484, "std_delta_l": 0.059893302619457245, "std_weight": 0.06247628480195999, "weighted_loss": 0.05891264230012894 }, { "avg_delta_l": 0.014127999544143677, "avg_loss_unweighted": 0.1014852225780487, "avg_weight": 0.5495647192001343, "std_delta_l": 0.05789681524038315, "std_weight": 0.0644424557685852, "weighted_loss": 0.05799735337495804 }, { "avg_delta_l": -0.005645072087645531, "avg_loss_unweighted": 0.12401757389307022, "avg_weight": 0.5654849410057068, "std_delta_l": 0.04884786158800125, "std_weight": 0.07627609372138977, "weighted_loss": 0.07157677412033081 }, { "avg_delta_l": 0.006109137553721666, "avg_loss_unweighted": 0.10200498253107071, "avg_weight": 0.5679891705513, "std_delta_l": 0.05465430021286011, "std_weight": 0.06678922474384308, "weighted_loss": 0.060965970158576965 }, { "avg_delta_l": -0.0019652137998491526, "avg_loss_unweighted": 0.11103388667106628, "avg_weight": 0.5797684788703918, "std_delta_l": 0.06889446079730988, "std_weight": 0.10394863039255142, "weighted_loss": 0.06725437194108963 }, { "avg_delta_l": 0.00969067495316267, "avg_loss_unweighted": 0.10832221060991287, "avg_weight": 0.5613095164299011, "std_delta_l": 0.06196340173482895, "std_weight": 0.0798027366399765, "weighted_loss": 0.0636993944644928 }, { "epoch": 8.8, "grad_norm": 0.705616472556198, "learning_rate": 6.892546783906016e-06, "loss": 0.1229, "step": 510 }, { "avg_delta_l": 0.015144460834562778, "avg_loss_unweighted": 0.1055234894156456, "avg_weight": 0.547854483127594, "std_delta_l": 0.05074913799762726, "std_weight": 0.0595063678920269, "weighted_loss": 0.059047993272542953 }, { "avg_delta_l": -0.005408495664596558, "avg_loss_unweighted": 0.1127101257443428, "avg_weight": 0.5927286148071289, "std_delta_l": 0.07896792888641357, "std_weight": 0.12189454585313797, "weighted_loss": 0.06977537274360657 }, { "avg_delta_l": 0.0010108845308423042, "avg_loss_unweighted": 0.11666584014892578, "avg_weight": 0.5730714797973633, "std_delta_l": 0.057827770709991455, "std_weight": 0.08750525861978531, "weighted_loss": 0.06923724710941315 }, { "avg_delta_l": 0.007864834740757942, "avg_loss_unweighted": 0.10922183096408844, "avg_weight": 0.5615221858024597, "std_delta_l": 0.061133354902267456, "std_weight": 0.07879532128572464, "weighted_loss": 0.0635087639093399 }, { "avg_delta_l": 0.016732580959796906, "avg_loss_unweighted": 0.09477865695953369, "avg_weight": 0.5478761792182922, "std_delta_l": 0.05490582436323166, "std_weight": 0.06824366748332977, "weighted_loss": 0.05396794527769089 }, { "avg_delta_l": -0.005057917442172766, "avg_loss_unweighted": 0.114251047372818, "avg_weight": 0.5841420292854309, "std_delta_l": 0.06500865519046783, "std_weight": 0.08412826061248779, "weighted_loss": 0.06876272708177567 }, { "avg_delta_l": -0.003522012149915099, "avg_loss_unweighted": 0.1131959781050682, "avg_weight": 0.5804586410522461, "std_delta_l": 0.06307736039161682, "std_weight": 0.09352336078882217, "weighted_loss": 0.06812454760074615 }, { "avg_delta_l": -0.00837009772658348, "avg_loss_unweighted": 0.10317391157150269, "avg_weight": 0.5776991248130798, "std_delta_l": 0.05821358785033226, "std_weight": 0.10193924605846405, "weighted_loss": 0.06229637190699577 }, { "avg_delta_l": -0.01175160426646471, "avg_loss_unweighted": 0.11560182273387909, "avg_weight": 0.5967325568199158, "std_delta_l": 0.07439064979553223, "std_weight": 0.11765210330486298, "weighted_loss": 0.07234717905521393 }, { "avg_delta_l": 0.008856149390339851, "avg_loss_unweighted": 0.118619404733181, "avg_weight": 0.5473310351371765, "std_delta_l": 0.039862871170043945, "std_weight": 0.04814973101019859, "weighted_loss": 0.06622663140296936 }, { "avg_delta_l": -0.009051203727722168, "avg_loss_unweighted": 0.10901804268360138, "avg_weight": 0.5785717368125916, "std_delta_l": 0.054311905056238174, "std_weight": 0.0815148651599884, "weighted_loss": 0.06552140414714813 }, { "avg_delta_l": -0.012758714146912098, "avg_loss_unweighted": 0.11108863353729248, "avg_weight": 0.5927575826644897, "std_delta_l": 0.05462966114282608, "std_weight": 0.08675895631313324, "weighted_loss": 0.06877075135707855 }, { "avg_delta_l": 0.0007528618443757296, "avg_loss_unweighted": 0.11637550592422485, "avg_weight": 0.5852590799331665, "std_delta_l": 0.06770770251750946, "std_weight": 0.09253741800785065, "weighted_loss": 0.07198107242584229 }, { "avg_delta_l": -0.005545625928789377, "avg_loss_unweighted": 0.11620800197124481, "avg_weight": 0.5826970338821411, "std_delta_l": 0.05884307995438576, "std_weight": 0.09277018904685974, "weighted_loss": 0.07114720344543457 }, { "avg_delta_l": -0.005083635915070772, "avg_loss_unweighted": 0.11191399395465851, "avg_weight": 0.5718579292297363, "std_delta_l": 0.057223789393901825, "std_weight": 0.08280523866415024, "weighted_loss": 0.06615369766950607 }, { "avg_delta_l": -0.00962553359568119, "avg_loss_unweighted": 0.1094784215092659, "avg_weight": 0.5998408198356628, "std_delta_l": 0.06784698367118835, "std_weight": 0.10513878613710403, "weighted_loss": 0.06969059258699417 }, { "avg_delta_l": 0.0046001942828297615, "avg_loss_unweighted": 0.10833622515201569, "avg_weight": 0.5724513530731201, "std_delta_l": 0.06389766931533813, "std_weight": 0.0916552022099495, "weighted_loss": 0.06507716327905655 }, { "avg_delta_l": 0.004541160073131323, "avg_loss_unweighted": 0.10802045464515686, "avg_weight": 0.5860664248466492, "std_delta_l": 0.08171242475509644, "std_weight": 0.11119448393583298, "weighted_loss": 0.0684533566236496 }, { "avg_delta_l": -0.004588104784488678, "avg_loss_unweighted": 0.1012357696890831, "avg_weight": 0.5856305956840515, "std_delta_l": 0.07053564488887787, "std_weight": 0.10620692372322083, "weighted_loss": 0.0636243224143982 }, { "avg_delta_l": 0.01597176119685173, "avg_loss_unweighted": 0.10588943958282471, "avg_weight": 0.5507378578186035, "std_delta_l": 0.05975613370537758, "std_weight": 0.08392518758773804, "weighted_loss": 0.060130052268505096 }, { "epoch": 8.97391304347826, "grad_norm": 0.6136487279430954, "learning_rate": 6.7524461385230846e-06, "loss": 0.1324, "step": 520 }, { "avg_delta_l": 0.005335857160389423, "avg_loss_unweighted": 0.10668095201253891, "avg_weight": 0.5765553712844849, "std_delta_l": 0.06449318677186966, "std_weight": 0.07921883463859558, "weighted_loss": 0.06431356817483902 }, { "avg_delta_l": -0.017368897795677185, "avg_loss_unweighted": 0.11028249561786652, "avg_weight": 0.5927152633666992, "std_delta_l": 0.057627443224191666, "std_weight": 0.09470059722661972, "weighted_loss": 0.06824316829442978 }, { "avg_delta_l": 0.01128460094332695, "avg_loss_unweighted": 0.08967310935258865, "avg_weight": 0.565856397151947, "std_delta_l": 0.06827791035175323, "std_weight": 0.09348277747631073, "weighted_loss": 0.0531991682946682 }, { "avg_delta_l": 0.0253958310931921, "avg_loss_unweighted": 0.08819077908992767, "avg_weight": 0.517549991607666, "std_delta_l": 0.037399858236312866, "std_weight": 0.030023494735360146, "weighted_loss": 0.0461479052901268 }, { "avg_delta_l": 0.015210087411105633, "avg_loss_unweighted": 0.08469503372907639, "avg_weight": 0.5330420136451721, "std_delta_l": 0.041385747492313385, "std_weight": 0.04831070452928543, "weighted_loss": 0.04588181897997856 }, { "avg_delta_l": 0.01893678866326809, "avg_loss_unweighted": 0.07875081151723862, "avg_weight": 0.5318410992622375, "std_delta_l": 0.04546179622411728, "std_weight": 0.05156443268060684, "weighted_loss": 0.04322347790002823 }, { "avg_delta_l": 0.021993888542056084, "avg_loss_unweighted": 0.08981793373823166, "avg_weight": 0.5306181311607361, "std_delta_l": 0.04100843518972397, "std_weight": 0.041806772351264954, "weighted_loss": 0.0485912561416626 }, { "avg_delta_l": 0.03058657981455326, "avg_loss_unweighted": 0.08194926381111145, "avg_weight": 0.517204225063324, "std_delta_l": 0.041621845215559006, "std_weight": 0.027990413829684258, "weighted_loss": 0.04286421090364456 }, { "avg_delta_l": 0.02850784733891487, "avg_loss_unweighted": 0.08204053342342377, "avg_weight": 0.5227566957473755, "std_delta_l": 0.045312248170375824, "std_weight": 0.035494450479745865, "weighted_loss": 0.04364064335823059 }, { "avg_delta_l": 0.02600795589387417, "avg_loss_unweighted": 0.08002739399671555, "avg_weight": 0.5235485434532166, "std_delta_l": 0.04788128286600113, "std_weight": 0.04411466419696808, "weighted_loss": 0.04286886006593704 }, { "avg_delta_l": 0.029771897941827774, "avg_loss_unweighted": 0.08195489645004272, "avg_weight": 0.521639883518219, "std_delta_l": 0.04291977360844612, "std_weight": 0.039687663316726685, "weighted_loss": 0.04353821277618408 }, { "avg_delta_l": 0.02599574252963066, "avg_loss_unweighted": 0.08422308415174484, "avg_weight": 0.5183553695678711, "std_delta_l": 0.040119774639606476, "std_weight": 0.036230262368917465, "weighted_loss": 0.04434572905302048 }, { "avg_delta_l": 0.025754651054739952, "avg_loss_unweighted": 0.0825733095407486, "avg_weight": 0.530188262462616, "std_delta_l": 0.04996802285313606, "std_weight": 0.05232091248035431, "weighted_loss": 0.04488641023635864 }, { "avg_delta_l": 0.01686127856373787, "avg_loss_unweighted": 0.08556459844112396, "avg_weight": 0.5381121635437012, "std_delta_l": 0.04819086194038391, "std_weight": 0.057571060955524445, "weighted_loss": 0.047700054943561554 }, { "avg_delta_l": 0.020423153415322304, "avg_loss_unweighted": 0.09180381149053574, "avg_weight": 0.5272087454795837, "std_delta_l": 0.0410551093518734, "std_weight": 0.04152179881930351, "weighted_loss": 0.049364712089300156 }, { "avg_delta_l": 0.009356115013360977, "avg_loss_unweighted": 0.09379153698682785, "avg_weight": 0.5380805730819702, "std_delta_l": 0.03931356221437454, "std_weight": 0.056723810732364655, "weighted_loss": 0.051816266030073166 }, { "avg_delta_l": 0.019230106845498085, "avg_loss_unweighted": 0.07801052182912827, "avg_weight": 0.5224606394767761, "std_delta_l": 0.039568670094013214, "std_weight": 0.04416850581765175, "weighted_loss": 0.041545189917087555 }, { "avg_delta_l": 0.017219003289937973, "avg_loss_unweighted": 0.08867256343364716, "avg_weight": 0.5356301665306091, "std_delta_l": 0.04019680619239807, "std_weight": 0.05163895711302757, "weighted_loss": 0.04827241227030754 }, { "avg_delta_l": 0.0110124247148633, "avg_loss_unweighted": 0.08512429893016815, "avg_weight": 0.5314207077026367, "std_delta_l": 0.03929907828569412, "std_weight": 0.05741368979215622, "weighted_loss": 0.046260666102170944 }, { "epoch": 9.139130434782608, "grad_norm": 0.7641764661171118, "learning_rate": 6.6107587352932405e-06, "loss": 0.0917, "step": 530 }, { "avg_delta_l": 0.02183925360441208, "avg_loss_unweighted": 0.08453415334224701, "avg_weight": 0.5315126180648804, "std_delta_l": 0.04889364913105965, "std_weight": 0.0502706803381443, "weighted_loss": 0.04578400403261185 }, { "avg_delta_l": 0.008292334154248238, "avg_loss_unweighted": 0.08827546238899231, "avg_weight": 0.5422505736351013, "std_delta_l": 0.044996920973062515, "std_weight": 0.07173395156860352, "weighted_loss": 0.04927694797515869 }, { "avg_delta_l": 0.009965654462575912, "avg_loss_unweighted": 0.09027837216854095, "avg_weight": 0.5457941293716431, "std_delta_l": 0.04713093489408493, "std_weight": 0.0676967054605484, "weighted_loss": 0.05065762996673584 }, { "avg_delta_l": 0.00633829552680254, "avg_loss_unweighted": 0.08439864218235016, "avg_weight": 0.5403677821159363, "std_delta_l": 0.04379893094301224, "std_weight": 0.06856479495763779, "weighted_loss": 0.04688851535320282 }, { "avg_delta_l": 0.03416384384036064, "avg_loss_unweighted": 0.08251139521598816, "avg_weight": 0.5161672234535217, "std_delta_l": 0.04350375756621361, "std_weight": 0.022770851850509644, "weighted_loss": 0.04315406084060669 }, { "avg_delta_l": 0.028292221948504448, "avg_loss_unweighted": 0.07223822176456451, "avg_weight": 0.5253164768218994, "std_delta_l": 0.04314127936959267, "std_weight": 0.03978348150849342, "weighted_loss": 0.03899306058883667 }, { "avg_delta_l": 0.021487513557076454, "avg_loss_unweighted": 0.07333521544933319, "avg_weight": 0.5371414422988892, "std_delta_l": 0.0510980598628521, "std_weight": 0.0639457032084465, "weighted_loss": 0.040623873472213745 }, { "avg_delta_l": 0.027923235669732094, "avg_loss_unweighted": 0.08879859745502472, "avg_weight": 0.5195102691650391, "std_delta_l": 0.03707806020975113, "std_weight": 0.03168517351150513, "weighted_loss": 0.047176774591207504 }, { "avg_delta_l": 0.015866022557020187, "avg_loss_unweighted": 0.0810466930270195, "avg_weight": 0.5325359106063843, "std_delta_l": 0.038480184972286224, "std_weight": 0.04790189117193222, "weighted_loss": 0.04420355334877968 }, { "avg_delta_l": 0.019563551992177963, "avg_loss_unweighted": 0.08223767578601837, "avg_weight": 0.5248475670814514, "std_delta_l": 0.040135595947504044, "std_weight": 0.046495165675878525, "weighted_loss": 0.04408121109008789 }, { "avg_delta_l": 0.013594230636954308, "avg_loss_unweighted": 0.08416575938463211, "avg_weight": 0.5343593955039978, "std_delta_l": 0.03804599493741989, "std_weight": 0.04608644172549248, "weighted_loss": 0.04605646803975105 }, { "avg_delta_l": 0.004723014775663614, "avg_loss_unweighted": 0.08129919320344925, "avg_weight": 0.5558114051818848, "std_delta_l": 0.04794133082032204, "std_weight": 0.06716788560152054, "weighted_loss": 0.04687910154461861 }, { "avg_delta_l": 0.015085525810718536, "avg_loss_unweighted": 0.08928245306015015, "avg_weight": 0.5410894155502319, "std_delta_l": 0.052359361201524734, "std_weight": 0.06728009134531021, "weighted_loss": 0.04952572286128998 }, { "avg_delta_l": 0.013814542442560196, "avg_loss_unweighted": 0.07506068795919418, "avg_weight": 0.5326502919197083, "std_delta_l": 0.04134597256779671, "std_weight": 0.05242453143000603, "weighted_loss": 0.041315220296382904 }, { "avg_delta_l": 0.008204782381653786, "avg_loss_unweighted": 0.08792898803949356, "avg_weight": 0.5387768745422363, "std_delta_l": 0.04155803471803665, "std_weight": 0.051464613527059555, "weighted_loss": 0.048861417919397354 }, { "avg_delta_l": 0.013901821337640285, "avg_loss_unweighted": 0.07954610884189606, "avg_weight": 0.5362894535064697, "std_delta_l": 0.043746430426836014, "std_weight": 0.05361246317625046, "weighted_loss": 0.043910831212997437 }, { "avg_delta_l": 0.006157890427857637, "avg_loss_unweighted": 0.09334202855825424, "avg_weight": 0.5419827699661255, "std_delta_l": 0.03509754687547684, "std_weight": 0.054472148418426514, "weighted_loss": 0.051840994507074356 }, { "avg_delta_l": 0.024805665016174316, "avg_loss_unweighted": 0.08242087066173553, "avg_weight": 0.5261103510856628, "std_delta_l": 0.04140631854534149, "std_weight": 0.03862279653549194, "weighted_loss": 0.043927352875471115 }, { "avg_delta_l": 0.02136543206870556, "avg_loss_unweighted": 0.07856277376413345, "avg_weight": 0.5249791145324707, "std_delta_l": 0.03788817301392555, "std_weight": 0.03880779817700386, "weighted_loss": 0.04196818172931671 }, { "avg_delta_l": 0.01769058220088482, "avg_loss_unweighted": 0.09493730962276459, "avg_weight": 0.531137228012085, "std_delta_l": 0.03816878795623779, "std_weight": 0.04405374452471733, "weighted_loss": 0.0512712299823761 }, { "epoch": 9.31304347826087, "grad_norm": 0.6059155890983613, "learning_rate": 6.467612865519674e-06, "loss": 0.0916, "step": 540 }, { "avg_delta_l": 0.02156386338174343, "avg_loss_unweighted": 0.0745440274477005, "avg_weight": 0.5200955271720886, "std_delta_l": 0.036491312086582184, "std_weight": 0.038354791700839996, "weighted_loss": 0.04004085808992386 }, { "avg_delta_l": 0.01984935998916626, "avg_loss_unweighted": 0.08457843214273453, "avg_weight": 0.5318530201911926, "std_delta_l": 0.03803325444459915, "std_weight": 0.041178032755851746, "weighted_loss": 0.045800819993019104 }, { "avg_delta_l": 0.01375300157815218, "avg_loss_unweighted": 0.09278729557991028, "avg_weight": 0.5320667624473572, "std_delta_l": 0.04195341467857361, "std_weight": 0.050580572336912155, "weighted_loss": 0.05007154121994972 }, { "avg_delta_l": 0.011616001836955547, "avg_loss_unweighted": 0.09339705109596252, "avg_weight": 0.5418538451194763, "std_delta_l": 0.04824518784880638, "std_weight": 0.0639280378818512, "weighted_loss": 0.05215911939740181 }, { "avg_delta_l": 0.01735970564186573, "avg_loss_unweighted": 0.08273827284574509, "avg_weight": 0.5412936210632324, "std_delta_l": 0.049060575664043427, "std_weight": 0.05405652895569801, "weighted_loss": 0.046452876180410385 }, { "avg_delta_l": 0.009667402133345604, "avg_loss_unweighted": 0.08811604231595993, "avg_weight": 0.5465773344039917, "std_delta_l": 0.05302037298679352, "std_weight": 0.07479970157146454, "weighted_loss": 0.0499078743159771 }, { "avg_delta_l": 0.014908842742443085, "avg_loss_unweighted": 0.08198970556259155, "avg_weight": 0.5395896434783936, "std_delta_l": 0.04999513179063797, "std_weight": 0.0634065717458725, "weighted_loss": 0.04492286592721939 }, { "avg_delta_l": 0.021628480404615402, "avg_loss_unweighted": 0.09060505032539368, "avg_weight": 0.527351975440979, "std_delta_l": 0.044424138963222504, "std_weight": 0.04644264653325081, "weighted_loss": 0.04881599545478821 }, { "avg_delta_l": 0.012085041031241417, "avg_loss_unweighted": 0.08047611266374588, "avg_weight": 0.5489947199821472, "std_delta_l": 0.05197037011384964, "std_weight": 0.07716509699821472, "weighted_loss": 0.046460289508104324 }, { "avg_delta_l": 0.007291609421372414, "avg_loss_unweighted": 0.08492796868085861, "avg_weight": 0.5546178817749023, "std_delta_l": 0.05839838460087776, "std_weight": 0.0808948427438736, "weighted_loss": 0.048967648297548294 }, { "avg_delta_l": 0.0163735318928957, "avg_loss_unweighted": 0.08303641527891159, "avg_weight": 0.5285415649414062, "std_delta_l": 0.037636324763298035, "std_weight": 0.044137321412563324, "weighted_loss": 0.04471222683787346 }, { "avg_delta_l": 0.004406149033457041, "avg_loss_unweighted": 0.08329188823699951, "avg_weight": 0.5525892376899719, "std_delta_l": 0.04684410244226456, "std_weight": 0.07561257481575012, "weighted_loss": 0.04773736372590065 }, { "avg_delta_l": 0.020427025854587555, "avg_loss_unweighted": 0.07748419046401978, "avg_weight": 0.5330678224563599, "std_delta_l": 0.04397083446383476, "std_weight": 0.04574192687869072, "weighted_loss": 0.04225274920463562 }, { "avg_delta_l": 0.02311832085251808, "avg_loss_unweighted": 0.06623522937297821, "avg_weight": 0.5285032987594604, "std_delta_l": 0.04263090342283249, "std_weight": 0.038053952157497406, "weighted_loss": 0.03624342754483223 }, { "avg_delta_l": 0.004181768279522657, "avg_loss_unweighted": 0.0902210921049118, "avg_weight": 0.5390327572822571, "std_delta_l": 0.03193054720759392, "std_weight": 0.05300727114081383, "weighted_loss": 0.04960748553276062 }, { "avg_delta_l": 0.005691122729331255, "avg_loss_unweighted": 0.082848459482193, "avg_weight": 0.5500393509864807, "std_delta_l": 0.046412769705057144, "std_weight": 0.0616341307759285, "weighted_loss": 0.04736945405602455 }, { "avg_delta_l": 0.020136695355176926, "avg_loss_unweighted": 0.08232220262289047, "avg_weight": 0.5394342541694641, "std_delta_l": 0.05081569030880928, "std_weight": 0.061020225286483765, "weighted_loss": 0.046417854726314545 }, { "avg_delta_l": 0.012351857498288155, "avg_loss_unweighted": 0.09043461084365845, "avg_weight": 0.5361850261688232, "std_delta_l": 0.04222536459565163, "std_weight": 0.057371534407138824, "weighted_loss": 0.04896989464759827 }, { "avg_delta_l": 0.00929528009146452, "avg_loss_unweighted": 0.0771692544221878, "avg_weight": 0.5390903949737549, "std_delta_l": 0.03961912542581558, "std_weight": 0.06089688464999199, "weighted_loss": 0.04305991530418396 }, { "avg_delta_l": 0.00804714486002922, "avg_loss_unweighted": 0.08676691353321075, "avg_weight": 0.5512707829475403, "std_delta_l": 0.049984272569417953, "std_weight": 0.06922957301139832, "weighted_loss": 0.04966989532113075 }, { "epoch": 9.486956521739131, "grad_norm": 0.7035054332923465, "learning_rate": 6.32313814107868e-06, "loss": 0.093, "step": 550 }, { "avg_delta_l": 0.011692570522427559, "avg_loss_unweighted": 0.07914339005947113, "avg_weight": 0.540679395198822, "std_delta_l": 0.04415711387991905, "std_weight": 0.05836290493607521, "weighted_loss": 0.04396119341254234 }, { "avg_delta_l": 0.015603574924170971, "avg_loss_unweighted": 0.07544084638357162, "avg_weight": 0.5331030488014221, "std_delta_l": 0.03883860260248184, "std_weight": 0.04557381942868233, "weighted_loss": 0.041112348437309265 }, { "avg_delta_l": 0.018167298287153244, "avg_loss_unweighted": 0.08428922295570374, "avg_weight": 0.5360559225082397, "std_delta_l": 0.04932402819395065, "std_weight": 0.05865289270877838, "weighted_loss": 0.04636614769697189 }, { "avg_delta_l": 0.01238963846117258, "avg_loss_unweighted": 0.0842864066362381, "avg_weight": 0.5399760007858276, "std_delta_l": 0.04505188390612602, "std_weight": 0.06274157762527466, "weighted_loss": 0.04656552895903587 }, { "avg_delta_l": 0.00877835787832737, "avg_loss_unweighted": 0.0861474871635437, "avg_weight": 0.5393024682998657, "std_delta_l": 0.04261504486203194, "std_weight": 0.057016417384147644, "weighted_loss": 0.04805148392915726 }, { "avg_delta_l": 0.016073420643806458, "avg_loss_unweighted": 0.08245839178562164, "avg_weight": 0.5451945066452026, "std_delta_l": 0.05254994332790375, "std_weight": 0.0691026821732521, "weighted_loss": 0.046073198318481445 }, { "avg_delta_l": 0.003433634527027607, "avg_loss_unweighted": 0.09001617133617401, "avg_weight": 0.5544470548629761, "std_delta_l": 0.04677356407046318, "std_weight": 0.06692527234554291, "weighted_loss": 0.05203390121459961 }, { "avg_delta_l": 0.010725511237978935, "avg_loss_unweighted": 0.09273859858512878, "avg_weight": 0.5418833494186401, "std_delta_l": 0.050009045749902725, "std_weight": 0.05758429691195488, "weighted_loss": 0.051701620221138 }, { "avg_delta_l": 0.013254349119961262, "avg_loss_unweighted": 0.08722295612096786, "avg_weight": 0.5524238348007202, "std_delta_l": 0.05627359077334404, "std_weight": 0.07171400636434555, "weighted_loss": 0.050598010420799255 }, { "avg_delta_l": 0.010979536920785904, "avg_loss_unweighted": 0.08251048624515533, "avg_weight": 0.5496277213096619, "std_delta_l": 0.05121053382754326, "std_weight": 0.06391491740942001, "weighted_loss": 0.04685767740011215 }, { "avg_delta_l": 0.002005180111154914, "avg_loss_unweighted": 0.08018626272678375, "avg_weight": 0.5610002875328064, "std_delta_l": 0.05736418068408966, "std_weight": 0.08467881381511688, "weighted_loss": 0.047486282885074615 }, { "avg_delta_l": 0.010900970548391342, "avg_loss_unweighted": 0.0825335830450058, "avg_weight": 0.549465000629425, "std_delta_l": 0.05210011079907417, "std_weight": 0.07681397348642349, "weighted_loss": 0.04664338380098343 }, { "avg_delta_l": 0.01686767302453518, "avg_loss_unweighted": 0.07544206827878952, "avg_weight": 0.5272397398948669, "std_delta_l": 0.03925127908587456, "std_weight": 0.04647969454526901, "weighted_loss": 0.04075726866722107 }, { "avg_delta_l": 0.00972592830657959, "avg_loss_unweighted": 0.07680200785398483, "avg_weight": 0.5422964096069336, "std_delta_l": 0.04618331044912338, "std_weight": 0.05200966075062752, "weighted_loss": 0.04369719326496124 }, { "avg_delta_l": 0.00327484542503953, "avg_loss_unweighted": 0.08592011034488678, "avg_weight": 0.5639821887016296, "std_delta_l": 0.04945230856537819, "std_weight": 0.07410396635532379, "weighted_loss": 0.0506988950073719 }, { "avg_delta_l": 0.0014571149367839098, "avg_loss_unweighted": 0.08844152837991714, "avg_weight": 0.5621309876441956, "std_delta_l": 0.054015763103961945, "std_weight": 0.07851574569940567, "weighted_loss": 0.051808375865221024 }, { "avg_delta_l": 0.00472179614007473, "avg_loss_unweighted": 0.07788997143507004, "avg_weight": 0.5442419052124023, "std_delta_l": 0.04235897958278656, "std_weight": 0.056203410029411316, "weighted_loss": 0.04406461492180824 }, { "avg_delta_l": 0.011263448745012283, "avg_loss_unweighted": 0.08634059131145477, "avg_weight": 0.5632666349411011, "std_delta_l": 0.06664766371250153, "std_weight": 0.09169033169746399, "weighted_loss": 0.05152661353349686 }, { "avg_delta_l": 0.0028027035295963287, "avg_loss_unweighted": 0.08171559870243073, "avg_weight": 0.5574131011962891, "std_delta_l": 0.04923441261053085, "std_weight": 0.07821977883577347, "weighted_loss": 0.048098817467689514 }, { "avg_delta_l": 0.0201252531260252, "avg_loss_unweighted": 0.08322246372699738, "avg_weight": 0.5350236892700195, "std_delta_l": 0.050410617142915726, "std_weight": 0.0569470189511776, "weighted_loss": 0.04575127363204956 }, { "epoch": 9.660869565217391, "grad_norm": 0.7638750955431987, "learning_rate": 6.177465377062172e-06, "loss": 0.0944, "step": 560 }, { "avg_delta_l": 0.0020824079401791096, "avg_loss_unweighted": 0.08999411761760712, "avg_weight": 0.5639985203742981, "std_delta_l": 0.05394819378852844, "std_weight": 0.07461148500442505, "weighted_loss": 0.05254688858985901 }, { "avg_delta_l": -0.0011333335423842072, "avg_loss_unweighted": 0.08951479196548462, "avg_weight": 0.565507173538208, "std_delta_l": 0.05427645146846771, "std_weight": 0.08653926849365234, "weighted_loss": 0.05244399234652519 }, { "avg_delta_l": 0.00059937231708318, "avg_loss_unweighted": 0.08882881700992584, "avg_weight": 0.5553449392318726, "std_delta_l": 0.04769161343574524, "std_weight": 0.07831891626119614, "weighted_loss": 0.05087536200881004 }, { "avg_delta_l": 0.009165734052658081, "avg_loss_unweighted": 0.08482030034065247, "avg_weight": 0.5524939894676208, "std_delta_l": 0.04916052892804146, "std_weight": 0.0730656087398529, "weighted_loss": 0.0487963892519474 }, { "avg_delta_l": 0.007333409506827593, "avg_loss_unweighted": 0.07883772999048233, "avg_weight": 0.5623536109924316, "std_delta_l": 0.06137434020638466, "std_weight": 0.08668006211519241, "weighted_loss": 0.04680969938635826 }, { "avg_delta_l": 0.00933444406837225, "avg_loss_unweighted": 0.08116931468248367, "avg_weight": 0.5469037890434265, "std_delta_l": 0.04710247367620468, "std_weight": 0.057872362434864044, "weighted_loss": 0.04619552940130234 }, { "avg_delta_l": 0.003483540378510952, "avg_loss_unweighted": 0.09182902425527573, "avg_weight": 0.5666583776473999, "std_delta_l": 0.057097405195236206, "std_weight": 0.07021423429250717, "weighted_loss": 0.05390021577477455 }, { "avg_delta_l": 0.0013524164678528905, "avg_loss_unweighted": 0.08692125976085663, "avg_weight": 0.5648996829986572, "std_delta_l": 0.05288635566830635, "std_weight": 0.08339719474315643, "weighted_loss": 0.05097651109099388 }, { "avg_delta_l": 0.00393750611692667, "avg_loss_unweighted": 0.082228884100914, "avg_weight": 0.564878523349762, "std_delta_l": 0.05632823705673218, "std_weight": 0.08792747557163239, "weighted_loss": 0.04943356290459633 }, { "avg_delta_l": 0.002997864503413439, "avg_loss_unweighted": 0.0871538296341896, "avg_weight": 0.5644306540489197, "std_delta_l": 0.062295958399772644, "std_weight": 0.08617158234119415, "weighted_loss": 0.05109180510044098 }, { "avg_delta_l": -0.00718872481957078, "avg_loss_unweighted": 0.08971035480499268, "avg_weight": 0.5731561183929443, "std_delta_l": 0.05764696002006531, "std_weight": 0.0951545238494873, "weighted_loss": 0.05456646531820297 }, { "avg_delta_l": -0.0004845971707254648, "avg_loss_unweighted": 0.09745237231254578, "avg_weight": 0.5629565715789795, "std_delta_l": 0.05368192493915558, "std_weight": 0.07896684110164642, "weighted_loss": 0.056813616305589676 }, { "avg_delta_l": 0.010667761787772179, "avg_loss_unweighted": 0.08431548625230789, "avg_weight": 0.5541734099388123, "std_delta_l": 0.05863320454955101, "std_weight": 0.0788404643535614, "weighted_loss": 0.048113081604242325 }, { "avg_delta_l": 0.005301058758050203, "avg_loss_unweighted": 0.07718197256326675, "avg_weight": 0.5502583384513855, "std_delta_l": 0.050626423209905624, "std_weight": 0.06960835307836533, "weighted_loss": 0.044758982956409454 }, { "avg_delta_l": -0.008856648579239845, "avg_loss_unweighted": 0.09872335195541382, "avg_weight": 0.5744836926460266, "std_delta_l": 0.0517377071082592, "std_weight": 0.08756883442401886, "weighted_loss": 0.05874023586511612 }, { "avg_delta_l": 0.000810385390650481, "avg_loss_unweighted": 0.08732689172029495, "avg_weight": 0.5640671253204346, "std_delta_l": 0.06083456426858902, "std_weight": 0.09407523274421692, "weighted_loss": 0.05115716531872749 }, { "avg_delta_l": -0.004083442967385054, "avg_loss_unweighted": 0.08311212807893753, "avg_weight": 0.5682602524757385, "std_delta_l": 0.04826097935438156, "std_weight": 0.07023174315690994, "weighted_loss": 0.04949364811182022 }, { "avg_delta_l": -0.0019258285174146295, "avg_loss_unweighted": 0.08490420877933502, "avg_weight": 0.5752331018447876, "std_delta_l": 0.05905672907829285, "std_weight": 0.09248019754886627, "weighted_loss": 0.05105742812156677 }, { "avg_delta_l": 0.010833201929926872, "avg_loss_unweighted": 0.08494430780410767, "avg_weight": 0.5399271845817566, "std_delta_l": 0.04354296624660492, "std_weight": 0.05015340447425842, "weighted_loss": 0.04667195677757263 }, { "avg_delta_l": 0.009427196346223354, "avg_loss_unweighted": 0.0801270380616188, "avg_weight": 0.5628063678741455, "std_delta_l": 0.06270185858011246, "std_weight": 0.09371653199195862, "weighted_loss": 0.047399625182151794 }, { "epoch": 9.834782608695653, "grad_norm": 0.6620271678982141, "learning_rate": 6.0307264733307515e-06, "loss": 0.1012, "step": 570 }, { "avg_delta_l": -0.00017113424837589264, "avg_loss_unweighted": 0.09311716258525848, "avg_weight": 0.5624925494194031, "std_delta_l": 0.05373990163207054, "std_weight": 0.08490046858787537, "weighted_loss": 0.05477401614189148 }, { "avg_delta_l": -0.005336233414709568, "avg_loss_unweighted": 0.0894586369395256, "avg_weight": 0.5721485018730164, "std_delta_l": 0.0487947091460228, "std_weight": 0.07870040088891983, "weighted_loss": 0.05312050133943558 }, { "avg_delta_l": -0.0010700757848098874, "avg_loss_unweighted": 0.08787353336811066, "avg_weight": 0.5618016719818115, "std_delta_l": 0.04816461727023125, "std_weight": 0.07028006762266159, "weighted_loss": 0.05115432292222977 }, { "avg_delta_l": 0.011681774631142616, "avg_loss_unweighted": 0.08218056708574295, "avg_weight": 0.5509645342826843, "std_delta_l": 0.05388343706727028, "std_weight": 0.06855251640081406, "weighted_loss": 0.04738369584083557 }, { "avg_delta_l": -0.0022163288667798042, "avg_loss_unweighted": 0.0919605940580368, "avg_weight": 0.5743510127067566, "std_delta_l": 0.05509687587618828, "std_weight": 0.08393533527851105, "weighted_loss": 0.05447205528616905 }, { "avg_delta_l": 0.0178055576980114, "avg_loss_unweighted": 0.07465256005525589, "avg_weight": 0.548029899597168, "std_delta_l": 0.059056103229522705, "std_weight": 0.07135653495788574, "weighted_loss": 0.042868465185165405 }, { "avg_delta_l": -0.0023435268085449934, "avg_loss_unweighted": 0.08142168074846268, "avg_weight": 0.5659521222114563, "std_delta_l": 0.05469248816370964, "std_weight": 0.0878194198012352, "weighted_loss": 0.04856811463832855 }, { "avg_delta_l": 0.0003364701406098902, "avg_loss_unweighted": 0.08409477770328522, "avg_weight": 0.5611106753349304, "std_delta_l": 0.04979272559285164, "std_weight": 0.0767386257648468, "weighted_loss": 0.0495513379573822 }, { "avg_delta_l": 0.009723538532853127, "avg_loss_unweighted": 0.08389035612344742, "avg_weight": 0.5492232441902161, "std_delta_l": 0.04937572404742241, "std_weight": 0.061540618538856506, "weighted_loss": 0.04786999151110649 }, { "avg_delta_l": 0.01954292505979538, "avg_loss_unweighted": 0.0757463127374649, "avg_weight": 0.5458321571350098, "std_delta_l": 0.04512707144021988, "std_weight": 0.055258858948946, "weighted_loss": 0.043499384075403214 }, { "avg_delta_l": 0.003657961031422019, "avg_loss_unweighted": 0.08164060860872269, "avg_weight": 0.5613288879394531, "std_delta_l": 0.05164892226457596, "std_weight": 0.07313168793916702, "weighted_loss": 0.048090025782585144 }, { "avg_delta_l": 0.009257052093744278, "avg_loss_unweighted": 0.08915787190198898, "avg_weight": 0.5474226474761963, "std_delta_l": 0.04763542488217354, "std_weight": 0.05397815257310867, "weighted_loss": 0.05004313588142395 }, { "avg_delta_l": 0.0027532025706022978, "avg_loss_unweighted": 0.08145111799240112, "avg_weight": 0.5634316802024841, "std_delta_l": 0.05841534957289696, "std_weight": 0.08399779349565506, "weighted_loss": 0.04732339456677437 }, { "avg_delta_l": -0.005385754629969597, "avg_loss_unweighted": 0.08723422884941101, "avg_weight": 0.5712921619415283, "std_delta_l": 0.053639624267816544, "std_weight": 0.0891052782535553, "weighted_loss": 0.05270707607269287 }, { "avg_delta_l": -0.01993630826473236, "avg_loss_unweighted": 0.09778721630573273, "avg_weight": 0.5958477854728699, "std_delta_l": 0.05536423623561859, "std_weight": 0.10342542827129364, "weighted_loss": 0.060894373804330826 }, { "avg_delta_l": 0.01066503394395113, "avg_loss_unweighted": 0.08439057320356369, "avg_weight": 0.5566055178642273, "std_delta_l": 0.05024814233183861, "std_weight": 0.07140558212995529, "weighted_loss": 0.04916739836335182 }, { "avg_delta_l": 0.0028206047136336565, "avg_loss_unweighted": 0.09010228514671326, "avg_weight": 0.5607198476791382, "std_delta_l": 0.05634751915931702, "std_weight": 0.08157698810100555, "weighted_loss": 0.05196559429168701 }, { "avg_delta_l": -0.0021971706300973892, "avg_loss_unweighted": 0.08663301914930344, "avg_weight": 0.5683881044387817, "std_delta_l": 0.04574907198548317, "std_weight": 0.07263443619012833, "weighted_loss": 0.05086599290370941 }, { "avg_delta_l": 0.018080268055200577, "avg_loss_unweighted": 0.07032749801874161, "avg_weight": 0.5416663289070129, "std_delta_l": 0.05088859051465988, "std_weight": 0.055872052907943726, "weighted_loss": 0.03936392813920975 }, { "epoch": 10.0, "grad_norm": 0.6267130842604085, "learning_rate": 5.883054295084571e-06, "loss": 0.0944, "step": 580 }, { "avg_delta_l": 0.016807086765766144, "avg_loss_unweighted": 0.06558269262313843, "avg_weight": 0.5201230049133301, "std_delta_l": 0.03526248037815094, "std_weight": 0.035074081271886826, "weighted_loss": 0.03491979092359543 }, { "avg_delta_l": 0.01832474209368229, "avg_loss_unweighted": 0.0694476068019867, "avg_weight": 0.5136216878890991, "std_delta_l": 0.029134085401892662, "std_weight": 0.02035786770284176, "weighted_loss": 0.036018431186676025 }, { "avg_delta_l": 0.020673735067248344, "avg_loss_unweighted": 0.07893439382314682, "avg_weight": 0.5165756344795227, "std_delta_l": 0.031569432467222214, "std_weight": 0.029445094987750053, "weighted_loss": 0.04120403155684471 }, { "avg_delta_l": 0.014047855511307716, "avg_loss_unweighted": 0.07519639283418655, "avg_weight": 0.5295692682266235, "std_delta_l": 0.038484346121549606, "std_weight": 0.04557404667139053, "weighted_loss": 0.040422093123197556 }, { "avg_delta_l": 0.017657818272709846, "avg_loss_unweighted": 0.06440947949886322, "avg_weight": 0.5247446298599243, "std_delta_l": 0.036790166050195694, "std_weight": 0.037783823907375336, "weighted_loss": 0.03437308222055435 }, { "avg_delta_l": 0.01333535835146904, "avg_loss_unweighted": 0.06556713581085205, "avg_weight": 0.5247958898544312, "std_delta_l": 0.03606730327010155, "std_weight": 0.04635235667228699, "weighted_loss": 0.03498955816030502 }, { "avg_delta_l": 0.014836479909718037, "avg_loss_unweighted": 0.07529614120721817, "avg_weight": 0.5327016115188599, "std_delta_l": 0.04036189988255501, "std_weight": 0.04351015016436577, "weighted_loss": 0.04089447110891342 }, { "avg_delta_l": 0.020123116672039032, "avg_loss_unweighted": 0.06521996855735779, "avg_weight": 0.5220968723297119, "std_delta_l": 0.03308133780956268, "std_weight": 0.032171446830034256, "weighted_loss": 0.034508511424064636 }, { "avg_delta_l": 0.008945929817855358, "avg_loss_unweighted": 0.06689951568841934, "avg_weight": 0.5298780202865601, "std_delta_l": 0.035785842686891556, "std_weight": 0.04617026448249817, "weighted_loss": 0.03621860221028328 }, { "avg_delta_l": 0.017994167283177376, "avg_loss_unweighted": 0.07777883112430573, "avg_weight": 0.5224330425262451, "std_delta_l": 0.03558211401104927, "std_weight": 0.03842150792479515, "weighted_loss": 0.041316207498311996 }, { "avg_delta_l": 0.017618518322706223, "avg_loss_unweighted": 0.06564079225063324, "avg_weight": 0.5271771550178528, "std_delta_l": 0.04077047482132912, "std_weight": 0.04349778592586517, "weighted_loss": 0.035123057663440704 }, { "avg_delta_l": 0.016367843374609947, "avg_loss_unweighted": 0.060268014669418335, "avg_weight": 0.5258272290229797, "std_delta_l": 0.039250969886779785, "std_weight": 0.04000506177544594, "weighted_loss": 0.03247292712330818 }, { "avg_delta_l": 0.01607709750533104, "avg_loss_unweighted": 0.06340013444423676, "avg_weight": 0.5219940543174744, "std_delta_l": 0.032391130924224854, "std_weight": 0.038509100675582886, "weighted_loss": 0.03399866074323654 }, { "avg_delta_l": 0.01799680106341839, "avg_loss_unweighted": 0.07050348818302155, "avg_weight": 0.5281854867935181, "std_delta_l": 0.040819402784109116, "std_weight": 0.04744291678071022, "weighted_loss": 0.037998344749212265 }, { "avg_delta_l": 0.013997378759086132, "avg_loss_unweighted": 0.0689224824309349, "avg_weight": 0.5263499021530151, "std_delta_l": 0.03430943191051483, "std_weight": 0.04732177034020424, "weighted_loss": 0.03712007403373718 }, { "avg_delta_l": 0.017476357519626617, "avg_loss_unweighted": 0.06065811216831207, "avg_weight": 0.5227909684181213, "std_delta_l": 0.037046872079372406, "std_weight": 0.04154864698648453, "weighted_loss": 0.03234528750181198 }, { "avg_delta_l": 0.023001858964562416, "avg_loss_unweighted": 0.07389293611049652, "avg_weight": 0.5165374279022217, "std_delta_l": 0.029054805636405945, "std_weight": 0.02958439290523529, "weighted_loss": 0.03872251510620117 }, { "avg_delta_l": 0.02252296917140484, "avg_loss_unweighted": 0.057202763855457306, "avg_weight": 0.5123565196990967, "std_delta_l": 0.029249077662825584, "std_weight": 0.021253075450658798, "weighted_loss": 0.029764337465167046 }, { "avg_delta_l": 0.024909881874918938, "avg_loss_unweighted": 0.06118376553058624, "avg_weight": 0.5207327604293823, "std_delta_l": 0.042630624026060104, "std_weight": 0.04057292267680168, "weighted_loss": 0.03280564025044441 }, { "avg_delta_l": 0.006793235894292593, "avg_loss_unweighted": 0.07006844878196716, "avg_weight": 0.5346009135246277, "std_delta_l": 0.03316054120659828, "std_weight": 0.049812640994787216, "weighted_loss": 0.038396913558244705 }, { "epoch": 10.173913043478262, "grad_norm": 0.6747597008401585, "learning_rate": 5.734582552560106e-06, "loss": 0.0724, "step": 590 }, { "avg_delta_l": 0.013944393955171108, "avg_loss_unweighted": 0.061697088181972504, "avg_weight": 0.5254374742507935, "std_delta_l": 0.03432772681117058, "std_weight": 0.040683284401893616, "weighted_loss": 0.03313107788562775 }, { "avg_delta_l": 0.010021369904279709, "avg_loss_unweighted": 0.07543474435806274, "avg_weight": 0.5398399829864502, "std_delta_l": 0.04413553699851036, "std_weight": 0.05764329433441162, "weighted_loss": 0.04155721515417099 }, { "avg_delta_l": 0.013209784403443336, "avg_loss_unweighted": 0.06241987645626068, "avg_weight": 0.5375171899795532, "std_delta_l": 0.04638161510229111, "std_weight": 0.062016841024160385, "weighted_loss": 0.034725021570920944 }, { "avg_delta_l": 0.016576610505580902, "avg_loss_unweighted": 0.06698550283908844, "avg_weight": 0.5285496115684509, "std_delta_l": 0.038276560604572296, "std_weight": 0.0380595438182354, "weighted_loss": 0.03599284216761589 }, { "avg_delta_l": 0.009973547421395779, "avg_loss_unweighted": 0.060179829597473145, "avg_weight": 0.5402183532714844, "std_delta_l": 0.04118189215660095, "std_weight": 0.06321239471435547, "weighted_loss": 0.03392276167869568 }, { "avg_delta_l": 0.014867632649838924, "avg_loss_unweighted": 0.06722507625818253, "avg_weight": 0.524055004119873, "std_delta_l": 0.03201014921069145, "std_weight": 0.040064942091703415, "weighted_loss": 0.03586259484291077 }, { "avg_delta_l": 0.013723748736083508, "avg_loss_unweighted": 0.061149269342422485, "avg_weight": 0.5338373780250549, "std_delta_l": 0.04352448880672455, "std_weight": 0.057857122272253036, "weighted_loss": 0.03419215604662895 }, { "avg_delta_l": 0.006991925183683634, "avg_loss_unweighted": 0.06881242990493774, "avg_weight": 0.5381345152854919, "std_delta_l": 0.03510720282793045, "std_weight": 0.04960964620113373, "weighted_loss": 0.03812834993004799 }, { "avg_delta_l": 0.010083270259201527, "avg_loss_unweighted": 0.06121239811182022, "avg_weight": 0.5286891460418701, "std_delta_l": 0.03097088448703289, "std_weight": 0.034740936011075974, "weighted_loss": 0.03305651247501373 }, { "avg_delta_l": 0.016599688678979874, "avg_loss_unweighted": 0.06346245855093002, "avg_weight": 0.5293533205986023, "std_delta_l": 0.03878484666347504, "std_weight": 0.0462752990424633, "weighted_loss": 0.03482852876186371 }, { "avg_delta_l": 0.011661180295050144, "avg_loss_unweighted": 0.06575634330511093, "avg_weight": 0.5346124172210693, "std_delta_l": 0.04109009727835655, "std_weight": 0.0570855438709259, "weighted_loss": 0.03623023256659508 }, { "avg_delta_l": 0.01345827430486679, "avg_loss_unweighted": 0.06714235991239548, "avg_weight": 0.5261178016662598, "std_delta_l": 0.03611855208873749, "std_weight": 0.04376118257641792, "weighted_loss": 0.03633679449558258 }, { "avg_delta_l": 0.007569198962301016, "avg_loss_unweighted": 0.058699361979961395, "avg_weight": 0.5330764651298523, "std_delta_l": 0.03203088417649269, "std_weight": 0.04330751672387123, "weighted_loss": 0.032294269651174545 }, { "avg_delta_l": 0.012201890349388123, "avg_loss_unweighted": 0.06858482211828232, "avg_weight": 0.5368186831474304, "std_delta_l": 0.04587734118103981, "std_weight": 0.058359693735837936, "weighted_loss": 0.03793890029191971 }, { "avg_delta_l": 0.022628888487815857, "avg_loss_unweighted": 0.06251322478055954, "avg_weight": 0.5197912454605103, "std_delta_l": 0.0384029820561409, "std_weight": 0.03563535958528519, "weighted_loss": 0.03313160687685013 }, { "avg_delta_l": 0.008317769505083561, "avg_loss_unweighted": 0.06573457270860672, "avg_weight": 0.5385390520095825, "std_delta_l": 0.041152410209178925, "std_weight": 0.0540250688791275, "weighted_loss": 0.036683619022369385 }, { "avg_delta_l": 0.007463357876986265, "avg_loss_unweighted": 0.07164335250854492, "avg_weight": 0.532650351524353, "std_delta_l": 0.0352364256978035, "std_weight": 0.04854043945670128, "weighted_loss": 0.03914765641093254 }, { "avg_delta_l": 0.02531963586807251, "avg_loss_unweighted": 0.05540401488542557, "avg_weight": 0.5210925936698914, "std_delta_l": 0.03827197104692459, "std_weight": 0.03724642097949982, "weighted_loss": 0.029701700434088707 }, { "avg_delta_l": 0.009594045579433441, "avg_loss_unweighted": 0.06297408044338226, "avg_weight": 0.5325763821601868, "std_delta_l": 0.03650980815291405, "std_weight": 0.054520219564437866, "weighted_loss": 0.0343826562166214 }, { "avg_delta_l": 0.015760919079184532, "avg_loss_unweighted": 0.06391960382461548, "avg_weight": 0.5317400097846985, "std_delta_l": 0.044969283044338226, "std_weight": 0.048384979367256165, "weighted_loss": 0.03485340252518654 }, { "epoch": 10.347826086956522, "grad_norm": 0.5320004625359102, "learning_rate": 5.585445679961823e-06, "loss": 0.0706, "step": 600 }, { "avg_delta_l": 0.018418962135910988, "avg_loss_unweighted": 0.056631073355674744, "avg_weight": 0.5351970195770264, "std_delta_l": 0.0402151420712471, "std_weight": 0.04656436666846275, "weighted_loss": 0.03166523203253746 }, { "avg_delta_l": 0.013657609932124615, "avg_loss_unweighted": 0.06287956237792969, "avg_weight": 0.5354273319244385, "std_delta_l": 0.03977881371974945, "std_weight": 0.04678346589207649, "weighted_loss": 0.03487031161785126 }, { "avg_delta_l": 0.007652763742953539, "avg_loss_unweighted": 0.07594814151525497, "avg_weight": 0.5327420234680176, "std_delta_l": 0.03373913839459419, "std_weight": 0.05079316720366478, "weighted_loss": 0.04122190177440643 }, { "avg_delta_l": 0.013710039667785168, "avg_loss_unweighted": 0.0700787827372551, "avg_weight": 0.5343188047409058, "std_delta_l": 0.041553739458322525, "std_weight": 0.04899151250720024, "weighted_loss": 0.03866865113377571 }, { "avg_delta_l": 0.016933633014559746, "avg_loss_unweighted": 0.06561964750289917, "avg_weight": 0.5366678237915039, "std_delta_l": 0.04045623168349266, "std_weight": 0.05164317414164543, "weighted_loss": 0.035969894379377365 }, { "avg_delta_l": 0.013973608613014221, "avg_loss_unweighted": 0.06273528188467026, "avg_weight": 0.5327205061912537, "std_delta_l": 0.041811805218458176, "std_weight": 0.05149148032069206, "weighted_loss": 0.03401336073875427 }, { "avg_delta_l": 0.0033495549578219652, "avg_loss_unweighted": 0.07409600168466568, "avg_weight": 0.5488234758377075, "std_delta_l": 0.04488194361329079, "std_weight": 0.07050330191850662, "weighted_loss": 0.042140766978263855 }, { "avg_delta_l": 0.009438362903892994, "avg_loss_unweighted": 0.059037867933511734, "avg_weight": 0.5403093695640564, "std_delta_l": 0.040282804518938065, "std_weight": 0.0521196573972702, "weighted_loss": 0.033279843628406525 }, { "avg_delta_l": 0.013054458424448967, "avg_loss_unweighted": 0.06978677213191986, "avg_weight": 0.5300679206848145, "std_delta_l": 0.037749528884887695, "std_weight": 0.047906264662742615, "weighted_loss": 0.03843202814459801 }, { "avg_delta_l": 0.009316567331552505, "avg_loss_unweighted": 0.06620702892541885, "avg_weight": 0.546759843826294, "std_delta_l": 0.04739273712038994, "std_weight": 0.06081128492951393, "weighted_loss": 0.037644870579242706 }, { "avg_delta_l": 0.013255716301500797, "avg_loss_unweighted": 0.0663873702287674, "avg_weight": 0.5389761924743652, "std_delta_l": 0.041200533509254456, "std_weight": 0.05419500172138214, "weighted_loss": 0.03732170909643173 }, { "avg_delta_l": 0.01755528338253498, "avg_loss_unweighted": 0.06272532045841217, "avg_weight": 0.5253925919532776, "std_delta_l": 0.03415292501449585, "std_weight": 0.034576334059238434, "weighted_loss": 0.034010790288448334 }, { "avg_delta_l": 0.009670239873230457, "avg_loss_unweighted": 0.06259559839963913, "avg_weight": 0.5365437865257263, "std_delta_l": 0.04052728787064552, "std_weight": 0.055979449301958084, "weighted_loss": 0.034719593822956085 }, { "avg_delta_l": 0.007218827493488789, "avg_loss_unweighted": 0.06767813861370087, "avg_weight": 0.539009153842926, "std_delta_l": 0.040344759821891785, "std_weight": 0.05886457860469818, "weighted_loss": 0.03755221515893936 }, { "avg_delta_l": 0.009375249966979027, "avg_loss_unweighted": 0.06526003032922745, "avg_weight": 0.5409405827522278, "std_delta_l": 0.04098362848162651, "std_weight": 0.05419657751917839, "weighted_loss": 0.03665543347597122 }, { "avg_delta_l": 0.0014346921816468239, "avg_loss_unweighted": 0.06710261106491089, "avg_weight": 0.5504595041275024, "std_delta_l": 0.045340247452259064, "std_weight": 0.06264561414718628, "weighted_loss": 0.03826579451560974 }, { "avg_delta_l": 0.013018961995840073, "avg_loss_unweighted": 0.06859132647514343, "avg_weight": 0.5359261631965637, "std_delta_l": 0.04261242598295212, "std_weight": 0.051489681005477905, "weighted_loss": 0.038452982902526855 }, { "avg_delta_l": 0.014532983303070068, "avg_loss_unweighted": 0.06551213562488556, "avg_weight": 0.5358619093894958, "std_delta_l": 0.04521937295794487, "std_weight": 0.05576605722308159, "weighted_loss": 0.036444805562496185 }, { "avg_delta_l": 0.006186896935105324, "avg_loss_unweighted": 0.0569646917283535, "avg_weight": 0.5448025465011597, "std_delta_l": 0.04271532967686653, "std_weight": 0.06499142944812775, "weighted_loss": 0.032424844801425934 }, { "avg_delta_l": 0.02163092792034149, "avg_loss_unweighted": 0.06007801741361618, "avg_weight": 0.5277074575424194, "std_delta_l": 0.04265893995761871, "std_weight": 0.0512990839779377, "weighted_loss": 0.03272765129804611 }, { "epoch": 10.521739130434783, "grad_norm": 0.5513773786282195, "learning_rate": 5.435778713738292e-06, "loss": 0.0726, "step": 610 }, { "avg_delta_l": 0.015770114958286285, "avg_loss_unweighted": 0.0504627525806427, "avg_weight": 0.5280014872550964, "std_delta_l": 0.039113979786634445, "std_weight": 0.04258501157164574, "weighted_loss": 0.027354663237929344 }, { "avg_delta_l": 0.01559419184923172, "avg_loss_unweighted": 0.06289180368185043, "avg_weight": 0.5233316421508789, "std_delta_l": 0.03535398840904236, "std_weight": 0.04156753420829773, "weighted_loss": 0.033711664378643036 }, { "avg_delta_l": 0.002400403842329979, "avg_loss_unweighted": 0.07577086240053177, "avg_weight": 0.5430686473846436, "std_delta_l": 0.035621192306280136, "std_weight": 0.0574408620595932, "weighted_loss": 0.04217609018087387 }, { "avg_delta_l": 0.005454920697957277, "avg_loss_unweighted": 0.0721297562122345, "avg_weight": 0.5409220457077026, "std_delta_l": 0.04176996648311615, "std_weight": 0.06763514131307602, "weighted_loss": 0.039870601147413254 }, { "avg_delta_l": 0.0063527971506118774, "avg_loss_unweighted": 0.06448116153478622, "avg_weight": 0.5471367835998535, "std_delta_l": 0.04309193417429924, "std_weight": 0.06895579397678375, "weighted_loss": 0.03677724301815033 }, { "avg_delta_l": 0.016990629956126213, "avg_loss_unweighted": 0.06423310935497284, "avg_weight": 0.5315690636634827, "std_delta_l": 0.040559276938438416, "std_weight": 0.04538632184267044, "weighted_loss": 0.03522390127182007 }, { "avg_delta_l": -0.0025142133235931396, "avg_loss_unweighted": 0.06163421645760536, "avg_weight": 0.551642656326294, "std_delta_l": 0.041367057710886, "std_weight": 0.06535420566797256, "weighted_loss": 0.03534012287855148 }, { "avg_delta_l": 0.015344231389462948, "avg_loss_unweighted": 0.06029659882187843, "avg_weight": 0.5305743217468262, "std_delta_l": 0.04050613194704056, "std_weight": 0.04120704159140587, "weighted_loss": 0.032925549894571304 }, { "avg_delta_l": 0.01413419097661972, "avg_loss_unweighted": 0.06356877833604813, "avg_weight": 0.5341496467590332, "std_delta_l": 0.04124796390533447, "std_weight": 0.04535451531410217, "weighted_loss": 0.034566860646009445 }, { "avg_delta_l": 0.017076918855309486, "avg_loss_unweighted": 0.06009304150938988, "avg_weight": 0.5243760943412781, "std_delta_l": 0.03684426471590996, "std_weight": 0.04235559329390526, "weighted_loss": 0.032672151923179626 }, { "avg_delta_l": 0.010707392357289791, "avg_loss_unweighted": 0.07164416462182999, "avg_weight": 0.5442109704017639, "std_delta_l": 0.04500069469213486, "std_weight": 0.056968845427036285, "weighted_loss": 0.039909448474645615 }, { "avg_delta_l": 0.006283210590481758, "avg_loss_unweighted": 0.06522885710000992, "avg_weight": 0.5422507524490356, "std_delta_l": 0.04172295331954956, "std_weight": 0.05853108689188957, "weighted_loss": 0.03650246188044548 }, { "avg_delta_l": 0.0020241665188223124, "avg_loss_unweighted": 0.06627513468265533, "avg_weight": 0.5449526309967041, "std_delta_l": 0.03799762949347496, "std_weight": 0.05715775489807129, "weighted_loss": 0.037189781665802 }, { "avg_delta_l": 0.008675380609929562, "avg_loss_unweighted": 0.05945289134979248, "avg_weight": 0.5357017517089844, "std_delta_l": 0.03904365003108978, "std_weight": 0.05584435164928436, "weighted_loss": 0.03291967511177063 }, { "avg_delta_l": -1.7033889889717102e-06, "avg_loss_unweighted": 0.06934025138616562, "avg_weight": 0.5498996376991272, "std_delta_l": 0.04067378491163254, "std_weight": 0.06131007894873619, "weighted_loss": 0.03930933028459549 }, { "avg_delta_l": -0.005727005191147327, "avg_loss_unweighted": 0.06751713156700134, "avg_weight": 0.5666356086730957, "std_delta_l": 0.04833420366048813, "std_weight": 0.08044804632663727, "weighted_loss": 0.04062968119978905 }, { "avg_delta_l": 0.005488759838044643, "avg_loss_unweighted": 0.0661502555012703, "avg_weight": 0.5542724132537842, "std_delta_l": 0.053629692643880844, "std_weight": 0.07455871254205704, "weighted_loss": 0.03804738074541092 }, { "avg_delta_l": 0.0007076982292346656, "avg_loss_unweighted": 0.06711384654045105, "avg_weight": 0.5466165542602539, "std_delta_l": 0.039110563695430756, "std_weight": 0.057759594172239304, "weighted_loss": 0.03789253532886505 }, { "avg_delta_l": -0.00772465392947197, "avg_loss_unweighted": 0.07385695725679398, "avg_weight": 0.5686203837394714, "std_delta_l": 0.04521023482084274, "std_weight": 0.07335201650857925, "weighted_loss": 0.04322552680969238 }, { "avg_delta_l": -0.005724685732275248, "avg_loss_unweighted": 0.07975873351097107, "avg_weight": 0.5678609013557434, "std_delta_l": 0.0533946231007576, "std_weight": 0.08377683162689209, "weighted_loss": 0.047577980905771255 }, { "epoch": 10.695652173913043, "grad_norm": 0.6650322157808997, "learning_rate": 5.285717170313018e-06, "loss": 0.0744, "step": 620 }, { "avg_delta_l": 0.011474492028355598, "avg_loss_unweighted": 0.062428731471300125, "avg_weight": 0.5401297211647034, "std_delta_l": 0.04521254077553749, "std_weight": 0.05680413916707039, "weighted_loss": 0.03477688878774643 }, { "avg_delta_l": 0.004930983297526836, "avg_loss_unweighted": 0.0653776228427887, "avg_weight": 0.5481719970703125, "std_delta_l": 0.04472432658076286, "std_weight": 0.06501565873622894, "weighted_loss": 0.03740321844816208 }, { "avg_delta_l": 0.0031973260920494795, "avg_loss_unweighted": 0.05816350877285004, "avg_weight": 0.5456715822219849, "std_delta_l": 0.0423177145421505, "std_weight": 0.05880315974354744, "weighted_loss": 0.033308546990156174 }, { "avg_delta_l": 0.006070244126021862, "avg_loss_unweighted": 0.06152964383363724, "avg_weight": 0.543766438961029, "std_delta_l": 0.04206479340791702, "std_weight": 0.05521802231669426, "weighted_loss": 0.03443818539381027 }, { "avg_delta_l": 0.004577663727104664, "avg_loss_unweighted": 0.05766187235713005, "avg_weight": 0.5449613928794861, "std_delta_l": 0.04086820408701897, "std_weight": 0.05768068507313728, "weighted_loss": 0.032974228262901306 }, { "avg_delta_l": 0.008912810124456882, "avg_loss_unweighted": 0.06382729113101959, "avg_weight": 0.5438387393951416, "std_delta_l": 0.04776783660054207, "std_weight": 0.06788042187690735, "weighted_loss": 0.036917150020599365 }, { "avg_delta_l": -0.0006598791806027293, "avg_loss_unweighted": 0.06769049167633057, "avg_weight": 0.5536613464355469, "std_delta_l": 0.041150592267513275, "std_weight": 0.059982217848300934, "weighted_loss": 0.03905852138996124 }, { "avg_delta_l": -0.007123312912881374, "avg_loss_unweighted": 0.07607401907444, "avg_weight": 0.562492311000824, "std_delta_l": 0.04490204155445099, "std_weight": 0.07232409715652466, "weighted_loss": 0.04436562955379486 }, { "avg_delta_l": 0.009122653864324093, "avg_loss_unweighted": 0.06512820720672607, "avg_weight": 0.5453079342842102, "std_delta_l": 0.04593701660633087, "std_weight": 0.059016477316617966, "weighted_loss": 0.03678961098194122 }, { "avg_delta_l": 0.00489983893930912, "avg_loss_unweighted": 0.06704280525445938, "avg_weight": 0.5504395961761475, "std_delta_l": 0.046353429555892944, "std_weight": 0.07321744412183762, "weighted_loss": 0.03829745575785637 }, { "avg_delta_l": -0.0017746887169778347, "avg_loss_unweighted": 0.06612643599510193, "avg_weight": 0.5554847717285156, "std_delta_l": 0.04519980400800705, "std_weight": 0.07089710235595703, "weighted_loss": 0.038276076316833496 }, { "avg_delta_l": 0.0027706176042556763, "avg_loss_unweighted": 0.07500431686639786, "avg_weight": 0.5482972860336304, "std_delta_l": 0.04406780004501343, "std_weight": 0.05512893572449684, "weighted_loss": 0.04268096014857292 }, { "avg_delta_l": 0.005346386227756739, "avg_loss_unweighted": 0.06208835169672966, "avg_weight": 0.5444440245628357, "std_delta_l": 0.03687408193945885, "std_weight": 0.05524315685033798, "weighted_loss": 0.03502129763364792 }, { "avg_delta_l": 0.007854440249502659, "avg_loss_unweighted": 0.06479714810848236, "avg_weight": 0.5424056649208069, "std_delta_l": 0.04301796853542328, "std_weight": 0.0610114187002182, "weighted_loss": 0.036215923726558685 }, { "avg_delta_l": 0.008976799435913563, "avg_loss_unweighted": 0.06796026229858398, "avg_weight": 0.5398277044296265, "std_delta_l": 0.04562326520681381, "std_weight": 0.060938261449337006, "weighted_loss": 0.037630703300237656 }, { "avg_delta_l": 0.0011221650056540966, "avg_loss_unweighted": 0.06384136527776718, "avg_weight": 0.5621929168701172, "std_delta_l": 0.05108599364757538, "std_weight": 0.07355928421020508, "weighted_loss": 0.03787870705127716 }, { "avg_delta_l": -0.00032142194686457515, "avg_loss_unweighted": 0.06971796602010727, "avg_weight": 0.5546033978462219, "std_delta_l": 0.04437141865491867, "std_weight": 0.06848850101232529, "weighted_loss": 0.039713840931653976 }, { "avg_delta_l": 0.004083350300788879, "avg_loss_unweighted": 0.06511060148477554, "avg_weight": 0.5537394285202026, "std_delta_l": 0.052716270089149475, "std_weight": 0.08218429237604141, "weighted_loss": 0.03802693262696266 }, { "avg_delta_l": 0.008199437521398067, "avg_loss_unweighted": 0.06385575234889984, "avg_weight": 0.5467550158500671, "std_delta_l": 0.039597827941179276, "std_weight": 0.052395373582839966, "weighted_loss": 0.03601514548063278 }, { "avg_delta_l": -0.005914227571338415, "avg_loss_unweighted": 0.07516984641551971, "avg_weight": 0.5694496035575867, "std_delta_l": 0.04356534406542778, "std_weight": 0.07108012586832047, "weighted_loss": 0.04484429210424423 }, { "epoch": 10.869565217391305, "grad_norm": 0.7551652517173152, "learning_rate": 5.1353969233806735e-06, "loss": 0.0755, "step": 630 }, { "avg_delta_l": -0.0023966748267412186, "avg_loss_unweighted": 0.07005820423364639, "avg_weight": 0.5671244263648987, "std_delta_l": 0.04999220743775368, "std_weight": 0.08453986793756485, "weighted_loss": 0.042252667248249054 }, { "avg_delta_l": -0.00213803444057703, "avg_loss_unweighted": 0.07246611267328262, "avg_weight": 0.5626590251922607, "std_delta_l": 0.0513756237924099, "std_weight": 0.0843314379453659, "weighted_loss": 0.042455364018678665 }, { "avg_delta_l": -0.0017275175778195262, "avg_loss_unweighted": 0.06705918163061142, "avg_weight": 0.5593838691711426, "std_delta_l": 0.046549245715141296, "std_weight": 0.07094346731901169, "weighted_loss": 0.038960572332143784 }, { "avg_delta_l": -0.002930121496319771, "avg_loss_unweighted": 0.06434520334005356, "avg_weight": 0.5644233822822571, "std_delta_l": 0.05097976699471474, "std_weight": 0.08310654759407043, "weighted_loss": 0.03832920268177986 }, { "avg_delta_l": -0.003638126887381077, "avg_loss_unweighted": 0.07380899786949158, "avg_weight": 0.5584261417388916, "std_delta_l": 0.046148527413606644, "std_weight": 0.07325878739356995, "weighted_loss": 0.04295002296566963 }, { "avg_delta_l": 0.004556145053356886, "avg_loss_unweighted": 0.05909297615289688, "avg_weight": 0.5388221144676208, "std_delta_l": 0.03527430444955826, "std_weight": 0.05129038542509079, "weighted_loss": 0.03295382484793663 }, { "avg_delta_l": 0.005549267865717411, "avg_loss_unweighted": 0.05865487828850746, "avg_weight": 0.5472806692123413, "std_delta_l": 0.04548081010580063, "std_weight": 0.06146761029958725, "weighted_loss": 0.03335670754313469 }, { "avg_delta_l": -0.008941952139139175, "avg_loss_unweighted": 0.07309463620185852, "avg_weight": 0.5670216083526611, "std_delta_l": 0.04058029130101204, "std_weight": 0.07133807986974716, "weighted_loss": 0.043082345277071 }, { "avg_delta_l": -0.007096145302057266, "avg_loss_unweighted": 0.06815940886735916, "avg_weight": 0.5717515349388123, "std_delta_l": 0.051516663283109665, "std_weight": 0.07785241305828094, "weighted_loss": 0.04095829278230667 }, { "avg_delta_l": -0.003772216849029064, "avg_loss_unweighted": 0.06806595623493195, "avg_weight": 0.5564901828765869, "std_delta_l": 0.039478689432144165, "std_weight": 0.06572864204645157, "weighted_loss": 0.03952000290155411 }, { "avg_delta_l": -0.0017946114530786872, "avg_loss_unweighted": 0.06386587023735046, "avg_weight": 0.5615782141685486, "std_delta_l": 0.04428566247224808, "std_weight": 0.06936270743608475, "weighted_loss": 0.037844568490982056 }, { "avg_delta_l": -0.0019851448014378548, "avg_loss_unweighted": 0.06998053193092346, "avg_weight": 0.5607559084892273, "std_delta_l": 0.04935615882277489, "std_weight": 0.07835779339075089, "weighted_loss": 0.04104137793183327 }, { "avg_delta_l": 0.004418618977069855, "avg_loss_unweighted": 0.062190692871809006, "avg_weight": 0.5440738201141357, "std_delta_l": 0.04329760745167732, "std_weight": 0.06599728763103485, "weighted_loss": 0.035103701055049896 }, { "avg_delta_l": 0.0017999178962782025, "avg_loss_unweighted": 0.06599222123622894, "avg_weight": 0.560458779335022, "std_delta_l": 0.050359658896923065, "std_weight": 0.06912070512771606, "weighted_loss": 0.03859513998031616 }, { "avg_delta_l": 0.01681503653526306, "avg_loss_unweighted": 0.057419437915086746, "avg_weight": 0.5426656007766724, "std_delta_l": 0.046806998550891876, "std_weight": 0.058552082628011703, "weighted_loss": 0.032856691628694534 }, { "avg_delta_l": 0.015089528635144234, "avg_loss_unweighted": 0.052589137107133865, "avg_weight": 0.5154927968978882, "std_delta_l": 0.027330664917826653, "std_weight": 0.024637898430228233, "weighted_loss": 0.027403347194194794 }, { "avg_delta_l": 0.009986396878957748, "avg_loss_unweighted": 0.05448954552412033, "avg_weight": 0.520248293876648, "std_delta_l": 0.023768525570631027, "std_weight": 0.025990435853600502, "weighted_loss": 0.028569865971803665 }, { "avg_delta_l": 0.018464213237166405, "avg_loss_unweighted": 0.0584491528570652, "avg_weight": 0.5113891959190369, "std_delta_l": 0.022625509649515152, "std_weight": 0.01786707155406475, "weighted_loss": 0.030116967856884003 }, { "avg_delta_l": 0.026254436001181602, "avg_loss_unweighted": 0.041812486946582794, "avg_weight": 0.5068069696426392, "std_delta_l": 0.025845034047961235, "std_weight": 0.012955360114574432, "weighted_loss": 0.021317481994628906 }, { "epoch": 11.034782608695652, "grad_norm": 0.5874994435519981, "learning_rate": 4.984954080879825e-06, "loss": 0.0688, "step": 640 }, { "avg_delta_l": 0.014762911014258862, "avg_loss_unweighted": 0.04760172218084335, "avg_weight": 0.5202288031578064, "std_delta_l": 0.03213049843907356, "std_weight": 0.03220032900571823, "weighted_loss": 0.02536974661052227 }, { "avg_delta_l": 0.013500391505658627, "avg_loss_unweighted": 0.04695390164852142, "avg_weight": 0.5196582674980164, "std_delta_l": 0.028240591287612915, "std_weight": 0.024867746978998184, "weighted_loss": 0.02487078122794628 }, { "avg_delta_l": 0.014517845585942268, "avg_loss_unweighted": 0.05523769557476044, "avg_weight": 0.5170906186103821, "std_delta_l": 0.029442626982927322, "std_weight": 0.028882402926683426, "weighted_loss": 0.02916780672967434 }, { "avg_delta_l": 0.01438150554895401, "avg_loss_unweighted": 0.04462520778179169, "avg_weight": 0.5174026489257812, "std_delta_l": 0.02909926511347294, "std_weight": 0.030859479680657387, "weighted_loss": 0.02356557548046112 }, { "avg_delta_l": 0.0201457180082798, "avg_loss_unweighted": 0.055302951484918594, "avg_weight": 0.516236424446106, "std_delta_l": 0.03284713253378868, "std_weight": 0.02895340695977211, "weighted_loss": 0.02892501838505268 }, { "avg_delta_l": 0.01498590037226677, "avg_loss_unweighted": 0.045264601707458496, "avg_weight": 0.519162654876709, "std_delta_l": 0.03090575337409973, "std_weight": 0.03511934354901314, "weighted_loss": 0.02404182218015194 }, { "avg_delta_l": 0.017707662656903267, "avg_loss_unweighted": 0.04733967036008835, "avg_weight": 0.5214943885803223, "std_delta_l": 0.03231600672006607, "std_weight": 0.03329133987426758, "weighted_loss": 0.02522706426680088 }, { "avg_delta_l": 0.01680656522512436, "avg_loss_unweighted": 0.051382843405008316, "avg_weight": 0.5143787264823914, "std_delta_l": 0.024395279586315155, "std_weight": 0.02228638529777527, "weighted_loss": 0.026636486873030663 }, { "avg_delta_l": 0.007599544711410999, "avg_loss_unweighted": 0.05445318669080734, "avg_weight": 0.5302248597145081, "std_delta_l": 0.031130259856581688, "std_weight": 0.04154407978057861, "weighted_loss": 0.029429592192173004 }, { "avg_delta_l": 0.015235493890941143, "avg_loss_unweighted": 0.0515596829354763, "avg_weight": 0.5158681869506836, "std_delta_l": 0.028301265090703964, "std_weight": 0.026503363624215126, "weighted_loss": 0.02694658190011978 }, { "avg_delta_l": 0.012197459116578102, "avg_loss_unweighted": 0.052965663373470306, "avg_weight": 0.5198335647583008, "std_delta_l": 0.025669755414128304, "std_weight": 0.032331790775060654, "weighted_loss": 0.028116833418607712 }, { "avg_delta_l": 0.01089994516223669, "avg_loss_unweighted": 0.04800540953874588, "avg_weight": 0.5173211693763733, "std_delta_l": 0.022860152646899223, "std_weight": 0.03002576343715191, "weighted_loss": 0.025252504274249077 }, { "avg_delta_l": 0.009041914716362953, "avg_loss_unweighted": 0.05154460296034813, "avg_weight": 0.5240514278411865, "std_delta_l": 0.025040550157427788, "std_weight": 0.03300511837005615, "weighted_loss": 0.027544882148504257 }, { "avg_delta_l": 0.01795775629580021, "avg_loss_unweighted": 0.04684443771839142, "avg_weight": 0.5198872685432434, "std_delta_l": 0.03282948210835457, "std_weight": 0.029825327917933464, "weighted_loss": 0.024868084117770195 }, { "avg_delta_l": 0.017503265291452408, "avg_loss_unweighted": 0.053492918610572815, "avg_weight": 0.5090184211730957, "std_delta_l": 0.02001749351620674, "std_weight": 0.013368187472224236, "weighted_loss": 0.02741442620754242 }, { "avg_delta_l": 0.005817443132400513, "avg_loss_unweighted": 0.053805865347385406, "avg_weight": 0.5334411859512329, "std_delta_l": 0.03241457790136337, "std_weight": 0.05185767635703087, "weighted_loss": 0.029394758865237236 }, { "avg_delta_l": 0.02213333547115326, "avg_loss_unweighted": 0.044362954795360565, "avg_weight": 0.509084939956665, "std_delta_l": 0.02780676633119583, "std_weight": 0.016111019998788834, "weighted_loss": 0.022603895515203476 }, { "avg_delta_l": 0.008535676635801792, "avg_loss_unweighted": 0.05589200183749199, "avg_weight": 0.5269587635993958, "std_delta_l": 0.03329451009631157, "std_weight": 0.04959576204419136, "weighted_loss": 0.03013128787279129 }, { "avg_delta_l": 0.009962733834981918, "avg_loss_unweighted": 0.05587172508239746, "avg_weight": 0.5302934050559998, "std_delta_l": 0.03710896894335747, "std_weight": 0.049720484763383865, "weighted_loss": 0.030400220304727554 }, { "avg_delta_l": 0.011416290886700153, "avg_loss_unweighted": 0.052688147872686386, "avg_weight": 0.531750500202179, "std_delta_l": 0.04194941371679306, "std_weight": 0.05156441777944565, "weighted_loss": 0.028956521302461624 }, { "epoch": 11.208695652173914, "grad_norm": 0.6218218119496067, "learning_rate": 4.834524861753587e-06, "loss": 0.0539, "step": 650 }, { "avg_delta_l": 0.00944621954113245, "avg_loss_unweighted": 0.05247132480144501, "avg_weight": 0.5268754363059998, "std_delta_l": 0.031337298452854156, "std_weight": 0.04600163921713829, "weighted_loss": 0.02845047041773796 }, { "avg_delta_l": 0.009888526983559132, "avg_loss_unweighted": 0.04997095465660095, "avg_weight": 0.529882550239563, "std_delta_l": 0.03573695197701454, "std_weight": 0.050087131559848785, "weighted_loss": 0.027600523084402084 }, { "avg_delta_l": 0.021141866222023964, "avg_loss_unweighted": 0.04731164500117302, "avg_weight": 0.5166976451873779, "std_delta_l": 0.03708020597696304, "std_weight": 0.030626125633716583, "weighted_loss": 0.024767795577645302 }, { "avg_delta_l": 0.009008574299514294, "avg_loss_unweighted": 0.05094122141599655, "avg_weight": 0.5318427085876465, "std_delta_l": 0.03862811625003815, "std_weight": 0.054473213851451874, "weighted_loss": 0.02802971936762333 }, { "avg_delta_l": 0.00871839839965105, "avg_loss_unweighted": 0.05553063750267029, "avg_weight": 0.5321503281593323, "std_delta_l": 0.03535058721899986, "std_weight": 0.0490521565079689, "weighted_loss": 0.030600039288401604 }, { "avg_delta_l": 0.01716822013258934, "avg_loss_unweighted": 0.04983062669634819, "avg_weight": 0.517547607421875, "std_delta_l": 0.032705944031476974, "std_weight": 0.02733643725514412, "weighted_loss": 0.02622039057314396 }, { "avg_delta_l": 0.010068954899907112, "avg_loss_unweighted": 0.05462643504142761, "avg_weight": 0.525874137878418, "std_delta_l": 0.030704930424690247, "std_weight": 0.039259374141693115, "weighted_loss": 0.029301008209586143 }, { "avg_delta_l": 0.010413755662739277, "avg_loss_unweighted": 0.05134408921003342, "avg_weight": 0.5275384187698364, "std_delta_l": 0.033487580716609955, "std_weight": 0.03980511426925659, "weighted_loss": 0.027728436514735222 }, { "avg_delta_l": 0.012325114570558071, "avg_loss_unweighted": 0.042608391493558884, "avg_weight": 0.5197850465774536, "std_delta_l": 0.028342748060822487, "std_weight": 0.025351427495479584, "weighted_loss": 0.022621385753154755 }, { "avg_delta_l": 0.010456887073814869, "avg_loss_unweighted": 0.04802246391773224, "avg_weight": 0.5342846512794495, "std_delta_l": 0.041636787354946136, "std_weight": 0.05306376516819, "weighted_loss": 0.02638416364789009 }, { "avg_delta_l": 0.011611413210630417, "avg_loss_unweighted": 0.04781602695584297, "avg_weight": 0.5227261781692505, "std_delta_l": 0.030381789430975914, "std_weight": 0.03615956008434296, "weighted_loss": 0.025949489325284958 }, { "avg_delta_l": 0.0069449469447135925, "avg_loss_unweighted": 0.0500248521566391, "avg_weight": 0.531376838684082, "std_delta_l": 0.030977772548794746, "std_weight": 0.0489395447075367, "weighted_loss": 0.027646832168102264 }, { "avg_delta_l": 0.007667873986065388, "avg_loss_unweighted": 0.05719485878944397, "avg_weight": 0.5271528363227844, "std_delta_l": 0.030253246426582336, "std_weight": 0.04307416081428528, "weighted_loss": 0.031182298436760902 }, { "avg_delta_l": 0.0025323759764432907, "avg_loss_unweighted": 0.05554056167602539, "avg_weight": 0.54163658618927, "std_delta_l": 0.03194187954068184, "std_weight": 0.04994601756334305, "weighted_loss": 0.030881835147738457 }, { "avg_delta_l": 0.015474123880267143, "avg_loss_unweighted": 0.04381310194730759, "avg_weight": 0.5223686695098877, "std_delta_l": 0.03268939629197121, "std_weight": 0.035115621984004974, "weighted_loss": 0.023796308785676956 }, { "avg_delta_l": 0.008511828258633614, "avg_loss_unweighted": 0.05218907445669174, "avg_weight": 0.5354447960853577, "std_delta_l": 0.03928972780704498, "std_weight": 0.050939373672008514, "weighted_loss": 0.028977414593100548 }, { "avg_delta_l": -0.0022259687539190054, "avg_loss_unweighted": 0.05543234571814537, "avg_weight": 0.5458735823631287, "std_delta_l": 0.03604057803750038, "std_weight": 0.06340660154819489, "weighted_loss": 0.031196273863315582 }, { "avg_delta_l": 0.002766250167042017, "avg_loss_unweighted": 0.055285241454839706, "avg_weight": 0.5471906065940857, "std_delta_l": 0.05436136573553085, "std_weight": 0.07191096991300583, "weighted_loss": 0.034438710659742355 }, { "avg_delta_l": 0.015554613433778286, "avg_loss_unweighted": 0.04757654666900635, "avg_weight": 0.5203897953033447, "std_delta_l": 0.03221655264496803, "std_weight": 0.03464989364147186, "weighted_loss": 0.025060981512069702 }, { "avg_delta_l": 0.012336044572293758, "avg_loss_unweighted": 0.04902837425470352, "avg_weight": 0.5220297574996948, "std_delta_l": 0.029919788241386414, "std_weight": 0.03944304212927818, "weighted_loss": 0.026133399456739426 }, { "epoch": 11.382608695652173, "grad_norm": 1.418439295969313, "learning_rate": 4.684245472609743e-06, "loss": 0.0557, "step": 660 }, { "avg_delta_l": 0.00593394972383976, "avg_loss_unweighted": 0.05385212600231171, "avg_weight": 0.5417852997779846, "std_delta_l": 0.035167574882507324, "std_weight": 0.0448920801281929, "weighted_loss": 0.030098682269454002 }, { "avg_delta_l": 0.011202413588762283, "avg_loss_unweighted": 0.04839014634490013, "avg_weight": 0.5265220403671265, "std_delta_l": 0.035034649074077606, "std_weight": 0.04365727677941322, "weighted_loss": 0.026240376755595207 }, { "avg_delta_l": 0.009389056824147701, "avg_loss_unweighted": 0.044445596635341644, "avg_weight": 0.5269004702568054, "std_delta_l": 0.03079099766910076, "std_weight": 0.04265705868601799, "weighted_loss": 0.02421545423567295 }, { "avg_delta_l": 0.012169099412858486, "avg_loss_unweighted": 0.04951073229312897, "avg_weight": 0.5296241641044617, "std_delta_l": 0.038622744381427765, "std_weight": 0.04888058081269264, "weighted_loss": 0.026871457695961 }, { "avg_delta_l": 0.009011579677462578, "avg_loss_unweighted": 0.04493337124586105, "avg_weight": 0.5343063473701477, "std_delta_l": 0.03770160302519798, "std_weight": 0.05263211205601692, "weighted_loss": 0.024953491985797882 }, { "avg_delta_l": 0.008238072507083416, "avg_loss_unweighted": 0.052139438688755035, "avg_weight": 0.5336429476737976, "std_delta_l": 0.03214426711201668, "std_weight": 0.04372048377990723, "weighted_loss": 0.02847440540790558 }, { "avg_delta_l": 0.013320054858922958, "avg_loss_unweighted": 0.052509188652038574, "avg_weight": 0.5227986574172974, "std_delta_l": 0.030352048575878143, "std_weight": 0.035232555121183395, "weighted_loss": 0.02795703336596489 }, { "avg_delta_l": 0.01998857781291008, "avg_loss_unweighted": 0.05054658651351929, "avg_weight": 0.525790274143219, "std_delta_l": 0.037869688123464584, "std_weight": 0.04162024334073067, "weighted_loss": 0.02760964073240757 }, { "avg_delta_l": 0.0024363002739846706, "avg_loss_unweighted": 0.055595751851797104, "avg_weight": 0.5408863425254822, "std_delta_l": 0.03577177971601486, "std_weight": 0.053258489817380905, "weighted_loss": 0.031134046614170074 }, { "avg_delta_l": 0.01842125877737999, "avg_loss_unweighted": 0.05062481388449669, "avg_weight": 0.525449812412262, "std_delta_l": 0.03621874004602432, "std_weight": 0.03739436715841293, "weighted_loss": 0.02737383544445038 }, { "avg_delta_l": 0.016464538872241974, "avg_loss_unweighted": 0.04437336325645447, "avg_weight": 0.5263249278068542, "std_delta_l": 0.03814900293946266, "std_weight": 0.042504798620939255, "weighted_loss": 0.02408580482006073 }, { "avg_delta_l": 0.009512769058346748, "avg_loss_unweighted": 0.05282069370150566, "avg_weight": 0.533128559589386, "std_delta_l": 0.03425274416804314, "std_weight": 0.04236096888780594, "weighted_loss": 0.028972214087843895 }, { "avg_delta_l": 0.013833601027727127, "avg_loss_unweighted": 0.05377946048974991, "avg_weight": 0.5264381170272827, "std_delta_l": 0.03883543238043785, "std_weight": 0.037695836275815964, "weighted_loss": 0.028739597648382187 }, { "avg_delta_l": 0.01079592201858759, "avg_loss_unweighted": 0.048838574439287186, "avg_weight": 0.5256757140159607, "std_delta_l": 0.03272908926010132, "std_weight": 0.03973813354969025, "weighted_loss": 0.026154374703764915 }, { "avg_delta_l": 0.00045580504229292274, "avg_loss_unweighted": 0.05667048692703247, "avg_weight": 0.5501677989959717, "std_delta_l": 0.03785249590873718, "std_weight": 0.05739040672779083, "weighted_loss": 0.03225881606340408 }, { "avg_delta_l": 0.010869708843529224, "avg_loss_unweighted": 0.04935260862112045, "avg_weight": 0.5374934077262878, "std_delta_l": 0.04135234281420708, "std_weight": 0.05376831442117691, "weighted_loss": 0.027305101975798607 }, { "avg_delta_l": 0.010558458045125008, "avg_loss_unweighted": 0.04879186302423477, "avg_weight": 0.5197945237159729, "std_delta_l": 0.026420224457979202, "std_weight": 0.03561313450336456, "weighted_loss": 0.025774579495191574 }, { "avg_delta_l": 0.004059651866555214, "avg_loss_unweighted": 0.05720163881778717, "avg_weight": 0.535494327545166, "std_delta_l": 0.03237771987915039, "std_weight": 0.04990234971046448, "weighted_loss": 0.031710147857666016 }, { "avg_delta_l": 0.005946984514594078, "avg_loss_unweighted": 0.05802790820598602, "avg_weight": 0.5449281930923462, "std_delta_l": 0.04237568378448486, "std_weight": 0.06277313828468323, "weighted_loss": 0.03315270319581032 }, { "avg_delta_l": 0.005773516837507486, "avg_loss_unweighted": 0.050931110978126526, "avg_weight": 0.5402926206588745, "std_delta_l": 0.03508700802922249, "std_weight": 0.049774132668972015, "weighted_loss": 0.02871897630393505 }, { "epoch": 11.556521739130435, "grad_norm": 0.5436789743336843, "learning_rate": 4.5342519843920494e-06, "loss": 0.0562, "step": 670 }, { "avg_delta_l": 0.005838814191520214, "avg_loss_unweighted": 0.056592587381601334, "avg_weight": 0.5367518663406372, "std_delta_l": 0.038095541298389435, "std_weight": 0.055507197976112366, "weighted_loss": 0.03134050965309143 }, { "avg_delta_l": 0.0022884586360305548, "avg_loss_unweighted": 0.05189555883407593, "avg_weight": 0.5426716804504395, "std_delta_l": 0.033887285739183426, "std_weight": 0.05420636758208275, "weighted_loss": 0.029033493250608444 }, { "avg_delta_l": 0.009598853997886181, "avg_loss_unweighted": 0.047877147793769836, "avg_weight": 0.5338236093521118, "std_delta_l": 0.038577549159526825, "std_weight": 0.05249658226966858, "weighted_loss": 0.026790719479322433 }, { "avg_delta_l": 0.011864691972732544, "avg_loss_unweighted": 0.04447241872549057, "avg_weight": 0.5222928524017334, "std_delta_l": 0.02989252842962742, "std_weight": 0.03352423757314682, "weighted_loss": 0.02371845580637455 }, { "avg_delta_l": 0.01248059794306755, "avg_loss_unweighted": 0.04505520686507225, "avg_weight": 0.5266289710998535, "std_delta_l": 0.033997513353824615, "std_weight": 0.039492059499025345, "weighted_loss": 0.024284159764647484 }, { "avg_delta_l": -0.0011685951612889767, "avg_loss_unweighted": 0.0560002326965332, "avg_weight": 0.5473387241363525, "std_delta_l": 0.038618043065071106, "std_weight": 0.06798647344112396, "weighted_loss": 0.03187749907374382 }, { "avg_delta_l": 9.316694922745228e-05, "avg_loss_unweighted": 0.051127467304468155, "avg_weight": 0.5449908971786499, "std_delta_l": 0.04005615785717964, "std_weight": 0.06158294528722763, "weighted_loss": 0.029078355059027672 }, { "avg_delta_l": 0.008158686570823193, "avg_loss_unweighted": 0.04670250043272972, "avg_weight": 0.5363622903823853, "std_delta_l": 0.035581763833761215, "std_weight": 0.04378565028309822, "weighted_loss": 0.02589009888470173 }, { "avg_delta_l": 0.002398743527010083, "avg_loss_unweighted": 0.05752083659172058, "avg_weight": 0.537493109703064, "std_delta_l": 0.028755441308021545, "std_weight": 0.04242802411317825, "weighted_loss": 0.03161553293466568 }, { "avg_delta_l": 0.00557531276717782, "avg_loss_unweighted": 0.04700031876564026, "avg_weight": 0.5314539074897766, "std_delta_l": 0.029218005016446114, "std_weight": 0.038915473967790604, "weighted_loss": 0.02558792755007744 }, { "avg_delta_l": 0.009056375361979008, "avg_loss_unweighted": 0.05111117288470268, "avg_weight": 0.5437432527542114, "std_delta_l": 0.04719408601522446, "std_weight": 0.06701430678367615, "weighted_loss": 0.029060300439596176 }, { "avg_delta_l": 0.0029345261864364147, "avg_loss_unweighted": 0.052109427750110626, "avg_weight": 0.5380486845970154, "std_delta_l": 0.03175922483205795, "std_weight": 0.048318199813365936, "weighted_loss": 0.029183808714151382 }, { "avg_delta_l": 0.003758893581107259, "avg_loss_unweighted": 0.04633051156997681, "avg_weight": 0.5407208800315857, "std_delta_l": 0.03783498704433441, "std_weight": 0.060375701636075974, "weighted_loss": 0.02630341425538063 }, { "avg_delta_l": -0.0012481731828302145, "avg_loss_unweighted": 0.05536220222711563, "avg_weight": 0.5413869619369507, "std_delta_l": 0.03276088461279869, "std_weight": 0.05926505848765373, "weighted_loss": 0.03082391619682312 }, { "avg_delta_l": 0.0004613549681380391, "avg_loss_unweighted": 0.04602460935711861, "avg_weight": 0.5407133102416992, "std_delta_l": 0.036034177988767624, "std_weight": 0.05925781652331352, "weighted_loss": 0.026018401607871056 }, { "avg_delta_l": 0.0029397117905318737, "avg_loss_unweighted": 0.04946746677160263, "avg_weight": 0.5423367023468018, "std_delta_l": 0.03661066293716431, "std_weight": 0.05376839637756348, "weighted_loss": 0.028044208884239197 }, { "avg_delta_l": -0.0036672353744506836, "avg_loss_unweighted": 0.05093875527381897, "avg_weight": 0.544228196144104, "std_delta_l": 0.0367782823741436, "std_weight": 0.06274370104074478, "weighted_loss": 0.028826899826526642 }, { "avg_delta_l": 0.014353048987686634, "avg_loss_unweighted": 0.03909789025783539, "avg_weight": 0.5292149186134338, "std_delta_l": 0.035840995609760284, "std_weight": 0.03940258175134659, "weighted_loss": 0.021624887362122536 }, { "avg_delta_l": 0.0048990496434271336, "avg_loss_unweighted": 0.050839297473430634, "avg_weight": 0.536893904209137, "std_delta_l": 0.03321022540330887, "std_weight": 0.04556718096137047, "weighted_loss": 0.028180953115224838 }, { "avg_delta_l": 0.005270499736070633, "avg_loss_unweighted": 0.05343325063586235, "avg_weight": 0.5353715419769287, "std_delta_l": 0.03628721460700035, "std_weight": 0.05439205840229988, "weighted_loss": 0.029546402394771576 }, { "epoch": 11.730434782608695, "grad_norm": 0.5113487382066199, "learning_rate": 4.384680209174376e-06, "loss": 0.0557, "step": 680 }, { "avg_delta_l": -0.0002680195029824972, "avg_loss_unweighted": 0.05091327428817749, "avg_weight": 0.5526641607284546, "std_delta_l": 0.03764744848012924, "std_weight": 0.05194995179772377, "weighted_loss": 0.029467947781085968 }, { "avg_delta_l": 0.0010661674896255136, "avg_loss_unweighted": 0.05437294766306877, "avg_weight": 0.5397605895996094, "std_delta_l": 0.03460181877017021, "std_weight": 0.05248483270406723, "weighted_loss": 0.03043428435921669 }, { "avg_delta_l": 0.003093486651778221, "avg_loss_unweighted": 0.045926518738269806, "avg_weight": 0.5400446057319641, "std_delta_l": 0.034013453871011734, "std_weight": 0.04650167375802994, "weighted_loss": 0.025831036269664764 }, { "avg_delta_l": -0.00044490955770015717, "avg_loss_unweighted": 0.057607971131801605, "avg_weight": 0.5523542165756226, "std_delta_l": 0.043984588235616684, "std_weight": 0.06337538361549377, "weighted_loss": 0.03323337063193321 }, { "avg_delta_l": 0.0033055334351956844, "avg_loss_unweighted": 0.05191505327820778, "avg_weight": 0.5436745285987854, "std_delta_l": 0.0383911058306694, "std_weight": 0.05996792018413544, "weighted_loss": 0.029110200703144073 }, { "avg_delta_l": -0.0009027718333527446, "avg_loss_unweighted": 0.053575120866298676, "avg_weight": 0.5520039796829224, "std_delta_l": 0.03861531615257263, "std_weight": 0.06679492443799973, "weighted_loss": 0.03131195902824402 }, { "avg_delta_l": -0.0024832820054143667, "avg_loss_unweighted": 0.05049239471554756, "avg_weight": 0.5463668704032898, "std_delta_l": 0.03643859177827835, "std_weight": 0.06234733387827873, "weighted_loss": 0.028757110238075256 }, { "avg_delta_l": 0.0017744003562256694, "avg_loss_unweighted": 0.05557074770331383, "avg_weight": 0.5413135290145874, "std_delta_l": 0.031098319217562675, "std_weight": 0.050261616706848145, "weighted_loss": 0.03118867799639702 }, { "avg_delta_l": 0.011290649883449078, "avg_loss_unweighted": 0.04173877462744713, "avg_weight": 0.5304844379425049, "std_delta_l": 0.037085987627506256, "std_weight": 0.049842409789562225, "weighted_loss": 0.022855514660477638 }, { "avg_delta_l": 0.0023167794570326805, "avg_loss_unweighted": 0.048445675522089005, "avg_weight": 0.5471521019935608, "std_delta_l": 0.038073305040597916, "std_weight": 0.05639631673693657, "weighted_loss": 0.028023993596434593 }, { "avg_delta_l": -0.0002440083771944046, "avg_loss_unweighted": 0.05035882443189621, "avg_weight": 0.5477379560470581, "std_delta_l": 0.038375675678253174, "std_weight": 0.05667835474014282, "weighted_loss": 0.0287310890853405 }, { "avg_delta_l": -0.000772253901232034, "avg_loss_unweighted": 0.05680795758962631, "avg_weight": 0.5447351336479187, "std_delta_l": 0.03529081493616104, "std_weight": 0.05702512338757515, "weighted_loss": 0.032364144921302795 }, { "avg_delta_l": -0.0004598184605129063, "avg_loss_unweighted": 0.05346445366740227, "avg_weight": 0.5501726865768433, "std_delta_l": 0.042066507041454315, "std_weight": 0.06630871444940567, "weighted_loss": 0.030531451106071472 }, { "avg_delta_l": -0.0008255411521531641, "avg_loss_unweighted": 0.05740594491362572, "avg_weight": 0.5426189303398132, "std_delta_l": 0.031224913895130157, "std_weight": 0.05226507782936096, "weighted_loss": 0.03206491842865944 }, { "avg_delta_l": 0.00020853793830610812, "avg_loss_unweighted": 0.050726477056741714, "avg_weight": 0.5500486493110657, "std_delta_l": 0.04103672131896019, "std_weight": 0.06310147792100906, "weighted_loss": 0.029287179931998253 }, { "avg_delta_l": 0.0005192281096242368, "avg_loss_unweighted": 0.04913497343659401, "avg_weight": 0.5430808067321777, "std_delta_l": 0.03599131107330322, "std_weight": 0.056675996631383896, "weighted_loss": 0.027781782671809196 }, { "avg_delta_l": 0.0007395190186798573, "avg_loss_unweighted": 0.061061564832925797, "avg_weight": 0.5428150296211243, "std_delta_l": 0.03607990965247154, "std_weight": 0.05436431244015694, "weighted_loss": 0.03406524658203125 }, { "avg_delta_l": -0.003085521748289466, "avg_loss_unweighted": 0.05675185099244118, "avg_weight": 0.5608210563659668, "std_delta_l": 0.0467718131840229, "std_weight": 0.07474363595247269, "weighted_loss": 0.03358040377497673 }, { "avg_delta_l": 0.004783990792930126, "avg_loss_unweighted": 0.055243946611881256, "avg_weight": 0.5419683456420898, "std_delta_l": 0.03869114816188812, "std_weight": 0.05169697478413582, "weighted_loss": 0.03070196323096752 }, { "avg_delta_l": -0.0018852264620363712, "avg_loss_unweighted": 0.0500359944999218, "avg_weight": 0.543243408203125, "std_delta_l": 0.03212214633822441, "std_weight": 0.051890723407268524, "weighted_loss": 0.028224386274814606 }, { "epoch": 11.904347826086957, "grad_norm": 0.560049356855179, "learning_rate": 4.2356655771892355e-06, "loss": 0.0598, "step": 690 }, { "avg_delta_l": 0.002510707825422287, "avg_loss_unweighted": 0.046701911836862564, "avg_weight": 0.5454115271568298, "std_delta_l": 0.03978501632809639, "std_weight": 0.06548335403203964, "weighted_loss": 0.026826942339539528 }, { "avg_delta_l": 0.014853006228804588, "avg_loss_unweighted": 0.04948064684867859, "avg_weight": 0.5326626896858215, "std_delta_l": 0.040640607476234436, "std_weight": 0.049719344824552536, "weighted_loss": 0.027575064450502396 }, { "avg_delta_l": -0.0023937816731631756, "avg_loss_unweighted": 0.05238588899374008, "avg_weight": 0.5617925524711609, "std_delta_l": 0.041682712733745575, "std_weight": 0.057042934000492096, "weighted_loss": 0.03105160780251026 }, { "avg_delta_l": -0.0019032431300729513, "avg_loss_unweighted": 0.04916209354996681, "avg_weight": 0.5459519028663635, "std_delta_l": 0.038654912263154984, "std_weight": 0.061641328036785126, "weighted_loss": 0.02777167782187462 }, { "avg_delta_l": 0.006839680019766092, "avg_loss_unweighted": 0.05136421322822571, "avg_weight": 0.5418670177459717, "std_delta_l": 0.040564749389886856, "std_weight": 0.056528039276599884, "weighted_loss": 0.02903853729367256 }, { "avg_delta_l": -0.007389080710709095, "avg_loss_unweighted": 0.05048350244760513, "avg_weight": 0.5546606779098511, "std_delta_l": 0.037291932851076126, "std_weight": 0.0646415501832962, "weighted_loss": 0.029171831905841827 }, { "avg_delta_l": 0.004207185469567776, "avg_loss_unweighted": 0.05076645314693451, "avg_weight": 0.5417606830596924, "std_delta_l": 0.036237262189388275, "std_weight": 0.04758545383810997, "weighted_loss": 0.028858106583356857 }, { "avg_delta_l": 0.003195959609001875, "avg_loss_unweighted": 0.05009001865983009, "avg_weight": 0.544266939163208, "std_delta_l": 0.035865046083927155, "std_weight": 0.04931297153234482, "weighted_loss": 0.028192132711410522 }, { "avg_delta_l": 0.0010529812425374985, "avg_loss_unweighted": 0.05022595822811127, "avg_weight": 0.546218752861023, "std_delta_l": 0.036669496446847916, "std_weight": 0.054630886763334274, "weighted_loss": 0.028444360941648483 }, { "avg_delta_l": 0.00602926267310977, "avg_loss_unweighted": 0.04941822588443756, "avg_weight": 0.5439620614051819, "std_delta_l": 0.03915485367178917, "std_weight": 0.052547477185726166, "weighted_loss": 0.028309201821684837 }, { "avg_delta_l": 0.009783045388758183, "avg_loss_unweighted": 0.04243669658899307, "avg_weight": 0.5329586267471313, "std_delta_l": 0.03676029294729233, "std_weight": 0.046767741441726685, "weighted_loss": 0.023449579253792763 }, { "avg_delta_l": 0.009988052770495415, "avg_loss_unweighted": 0.039406340569257736, "avg_weight": 0.5149029493331909, "std_delta_l": 0.018922457471489906, "std_weight": 0.02517743408679962, "weighted_loss": 0.020554572343826294 }, { "avg_delta_l": 0.009437551721930504, "avg_loss_unweighted": 0.04175140708684921, "avg_weight": 0.5163239240646362, "std_delta_l": 0.02329416014254093, "std_weight": 0.030367493629455566, "weighted_loss": 0.021907160058617592 }, { "avg_delta_l": 0.01405475102365017, "avg_loss_unweighted": 0.041392117738723755, "avg_weight": 0.5190964341163635, "std_delta_l": 0.03270821273326874, "std_weight": 0.03612501174211502, "weighted_loss": 0.02209373563528061 }, { "avg_delta_l": 0.012302888557314873, "avg_loss_unweighted": 0.040823325514793396, "avg_weight": 0.5222561955451965, "std_delta_l": 0.03318299725651741, "std_weight": 0.039750002324581146, "weighted_loss": 0.02188424952328205 }, { "avg_delta_l": 0.00617611687630415, "avg_loss_unweighted": 0.04323306307196617, "avg_weight": 0.5221156477928162, "std_delta_l": 0.023115824908018112, "std_weight": 0.032618068158626556, "weighted_loss": 0.023242097347974777 }, { "avg_delta_l": 0.020006466656923294, "avg_loss_unweighted": 0.03527176380157471, "avg_weight": 0.5077913999557495, "std_delta_l": 0.02225642465054989, "std_weight": 0.012388649396598339, "weighted_loss": 0.018093876540660858 }, { "avg_delta_l": 0.012475891038775444, "avg_loss_unweighted": 0.043664492666721344, "avg_weight": 0.5156992673873901, "std_delta_l": 0.026372041553258896, "std_weight": 0.027110673487186432, "weighted_loss": 0.02271304279565811 }, { "avg_delta_l": 0.011779851280152798, "avg_loss_unweighted": 0.03772851079702377, "avg_weight": 0.5162030458450317, "std_delta_l": 0.024484049528837204, "std_weight": 0.027143172919750214, "weighted_loss": 0.01985332742333412 }, { "epoch": 12.069565217391304, "grad_norm": 0.564625464653722, "learning_rate": 4.087343014202049e-06, "loss": 0.0479, "step": 700 }, { "avg_delta_l": 0.01530605461448431, "avg_loss_unweighted": 0.038953348994255066, "avg_weight": 0.5121495723724365, "std_delta_l": 0.024912355467677116, "std_weight": 0.022778112441301346, "weighted_loss": 0.020223762840032578 }, { "avg_delta_l": 0.014147663488984108, "avg_loss_unweighted": 0.04363705590367317, "avg_weight": 0.5098267793655396, "std_delta_l": 0.02045004442334175, "std_weight": 0.016701165586709976, "weighted_loss": 0.022349881008267403 }, { "avg_delta_l": 0.007215358316898346, "avg_loss_unweighted": 0.03602927550673485, "avg_weight": 0.5250938534736633, "std_delta_l": 0.0296766459941864, "std_weight": 0.03962656855583191, "weighted_loss": 0.01962670311331749 }, { "avg_delta_l": 0.01339742261916399, "avg_loss_unweighted": 0.041898809373378754, "avg_weight": 0.5125083923339844, "std_delta_l": 0.023200420662760735, "std_weight": 0.020686987787485123, "weighted_loss": 0.021681562066078186 }, { "avg_delta_l": 0.01246679574251175, "avg_loss_unweighted": 0.038777220994234085, "avg_weight": 0.5120692253112793, "std_delta_l": 0.022434674203395844, "std_weight": 0.019134478643536568, "weighted_loss": 0.020025644451379776 }, { "avg_delta_l": 0.011460037901997566, "avg_loss_unweighted": 0.040449414402246475, "avg_weight": 0.5171583294868469, "std_delta_l": 0.025611136108636856, "std_weight": 0.032784800976514816, "weighted_loss": 0.021388614550232887 }, { "avg_delta_l": 0.014768804423511028, "avg_loss_unweighted": 0.03540044277906418, "avg_weight": 0.5165390968322754, "std_delta_l": 0.02864459902048111, "std_weight": 0.02954266220331192, "weighted_loss": 0.018648609519004822 }, { "avg_delta_l": 0.011136041954159737, "avg_loss_unweighted": 0.036786384880542755, "avg_weight": 0.5204024314880371, "std_delta_l": 0.027470123022794724, "std_weight": 0.03014240227639675, "weighted_loss": 0.019635537639260292 }, { "avg_delta_l": 0.005868134088814259, "avg_loss_unweighted": 0.039989255368709564, "avg_weight": 0.5210998058319092, "std_delta_l": 0.023254381492733955, "std_weight": 0.031494975090026855, "weighted_loss": 0.021404309198260307 }, { "avg_delta_l": 0.009541118517518044, "avg_loss_unweighted": 0.039417896419763565, "avg_weight": 0.5173057913780212, "std_delta_l": 0.023958928883075714, "std_weight": 0.03199068456888199, "weighted_loss": 0.02088126726448536 }, { "avg_delta_l": 0.009087742306292057, "avg_loss_unweighted": 0.03764382377266884, "avg_weight": 0.517977237701416, "std_delta_l": 0.023381197825074196, "std_weight": 0.02606300823390484, "weighted_loss": 0.01989218033850193 }, { "avg_delta_l": 0.012327689677476883, "avg_loss_unweighted": 0.036926738917827606, "avg_weight": 0.5149222016334534, "std_delta_l": 0.022921260446310043, "std_weight": 0.024909857660531998, "weighted_loss": 0.019337832927703857 }, { "avg_delta_l": 0.0066806054674088955, "avg_loss_unweighted": 0.04164127632975578, "avg_weight": 0.5232484936714172, "std_delta_l": 0.025779204443097115, "std_weight": 0.03687550500035286, "weighted_loss": 0.022281641140580177 }, { "avg_delta_l": 0.012757418677210808, "avg_loss_unweighted": 0.03502704203128815, "avg_weight": 0.5213087201118469, "std_delta_l": 0.029483234509825706, "std_weight": 0.030589155852794647, "weighted_loss": 0.01879734918475151 }, { "avg_delta_l": 0.00856703706085682, "avg_loss_unweighted": 0.03724325820803642, "avg_weight": 0.5211492776870728, "std_delta_l": 0.026219967752695084, "std_weight": 0.03167504072189331, "weighted_loss": 0.019804880023002625 }, { "avg_delta_l": 0.004968185443431139, "avg_loss_unweighted": 0.04171612858772278, "avg_weight": 0.5287861824035645, "std_delta_l": 0.03173660859465599, "std_weight": 0.05072293430566788, "weighted_loss": 0.023166460916399956 }, { "avg_delta_l": 0.00719363521784544, "avg_loss_unweighted": 0.0391356386244297, "avg_weight": 0.5194810628890991, "std_delta_l": 0.023443087935447693, "std_weight": 0.026376904919743538, "weighted_loss": 0.02071736939251423 }, { "avg_delta_l": 0.010553181171417236, "avg_loss_unweighted": 0.03923459351062775, "avg_weight": 0.5143792033195496, "std_delta_l": 0.020444277673959732, "std_weight": 0.020040100440382957, "weighted_loss": 0.02033964917063713 }, { "avg_delta_l": 0.011501937173306942, "avg_loss_unweighted": 0.03214482590556145, "avg_weight": 0.5158756971359253, "std_delta_l": 0.024345528334379196, "std_weight": 0.029337603598833084, "weighted_loss": 0.017066363245248795 }, { "avg_delta_l": 0.008137254044413567, "avg_loss_unweighted": 0.040762946009635925, "avg_weight": 0.5259820222854614, "std_delta_l": 0.029441356658935547, "std_weight": 0.03770029544830322, "weighted_loss": 0.022093988955020905 }, { "epoch": 12.243478260869566, "grad_norm": 0.4962575291670369, "learning_rate": 3.939846819342204e-06, "loss": 0.0409, "step": 710 }, { "avg_delta_l": 0.005606367718428373, "avg_loss_unweighted": 0.04362703114748001, "avg_weight": 0.5289711952209473, "std_delta_l": 0.02713017165660858, "std_weight": 0.03880583122372627, "weighted_loss": 0.0238803643733263 }, { "avg_delta_l": 0.014532240107655525, "avg_loss_unweighted": 0.040583010762929916, "avg_weight": 0.5158552527427673, "std_delta_l": 0.025350309908390045, "std_weight": 0.026681436225771904, "weighted_loss": 0.02136736549437046 }, { "avg_delta_l": 0.008254538290202618, "avg_loss_unweighted": 0.038025811314582825, "avg_weight": 0.5266331434249878, "std_delta_l": 0.032748159021139145, "std_weight": 0.05114224553108215, "weighted_loss": 0.021007532253861427 }, { "avg_delta_l": 0.003116066101938486, "avg_loss_unweighted": 0.04589524120092392, "avg_weight": 0.5322647094726562, "std_delta_l": 0.030779631808400154, "std_weight": 0.04987790435552597, "weighted_loss": 0.02519465982913971 }, { "avg_delta_l": 0.011120188981294632, "avg_loss_unweighted": 0.03618449717760086, "avg_weight": 0.5204558968544006, "std_delta_l": 0.026150798425078392, "std_weight": 0.030556483194231987, "weighted_loss": 0.01932349242269993 }, { "avg_delta_l": 0.011517558246850967, "avg_loss_unweighted": 0.036772001534700394, "avg_weight": 0.5192540884017944, "std_delta_l": 0.027979014441370964, "std_weight": 0.03118278644979, "weighted_loss": 0.019266296178102493 }, { "avg_delta_l": 0.01065575797110796, "avg_loss_unweighted": 0.03800613060593605, "avg_weight": 0.5132672786712646, "std_delta_l": 0.022120382636785507, "std_weight": 0.024600690230727196, "weighted_loss": 0.019731100648641586 }, { "avg_delta_l": 0.0028695317450910807, "avg_loss_unweighted": 0.05270853266119957, "avg_weight": 0.5388054251670837, "std_delta_l": 0.030521506443619728, "std_weight": 0.04604519158601761, "weighted_loss": 0.028985168784856796 }, { "avg_delta_l": 0.008347929455339909, "avg_loss_unweighted": 0.03446490317583084, "avg_weight": 0.5160298347473145, "std_delta_l": 0.021312030032277107, "std_weight": 0.02560799941420555, "weighted_loss": 0.01807253435254097 }, { "avg_delta_l": 0.01743946596980095, "avg_loss_unweighted": 0.034429773688316345, "avg_weight": 0.5172365307807922, "std_delta_l": 0.031829871237277985, "std_weight": 0.027944276109337807, "weighted_loss": 0.01813887432217598 }, { "avg_delta_l": 0.0025416184216737747, "avg_loss_unweighted": 0.040706321597099304, "avg_weight": 0.5314340591430664, "std_delta_l": 0.024996964260935783, "std_weight": 0.04120906814932823, "weighted_loss": 0.022381935268640518 }, { "avg_delta_l": 0.008801411837339401, "avg_loss_unweighted": 0.044198520481586456, "avg_weight": 0.5330203771591187, "std_delta_l": 0.03674332797527313, "std_weight": 0.044666290283203125, "weighted_loss": 0.024124665185809135 }, { "avg_delta_l": 0.006621087901294231, "avg_loss_unweighted": 0.04368855431675911, "avg_weight": 0.5284860730171204, "std_delta_l": 0.028904344886541367, "std_weight": 0.04103762283921242, "weighted_loss": 0.02357311174273491 }, { "avg_delta_l": 0.007868481799960136, "avg_loss_unweighted": 0.041935499757528305, "avg_weight": 0.5208508968353271, "std_delta_l": 0.023594895377755165, "std_weight": 0.031323518604040146, "weighted_loss": 0.022270016372203827 }, { "avg_delta_l": 0.004672680515795946, "avg_loss_unweighted": 0.04033816233277321, "avg_weight": 0.5306463241577148, "std_delta_l": 0.03096015378832817, "std_weight": 0.043778542429208755, "weighted_loss": 0.021794885396957397 }, { "avg_delta_l": 0.011862085200846195, "avg_loss_unweighted": 0.0380544438958168, "avg_weight": 0.5230854153633118, "std_delta_l": 0.02966161258518696, "std_weight": 0.04035523161292076, "weighted_loss": 0.020359722897410393 }, { "avg_delta_l": 0.004136145114898682, "avg_loss_unweighted": 0.040120117366313934, "avg_weight": 0.5315262079238892, "std_delta_l": 0.030945949256420135, "std_weight": 0.04938467964529991, "weighted_loss": 0.022093866020441055 }, { "avg_delta_l": 0.007858839817345142, "avg_loss_unweighted": 0.03540830686688423, "avg_weight": 0.5249948501586914, "std_delta_l": 0.02600395306944847, "std_weight": 0.03313979133963585, "weighted_loss": 0.01922091282904148 }, { "avg_delta_l": 0.006461641751229763, "avg_loss_unweighted": 0.04027874767780304, "avg_weight": 0.5251029133796692, "std_delta_l": 0.02999820187687874, "std_weight": 0.03954087570309639, "weighted_loss": 0.021791893988847733 }, { "avg_delta_l": 0.014639807865023613, "avg_loss_unweighted": 0.0360681414604187, "avg_weight": 0.5094239711761475, "std_delta_l": 0.02258385345339775, "std_weight": 0.01666027307510376, "weighted_loss": 0.01848132722079754 }, { "epoch": 12.417391304347825, "grad_norm": 0.4835453211268085, "learning_rate": 3.7933105435014727e-06, "loss": 0.0431, "step": 720 }, { "avg_delta_l": 0.006046322174370289, "avg_loss_unweighted": 0.03767108544707298, "avg_weight": 0.5280193090438843, "std_delta_l": 0.02579350769519806, "std_weight": 0.033563271164894104, "weighted_loss": 0.020536331459879875 }, { "avg_delta_l": 0.01067652739584446, "avg_loss_unweighted": 0.03640591353178024, "avg_weight": 0.5192310810089111, "std_delta_l": 0.024945514276623726, "std_weight": 0.028542978689074516, "weighted_loss": 0.019274303689599037 }, { "avg_delta_l": 0.008638911880552769, "avg_loss_unweighted": 0.04086771979928017, "avg_weight": 0.5195465683937073, "std_delta_l": 0.02283063344657421, "std_weight": 0.027247725054621696, "weighted_loss": 0.021521450951695442 }, { "avg_delta_l": 0.009676322340965271, "avg_loss_unweighted": 0.04397168755531311, "avg_weight": 0.521233856678009, "std_delta_l": 0.025643106549978256, "std_weight": 0.02972821332514286, "weighted_loss": 0.023310210555791855 }, { "avg_delta_l": 0.005082595162093639, "avg_loss_unweighted": 0.037168510258197784, "avg_weight": 0.5253480672836304, "std_delta_l": 0.026329386979341507, "std_weight": 0.03713249787688255, "weighted_loss": 0.020188556984066963 }, { "avg_delta_l": 0.005399761721491814, "avg_loss_unweighted": 0.0384652242064476, "avg_weight": 0.5253891944885254, "std_delta_l": 0.02672075666487217, "std_weight": 0.03646891564130783, "weighted_loss": 0.020538747310638428 }, { "avg_delta_l": 0.004863810259848833, "avg_loss_unweighted": 0.0378112718462944, "avg_weight": 0.527868390083313, "std_delta_l": 0.030630474910140038, "std_weight": 0.048060379922389984, "weighted_loss": 0.020680055022239685 }, { "avg_delta_l": 0.005022906698286533, "avg_loss_unweighted": 0.03257892653346062, "avg_weight": 0.5230236649513245, "std_delta_l": 0.026330607011914253, "std_weight": 0.04116044566035271, "weighted_loss": 0.017675727605819702 }, { "avg_delta_l": 0.014302506111562252, "avg_loss_unweighted": 0.03475850448012352, "avg_weight": 0.5209786891937256, "std_delta_l": 0.028686903417110443, "std_weight": 0.035436660051345825, "weighted_loss": 0.018455959856510162 }, { "avg_delta_l": 0.007998663000762463, "avg_loss_unweighted": 0.03451543301343918, "avg_weight": 0.5230783224105835, "std_delta_l": 0.025544028729200363, "std_weight": 0.032458916306495667, "weighted_loss": 0.018596017733216286 }, { "avg_delta_l": 0.0030752834863960743, "avg_loss_unweighted": 0.037381939589977264, "avg_weight": 0.5326054096221924, "std_delta_l": 0.026112273335456848, "std_weight": 0.0337403304874897, "weighted_loss": 0.020779123529791832 }, { "avg_delta_l": 0.0031064413487911224, "avg_loss_unweighted": 0.04347869008779526, "avg_weight": 0.5319485068321228, "std_delta_l": 0.03033808432519436, "std_weight": 0.04634872078895569, "weighted_loss": 0.023853491991758347 }, { "avg_delta_l": 0.008749968372285366, "avg_loss_unweighted": 0.03498774394392967, "avg_weight": 0.5262855887413025, "std_delta_l": 0.03054891899228096, "std_weight": 0.04004842787981033, "weighted_loss": 0.019074132665991783 }, { "avg_delta_l": 0.0036009931936860085, "avg_loss_unweighted": 0.039776261895895004, "avg_weight": 0.532723069190979, "std_delta_l": 0.029666908085346222, "std_weight": 0.042888447642326355, "weighted_loss": 0.02180030383169651 }, { "avg_delta_l": 0.0037320763804018497, "avg_loss_unweighted": 0.0417323112487793, "avg_weight": 0.5372153520584106, "std_delta_l": 0.034064505249261856, "std_weight": 0.05032837763428688, "weighted_loss": 0.023379044607281685 }, { "avg_delta_l": 0.0016042834613472223, "avg_loss_unweighted": 0.04242653772234917, "avg_weight": 0.5397900938987732, "std_delta_l": 0.035934992134571075, "std_weight": 0.05130891874432564, "weighted_loss": 0.024007275700569153 }, { "avg_delta_l": 0.006709398236125708, "avg_loss_unweighted": 0.03257424756884575, "avg_weight": 0.5228294730186462, "std_delta_l": 0.023093359544873238, "std_weight": 0.029908111318945885, "weighted_loss": 0.017665738239884377 }, { "avg_delta_l": 0.0028382819145917892, "avg_loss_unweighted": 0.036173947155475616, "avg_weight": 0.5336257815361023, "std_delta_l": 0.03212214261293411, "std_weight": 0.046353649348020554, "weighted_loss": 0.02026224322617054 }, { "avg_delta_l": 0.002908945083618164, "avg_loss_unweighted": 0.04227171465754509, "avg_weight": 0.5307292938232422, "std_delta_l": 0.028031539171934128, "std_weight": 0.04495471343398094, "weighted_loss": 0.023291492834687233 }, { "avg_delta_l": 0.008916283026337624, "avg_loss_unweighted": 0.03675375133752823, "avg_weight": 0.52337247133255, "std_delta_l": 0.02803809382021427, "std_weight": 0.0368640311062336, "weighted_loss": 0.01975383795797825 }, { "epoch": 12.591304347826087, "grad_norm": 0.4561616752036166, "learning_rate": 3.6478668684099423e-06, "loss": 0.0415, "step": 730 }, { "avg_delta_l": 0.001084816874936223, "avg_loss_unweighted": 0.03905269131064415, "avg_weight": 0.5384596586227417, "std_delta_l": 0.03029107302427292, "std_weight": 0.04520456865429878, "weighted_loss": 0.02193097583949566 }, { "avg_delta_l": 0.010310097597539425, "avg_loss_unweighted": 0.03679078444838524, "avg_weight": 0.5225501656532288, "std_delta_l": 0.02798185497522354, "std_weight": 0.03049168549478054, "weighted_loss": 0.01973552256822586 }, { "avg_delta_l": 0.006601380184292793, "avg_loss_unweighted": 0.03921535238623619, "avg_weight": 0.5291938185691833, "std_delta_l": 0.031357936561107635, "std_weight": 0.04578167945146561, "weighted_loss": 0.021673545241355896 }, { "avg_delta_l": 0.005234858486801386, "avg_loss_unweighted": 0.03319135680794716, "avg_weight": 0.5240393280982971, "std_delta_l": 0.025220438838005066, "std_weight": 0.034589748829603195, "weighted_loss": 0.017815425992012024 }, { "avg_delta_l": 0.005944495089352131, "avg_loss_unweighted": 0.038929641246795654, "avg_weight": 0.532443106174469, "std_delta_l": 0.026316234841942787, "std_weight": 0.03404345363378525, "weighted_loss": 0.02125980146229267 }, { "avg_delta_l": 0.006637944374233484, "avg_loss_unweighted": 0.033410631120204926, "avg_weight": 0.5269819498062134, "std_delta_l": 0.027147332206368446, "std_weight": 0.04000638425350189, "weighted_loss": 0.01842779479920864 }, { "avg_delta_l": 0.003812123090028763, "avg_loss_unweighted": 0.04254688322544098, "avg_weight": 0.5281292796134949, "std_delta_l": 0.02602992206811905, "std_weight": 0.03505413606762886, "weighted_loss": 0.023087453097105026 }, { "avg_delta_l": -6.32305454928428e-05, "avg_loss_unweighted": 0.040553655475378036, "avg_weight": 0.5350451469421387, "std_delta_l": 0.03006601706147194, "std_weight": 0.05065522342920303, "weighted_loss": 0.022411959245800972 }, { "avg_delta_l": 0.011962939985096455, "avg_loss_unweighted": 0.035865284502506256, "avg_weight": 0.5260292291641235, "std_delta_l": 0.038373757153749466, "std_weight": 0.03661678358912468, "weighted_loss": 0.019430577754974365 }, { "avg_delta_l": 0.004694157280027866, "avg_loss_unweighted": 0.039301857352256775, "avg_weight": 0.5300775766372681, "std_delta_l": 0.029976151883602142, "std_weight": 0.04415563493967056, "weighted_loss": 0.02128567174077034 }, { "avg_delta_l": -0.0006866027833893895, "avg_loss_unweighted": 0.04160310700535774, "avg_weight": 0.5418205261230469, "std_delta_l": 0.03402584418654442, "std_weight": 0.05263964831829071, "weighted_loss": 0.02342997118830681 }, { "avg_delta_l": -0.0009521809406578541, "avg_loss_unweighted": 0.04290139302611351, "avg_weight": 0.5393744111061096, "std_delta_l": 0.0292985737323761, "std_weight": 0.04868190735578537, "weighted_loss": 0.023631220683455467 }, { "avg_delta_l": 0.0037829866632819176, "avg_loss_unweighted": 0.043293945491313934, "avg_weight": 0.5291488170623779, "std_delta_l": 0.02924472652375698, "std_weight": 0.04731006920337677, "weighted_loss": 0.024022173136472702 }, { "avg_delta_l": 0.0017125424928963184, "avg_loss_unweighted": 0.03930502012372017, "avg_weight": 0.5304291248321533, "std_delta_l": 0.02683212235569954, "std_weight": 0.04162386432290077, "weighted_loss": 0.02146233059465885 }, { "avg_delta_l": 0.005799426231533289, "avg_loss_unweighted": 0.036522138863801956, "avg_weight": 0.5269314050674438, "std_delta_l": 0.02822590246796608, "std_weight": 0.03887972980737686, "weighted_loss": 0.019856855273246765 }, { "avg_delta_l": 0.006437474396079779, "avg_loss_unweighted": 0.04074575752019882, "avg_weight": 0.5287961959838867, "std_delta_l": 0.030316505581140518, "std_weight": 0.039191119372844696, "weighted_loss": 0.02208494395017624 }, { "avg_delta_l": 0.003122807713225484, "avg_loss_unweighted": 0.04179910197854042, "avg_weight": 0.533478319644928, "std_delta_l": 0.03014119155704975, "std_weight": 0.05207173526287079, "weighted_loss": 0.023025088012218475 }, { "avg_delta_l": 0.007409951649606228, "avg_loss_unweighted": 0.03901286423206329, "avg_weight": 0.5230392217636108, "std_delta_l": 0.025890713557600975, "std_weight": 0.035779066383838654, "weighted_loss": 0.020798344165086746 }, { "avg_delta_l": 0.016111738979816437, "avg_loss_unweighted": 0.03553181141614914, "avg_weight": 0.5211297273635864, "std_delta_l": 0.02961360104382038, "std_weight": 0.026703322306275368, "weighted_loss": 0.018721340224146843 }, { "avg_delta_l": 0.0022419029846787453, "avg_loss_unweighted": 0.04248819872736931, "avg_weight": 0.5326842665672302, "std_delta_l": 0.028275012969970703, "std_weight": 0.04253506287932396, "weighted_loss": 0.023137472569942474 }, { "epoch": 12.765217391304347, "grad_norm": 0.48826282502988827, "learning_rate": 3.5036474864989294e-06, "loss": 0.0427, "step": 740 }, { "avg_delta_l": 0.005063657648861408, "avg_loss_unweighted": 0.03837164118885994, "avg_weight": 0.5275245904922485, "std_delta_l": 0.02839847095310688, "std_weight": 0.04026840254664421, "weighted_loss": 0.020603690296411514 }, { "avg_delta_l": 0.002708614803850651, "avg_loss_unweighted": 0.04000793769955635, "avg_weight": 0.5306394100189209, "std_delta_l": 0.02922549843788147, "std_weight": 0.04354063794016838, "weighted_loss": 0.021723700687289238 }, { "avg_delta_l": -0.00391701003536582, "avg_loss_unweighted": 0.04810955002903938, "avg_weight": 0.54830402135849, "std_delta_l": 0.03510967642068863, "std_weight": 0.057452693581581116, "weighted_loss": 0.027151724323630333 }, { "avg_delta_l": -0.005018075928092003, "avg_loss_unweighted": 0.04059501364827156, "avg_weight": 0.5491398572921753, "std_delta_l": 0.033585112541913986, "std_weight": 0.058901190757751465, "weighted_loss": 0.023292433470487595 }, { "avg_delta_l": 0.002173080574721098, "avg_loss_unweighted": 0.03789004683494568, "avg_weight": 0.5322178602218628, "std_delta_l": 0.03058973141014576, "std_weight": 0.051359739154577255, "weighted_loss": 0.021002162247896194 }, { "avg_delta_l": 0.0016965523827821016, "avg_loss_unweighted": 0.03596000000834465, "avg_weight": 0.5353351831436157, "std_delta_l": 0.03042777068912983, "std_weight": 0.04568346217274666, "weighted_loss": 0.020007168874144554 }, { "avg_delta_l": 0.005842996761202812, "avg_loss_unweighted": 0.033834557980298996, "avg_weight": 0.5315293669700623, "std_delta_l": 0.032198868691921234, "std_weight": 0.04750657081604004, "weighted_loss": 0.018852485343813896 }, { "avg_delta_l": 0.0038822395727038383, "avg_loss_unweighted": 0.0325370617210865, "avg_weight": 0.5286773443222046, "std_delta_l": 0.02513190172612667, "std_weight": 0.03202597424387932, "weighted_loss": 0.017685377970337868 }, { "avg_delta_l": -0.002033813390880823, "avg_loss_unweighted": 0.04009079188108444, "avg_weight": 0.5393649339675903, "std_delta_l": 0.03062843717634678, "std_weight": 0.05391882359981537, "weighted_loss": 0.022895447909832 }, { "avg_delta_l": -0.001174373086541891, "avg_loss_unweighted": 0.04504045099020004, "avg_weight": 0.542548418045044, "std_delta_l": 0.030945971608161926, "std_weight": 0.05055717006325722, "weighted_loss": 0.025362884625792503 }, { "avg_delta_l": -0.003973153419792652, "avg_loss_unweighted": 0.04141564294695854, "avg_weight": 0.5480263233184814, "std_delta_l": 0.035655274987220764, "std_weight": 0.06251402944326401, "weighted_loss": 0.023713385686278343 }, { "avg_delta_l": 0.003881152020767331, "avg_loss_unweighted": 0.039596669375896454, "avg_weight": 0.5328916907310486, "std_delta_l": 0.030389105901122093, "std_weight": 0.04530708119273186, "weighted_loss": 0.021629881113767624 }, { "avg_delta_l": -0.0035937968641519547, "avg_loss_unweighted": 0.04153577610850334, "avg_weight": 0.5487152934074402, "std_delta_l": 0.03339862450957298, "std_weight": 0.05376794561743736, "weighted_loss": 0.023992082104086876 }, { "avg_delta_l": 0.004437380004674196, "avg_loss_unweighted": 0.036118052899837494, "avg_weight": 0.5337016582489014, "std_delta_l": 0.028214087709784508, "std_weight": 0.03554180637001991, "weighted_loss": 0.020082809031009674 }, { "avg_delta_l": -0.0021772156469523907, "avg_loss_unweighted": 0.03830915316939354, "avg_weight": 0.5370679497718811, "std_delta_l": 0.022627752274274826, "std_weight": 0.03757757693529129, "weighted_loss": 0.021163472905755043 }, { "avg_delta_l": 0.007390453480184078, "avg_loss_unweighted": 0.03825569152832031, "avg_weight": 0.5315348505973816, "std_delta_l": 0.033093538135290146, "std_weight": 0.04364524781703949, "weighted_loss": 0.021087368950247765 }, { "avg_delta_l": -0.0008105045417323709, "avg_loss_unweighted": 0.041410066187381744, "avg_weight": 0.5444734692573547, "std_delta_l": 0.03638144209980965, "std_weight": 0.057183511555194855, "weighted_loss": 0.02368897758424282 }, { "avg_delta_l": -0.002670431975275278, "avg_loss_unweighted": 0.04123661667108536, "avg_weight": 0.5456476211547852, "std_delta_l": 0.03280457481741905, "std_weight": 0.052484773099422455, "weighted_loss": 0.023571569472551346 }, { "avg_delta_l": 0.003953580278903246, "avg_loss_unweighted": 0.03846660628914833, "avg_weight": 0.538190484046936, "std_delta_l": 0.0330384336411953, "std_weight": 0.04419482871890068, "weighted_loss": 0.021371953189373016 }, { "avg_delta_l": -0.0014957708772271872, "avg_loss_unweighted": 0.03637784346938133, "avg_weight": 0.5428191423416138, "std_delta_l": 0.032377708703279495, "std_weight": 0.05538231506943703, "weighted_loss": 0.020792869850993156 }, { "epoch": 12.939130434782609, "grad_norm": 0.4927133279287555, "learning_rate": 3.360782981659659e-06, "loss": 0.044, "step": 750 }, { "avg_delta_l": -0.008784648962318897, "avg_loss_unweighted": 0.040381595492362976, "avg_weight": 0.5553438067436218, "std_delta_l": 0.03143077716231346, "std_weight": 0.05894111841917038, "weighted_loss": 0.02365858107805252 }, { "avg_delta_l": 0.002928657690063119, "avg_loss_unweighted": 0.03801768273115158, "avg_weight": 0.5369704961776733, "std_delta_l": 0.033361807465553284, "std_weight": 0.051803357899188995, "weighted_loss": 0.021502960473299026 }, { "avg_delta_l": -0.0031433356925845146, "avg_loss_unweighted": 0.04271329939365387, "avg_weight": 0.5436152815818787, "std_delta_l": 0.029354635626077652, "std_weight": 0.050325796008110046, "weighted_loss": 0.02393953502178192 }, { "avg_delta_l": 0.003242712700739503, "avg_loss_unweighted": 0.0393250472843647, "avg_weight": 0.5355383157730103, "std_delta_l": 0.032900046557188034, "std_weight": 0.04856597259640694, "weighted_loss": 0.021948521956801414 }, { "avg_delta_l": 0.005221336614340544, "avg_loss_unweighted": 0.03670394793152809, "avg_weight": 0.5317630767822266, "std_delta_l": 0.03096821717917919, "std_weight": 0.04608176648616791, "weighted_loss": 0.02037758380174637 }, { "avg_delta_l": -0.004486467689275742, "avg_loss_unweighted": 0.04629351198673248, "avg_weight": 0.5483805537223816, "std_delta_l": 0.03166204318404198, "std_weight": 0.05400886759161949, "weighted_loss": 0.02671744115650654 }, { "avg_delta_l": -0.001996462233364582, "avg_loss_unweighted": 0.035551369190216064, "avg_weight": 0.537864089012146, "std_delta_l": 0.026613635942339897, "std_weight": 0.04370788112282753, "weighted_loss": 0.019887324422597885 }, { "avg_delta_l": 0.013083776459097862, "avg_loss_unweighted": 0.029564373195171356, "avg_weight": 0.5088145136833191, "std_delta_l": 0.02017355151474476, "std_weight": 0.014629106037318707, "weighted_loss": 0.015185517258942127 }, { "avg_delta_l": 0.01206301897764206, "avg_loss_unweighted": 0.029715200886130333, "avg_weight": 0.5109493136405945, "std_delta_l": 0.021987855434417725, "std_weight": 0.017706580460071564, "weighted_loss": 0.015454171225428581 }, { "avg_delta_l": 0.005191322416067123, "avg_loss_unweighted": 0.03393559902906418, "avg_weight": 0.5172424912452698, "std_delta_l": 0.019262244924902916, "std_weight": 0.02487081103026867, "weighted_loss": 0.01790175586938858 }, { "avg_delta_l": 0.009159224107861519, "avg_loss_unweighted": 0.030409207567572594, "avg_weight": 0.5106590986251831, "std_delta_l": 0.019068270921707153, "std_weight": 0.017753032967448235, "weighted_loss": 0.015643946826457977 }, { "avg_delta_l": 0.007654112298041582, "avg_loss_unweighted": 0.029922127723693848, "avg_weight": 0.5117349624633789, "std_delta_l": 0.018220888450741768, "std_weight": 0.02053939923644066, "weighted_loss": 0.015710527077317238 }, { "avg_delta_l": 0.006125229876488447, "avg_loss_unweighted": 0.03292452543973923, "avg_weight": 0.5152755975723267, "std_delta_l": 0.018368467688560486, "std_weight": 0.022494832053780556, "weighted_loss": 0.017273662611842155 }, { "avg_delta_l": 0.008652857504785061, "avg_loss_unweighted": 0.034067705273628235, "avg_weight": 0.5074335932731628, "std_delta_l": 0.014630327001214027, "std_weight": 0.011544846929609776, "weighted_loss": 0.017401190474629402 }, { "avg_delta_l": 0.002556820400059223, "avg_loss_unweighted": 0.03207341954112053, "avg_weight": 0.520553708076477, "std_delta_l": 0.019164450466632843, "std_weight": 0.031196581199765205, "weighted_loss": 0.01695915497839451 }, { "avg_delta_l": 0.00451890891417861, "avg_loss_unweighted": 0.033810749650001526, "avg_weight": 0.5204920768737793, "std_delta_l": 0.020638180896639824, "std_weight": 0.030595608055591583, "weighted_loss": 0.018019091337919235 }, { "avg_delta_l": 0.0041730450466275215, "avg_loss_unweighted": 0.03069499135017395, "avg_weight": 0.5167343616485596, "std_delta_l": 0.018951624631881714, "std_weight": 0.029095550999045372, "weighted_loss": 0.016095969825983047 }, { "avg_delta_l": 0.009412317536771297, "avg_loss_unweighted": 0.032952144742012024, "avg_weight": 0.515210747718811, "std_delta_l": 0.022401636466383934, "std_weight": 0.023937730118632317, "weighted_loss": 0.017124973237514496 }, { "avg_delta_l": 0.00867992453277111, "avg_loss_unweighted": 0.028750844299793243, "avg_weight": 0.5114758014678955, "std_delta_l": 0.01839306391775608, "std_weight": 0.018892835825681686, "weighted_loss": 0.014912180602550507 }, { "epoch": 13.104347826086956, "grad_norm": 0.5238307097400402, "learning_rate": 3.2194027110056627e-06, "loss": 0.0356, "step": 760 }, { "avg_delta_l": 0.009061422199010849, "avg_loss_unweighted": 0.027737488970160484, "avg_weight": 0.5109638571739197, "std_delta_l": 0.017961163073778152, "std_weight": 0.01961039938032627, "weighted_loss": 0.014287864789366722 }, { "avg_delta_l": 0.0022087455727159977, "avg_loss_unweighted": 0.028914207592606544, "avg_weight": 0.5238832831382751, "std_delta_l": 0.023362532258033752, "std_weight": 0.04094228148460388, "weighted_loss": 0.015694575384259224 }, { "avg_delta_l": 0.010214826092123985, "avg_loss_unweighted": 0.026806678622961044, "avg_weight": 0.5107059478759766, "std_delta_l": 0.01898316852748394, "std_weight": 0.018063319846987724, "weighted_loss": 0.013901785016059875 }, { "avg_delta_l": 0.007484592031687498, "avg_loss_unweighted": 0.03549060970544815, "avg_weight": 0.5161688923835754, "std_delta_l": 0.020482854917645454, "std_weight": 0.026972979307174683, "weighted_loss": 0.018767528235912323 }, { "avg_delta_l": 0.008220926858484745, "avg_loss_unweighted": 0.027489176020026207, "avg_weight": 0.5094420909881592, "std_delta_l": 0.014834022149443626, "std_weight": 0.012522023171186447, "weighted_loss": 0.014226267114281654 }, { "avg_delta_l": 0.008864025585353374, "avg_loss_unweighted": 0.02942274697124958, "avg_weight": 0.5101755261421204, "std_delta_l": 0.01599956676363945, "std_weight": 0.013177772052586079, "weighted_loss": 0.015148818492889404 }, { "avg_delta_l": 0.00642052199691534, "avg_loss_unweighted": 0.02872828021645546, "avg_weight": 0.5177497267723083, "std_delta_l": 0.0222305990755558, "std_weight": 0.030468132346868515, "weighted_loss": 0.015151950530707836 }, { "avg_delta_l": 0.006359041202813387, "avg_loss_unweighted": 0.030935419723391533, "avg_weight": 0.514680027961731, "std_delta_l": 0.01864134706556797, "std_weight": 0.022539744153618813, "weighted_loss": 0.016061753034591675 }, { "avg_delta_l": 0.003416647668927908, "avg_loss_unweighted": 0.029558774083852768, "avg_weight": 0.519599437713623, "std_delta_l": 0.020561618730425835, "std_weight": 0.0326782763004303, "weighted_loss": 0.01577908918261528 }, { "avg_delta_l": 0.011452366597950459, "avg_loss_unweighted": 0.026002638041973114, "avg_weight": 0.5109701156616211, "std_delta_l": 0.020511768758296967, "std_weight": 0.020601119846105576, "weighted_loss": 0.013518724590539932 }, { "avg_delta_l": 0.006058642640709877, "avg_loss_unweighted": 0.031867820769548416, "avg_weight": 0.5238619446754456, "std_delta_l": 0.02605695277452469, "std_weight": 0.036459583789110184, "weighted_loss": 0.017300546169281006 }, { "avg_delta_l": 0.005141218192875385, "avg_loss_unweighted": 0.03109266236424446, "avg_weight": 0.5143938660621643, "std_delta_l": 0.017185309901833534, "std_weight": 0.023797303438186646, "weighted_loss": 0.01618482917547226 }, { "avg_delta_l": 0.009578592143952847, "avg_loss_unweighted": 0.03232244402170181, "avg_weight": 0.5212632417678833, "std_delta_l": 0.028014404699206352, "std_weight": 0.031934358179569244, "weighted_loss": 0.017376938834786415 }, { "avg_delta_l": 0.01207501906901598, "avg_loss_unweighted": 0.02728513814508915, "avg_weight": 0.5114628076553345, "std_delta_l": 0.02080288901925087, "std_weight": 0.01969420351088047, "weighted_loss": 0.014125853776931763 }, { "avg_delta_l": 0.009223895147442818, "avg_loss_unweighted": 0.025796297937631607, "avg_weight": 0.5129257440567017, "std_delta_l": 0.019430816173553467, "std_weight": 0.02142515406012535, "weighted_loss": 0.013526358641684055 }, { "avg_delta_l": 0.002336716279387474, "avg_loss_unweighted": 0.027709171175956726, "avg_weight": 0.5251560211181641, "std_delta_l": 0.026199158281087875, "std_weight": 0.04239947348833084, "weighted_loss": 0.015153593383729458 }, { "avg_delta_l": 0.0038951821625232697, "avg_loss_unweighted": 0.030672257766127586, "avg_weight": 0.5229809880256653, "std_delta_l": 0.022816669195890427, "std_weight": 0.03149303048849106, "weighted_loss": 0.016559746116399765 }, { "avg_delta_l": 0.004917393904179335, "avg_loss_unweighted": 0.03007751703262329, "avg_weight": 0.5201848149299622, "std_delta_l": 0.02280782349407673, "std_weight": 0.027296632528305054, "weighted_loss": 0.016071053221821785 }, { "avg_delta_l": 0.00355146243236959, "avg_loss_unweighted": 0.033053215593099594, "avg_weight": 0.5250471234321594, "std_delta_l": 0.02592962607741356, "std_weight": 0.04074675217270851, "weighted_loss": 0.01793834939599037 }, { "avg_delta_l": 0.006862847600132227, "avg_loss_unweighted": 0.028400929644703865, "avg_weight": 0.5225475430488586, "std_delta_l": 0.02627338282763958, "std_weight": 0.03649240732192993, "weighted_loss": 0.015388354659080505 }, { "epoch": 13.278260869565218, "grad_norm": 0.5295336310300334, "learning_rate": 3.079634687745976e-06, "loss": 0.0312, "step": 770 }, { "avg_delta_l": 0.0036983839236199856, "avg_loss_unweighted": 0.029038656502962112, "avg_weight": 0.5195249915122986, "std_delta_l": 0.021146338433027267, "std_weight": 0.03158460557460785, "weighted_loss": 0.015459232963621616 }, { "avg_delta_l": 0.005444896407425404, "avg_loss_unweighted": 0.03156512230634689, "avg_weight": 0.5237177610397339, "std_delta_l": 0.026303783059120178, "std_weight": 0.036887649446725845, "weighted_loss": 0.01680937595665455 }, { "avg_delta_l": 0.0031298077665269375, "avg_loss_unweighted": 0.03272430598735809, "avg_weight": 0.526183009147644, "std_delta_l": 0.025186648592352867, "std_weight": 0.03646155446767807, "weighted_loss": 0.017649689689278603 }, { "avg_delta_l": 0.006154015194624662, "avg_loss_unweighted": 0.027713553979992867, "avg_weight": 0.522866427898407, "std_delta_l": 0.026928400620818138, "std_weight": 0.03764927387237549, "weighted_loss": 0.015095039270818233 }, { "avg_delta_l": 0.007113649509847164, "avg_loss_unweighted": 0.03240425884723663, "avg_weight": 0.5216661691665649, "std_delta_l": 0.024603450670838356, "std_weight": 0.03446877747774124, "weighted_loss": 0.017404912039637566 }, { "avg_delta_l": 0.007956906221807003, "avg_loss_unweighted": 0.03021610528230667, "avg_weight": 0.5208377242088318, "std_delta_l": 0.024944892153143883, "std_weight": 0.029873047024011612, "weighted_loss": 0.01613638922572136 }, { "avg_delta_l": 0.007059547584503889, "avg_loss_unweighted": 0.03003866598010063, "avg_weight": 0.517188310623169, "std_delta_l": 0.02055264823138714, "std_weight": 0.023778289556503296, "weighted_loss": 0.01575627736747265 }, { "avg_delta_l": 0.007629160303622484, "avg_loss_unweighted": 0.028421424329280853, "avg_weight": 0.5237328410148621, "std_delta_l": 0.023576941341161728, "std_weight": 0.027546308934688568, "weighted_loss": 0.01544872298836708 }, { "avg_delta_l": 0.007511205971240997, "avg_loss_unweighted": 0.031667374074459076, "avg_weight": 0.5204498767852783, "std_delta_l": 0.024752024561166763, "std_weight": 0.03177943080663681, "weighted_loss": 0.016827667132019997 }, { "avg_delta_l": 0.0031190153677016497, "avg_loss_unweighted": 0.03672913461923599, "avg_weight": 0.5237942934036255, "std_delta_l": 0.022078022360801697, "std_weight": 0.03991223871707916, "weighted_loss": 0.019612692296504974 }, { "avg_delta_l": 0.0024301165249198675, "avg_loss_unweighted": 0.03014867566525936, "avg_weight": 0.5221390724182129, "std_delta_l": 0.01964954473078251, "std_weight": 0.033518727868795395, "weighted_loss": 0.01623954251408577 }, { "avg_delta_l": 0.0029502708930522203, "avg_loss_unweighted": 0.03540407866239548, "avg_weight": 0.5273206830024719, "std_delta_l": 0.02606050856411457, "std_weight": 0.037751685827970505, "weighted_loss": 0.01915358379483223 }, { "avg_delta_l": 0.001450365292839706, "avg_loss_unweighted": 0.03339226171374321, "avg_weight": 0.5234003067016602, "std_delta_l": 0.02049119956791401, "std_weight": 0.03284451737999916, "weighted_loss": 0.017859943211078644 }, { "avg_delta_l": 0.007386351469904184, "avg_loss_unweighted": 0.03093293495476246, "avg_weight": 0.5192537903785706, "std_delta_l": 0.024601809680461884, "std_weight": 0.03274558484554291, "weighted_loss": 0.016483785584568977 }, { "avg_delta_l": 0.0006296245846897364, "avg_loss_unweighted": 0.03204323723912239, "avg_weight": 0.5272562503814697, "std_delta_l": 0.02374439686536789, "std_weight": 0.03942493721842766, "weighted_loss": 0.017252808436751366 }, { "avg_delta_l": 0.009248675778508186, "avg_loss_unweighted": 0.028504300862550735, "avg_weight": 0.5207663774490356, "std_delta_l": 0.0278983972966671, "std_weight": 0.03671899810433388, "weighted_loss": 0.01537078246474266 }, { "avg_delta_l": 0.010953323915600777, "avg_loss_unweighted": 0.03215743973851204, "avg_weight": 0.5105012655258179, "std_delta_l": 0.021087102591991425, "std_weight": 0.016610175371170044, "weighted_loss": 0.016542160883545876 }, { "avg_delta_l": -0.0025904919020831585, "avg_loss_unweighted": 0.028725896030664444, "avg_weight": 0.5332763195037842, "std_delta_l": 0.02424277924001217, "std_weight": 0.0411752425134182, "weighted_loss": 0.01596790924668312 }, { "avg_delta_l": 0.003328189253807068, "avg_loss_unweighted": 0.03316833823919296, "avg_weight": 0.5296511054039001, "std_delta_l": 0.026515867561101913, "std_weight": 0.03928050026297569, "weighted_loss": 0.018041079863905907 }, { "avg_delta_l": 0.005148459691554308, "avg_loss_unweighted": 0.027287228032946587, "avg_weight": 0.5204283595085144, "std_delta_l": 0.021473444998264313, "std_weight": 0.02956438809633255, "weighted_loss": 0.01462307758629322 }, { "epoch": 13.452173913043477, "grad_norm": 0.4547044675750186, "learning_rate": 2.9416054652751834e-06, "loss": 0.0334, "step": 780 }, { "avg_delta_l": 0.00858941487967968, "avg_loss_unweighted": 0.02678263746201992, "avg_weight": 0.5155643224716187, "std_delta_l": 0.02020842954516411, "std_weight": 0.021225113421678543, "weighted_loss": 0.014135145582258701 }, { "avg_delta_l": 0.0048292228020727634, "avg_loss_unweighted": 0.025902966037392616, "avg_weight": 0.5184853076934814, "std_delta_l": 0.02076331526041031, "std_weight": 0.02905958518385887, "weighted_loss": 0.013793892227113247 }, { "avg_delta_l": -0.0013437126763164997, "avg_loss_unweighted": 0.030489685013890266, "avg_weight": 0.5292961001396179, "std_delta_l": 0.02309195138514042, "std_weight": 0.04305421561002731, "weighted_loss": 0.016904549673199654 }, { "avg_delta_l": 0.004300737287849188, "avg_loss_unweighted": 0.03222600370645523, "avg_weight": 0.5195502042770386, "std_delta_l": 0.0199319776147604, "std_weight": 0.030343366786837578, "weighted_loss": 0.017221694812178612 }, { "avg_delta_l": 0.004013741388916969, "avg_loss_unweighted": 0.026153361424803734, "avg_weight": 0.5235249996185303, "std_delta_l": 0.02437237650156021, "std_weight": 0.0318162739276886, "weighted_loss": 0.014070802368223667 }, { "avg_delta_l": 0.001604766584932804, "avg_loss_unweighted": 0.03208713233470917, "avg_weight": 0.5302964448928833, "std_delta_l": 0.024596981704235077, "std_weight": 0.03771797567605972, "weighted_loss": 0.017730658873915672 }, { "avg_delta_l": 0.00126351579092443, "avg_loss_unweighted": 0.029369661584496498, "avg_weight": 0.5282137393951416, "std_delta_l": 0.02204788662493229, "std_weight": 0.033849265426397324, "weighted_loss": 0.01595006138086319 }, { "avg_delta_l": 0.0013745655305683613, "avg_loss_unweighted": 0.030366800725460052, "avg_weight": 0.5256690382957458, "std_delta_l": 0.020907782018184662, "std_weight": 0.03377273678779602, "weighted_loss": 0.016444532200694084 }, { "avg_delta_l": 0.0023513471242040396, "avg_loss_unweighted": 0.03403111919760704, "avg_weight": 0.5244309902191162, "std_delta_l": 0.022693850100040436, "std_weight": 0.03800411894917488, "weighted_loss": 0.018323061987757683 }, { "avg_delta_l": 0.008388285525143147, "avg_loss_unweighted": 0.028649862855672836, "avg_weight": 0.5188819766044617, "std_delta_l": 0.020516861230134964, "std_weight": 0.024442685768008232, "weighted_loss": 0.015280086547136307 }, { "avg_delta_l": 0.0012646408285945654, "avg_loss_unweighted": 0.03386271744966507, "avg_weight": 0.5332640409469604, "std_delta_l": 0.030405601486563683, "std_weight": 0.04963567852973938, "weighted_loss": 0.018675677478313446 }, { "avg_delta_l": 0.004908549599349499, "avg_loss_unweighted": 0.029639918357133865, "avg_weight": 0.5236918330192566, "std_delta_l": 0.02369161695241928, "std_weight": 0.03512848541140556, "weighted_loss": 0.015977121889591217 }, { "avg_delta_l": -0.001632578088901937, "avg_loss_unweighted": 0.0340530164539814, "avg_weight": 0.5292791128158569, "std_delta_l": 0.023390349000692368, "std_weight": 0.040494345128536224, "weighted_loss": 0.01859317161142826 }, { "avg_delta_l": 0.008860113099217415, "avg_loss_unweighted": 0.026326488703489304, "avg_weight": 0.5157796740531921, "std_delta_l": 0.022426148876547813, "std_weight": 0.02708405628800392, "weighted_loss": 0.013912410475313663 }, { "avg_delta_l": 0.0035839348565787077, "avg_loss_unweighted": 0.03099398873746395, "avg_weight": 0.5292546153068542, "std_delta_l": 0.025365497916936874, "std_weight": 0.03196438401937485, "weighted_loss": 0.01704656332731247 }, { "avg_delta_l": 0.001536943600513041, "avg_loss_unweighted": 0.029500287026166916, "avg_weight": 0.5291199088096619, "std_delta_l": 0.02700100652873516, "std_weight": 0.04291730746626854, "weighted_loss": 0.016405582427978516 }, { "avg_delta_l": 0.001299800118431449, "avg_loss_unweighted": 0.028833571821451187, "avg_weight": 0.5242658853530884, "std_delta_l": 0.021754072979092598, "std_weight": 0.035562559962272644, "weighted_loss": 0.015481449663639069 }, { "avg_delta_l": 0.006561345420777798, "avg_loss_unweighted": 0.029116351157426834, "avg_weight": 0.520220160484314, "std_delta_l": 0.02356567792594433, "std_weight": 0.03108244575560093, "weighted_loss": 0.015483674593269825 }, { "avg_delta_l": -0.004190878476947546, "avg_loss_unweighted": 0.03241670876741409, "avg_weight": 0.5386792421340942, "std_delta_l": 0.02417294681072235, "std_weight": 0.04548211395740509, "weighted_loss": 0.018046507611870766 }, { "avg_delta_l": 0.0015591317787766457, "avg_loss_unweighted": 0.03423716872930527, "avg_weight": 0.5323066711425781, "std_delta_l": 0.026510296389460564, "std_weight": 0.041698820888996124, "weighted_loss": 0.018988830968737602 }, { "epoch": 13.626086956521739, "grad_norm": 0.43530060822812533, "learning_rate": 2.805440022585247e-06, "loss": 0.0328, "step": 790 }, { "avg_delta_l": -0.0008623439352959394, "avg_loss_unweighted": 0.030426090583205223, "avg_weight": 0.5317378640174866, "std_delta_l": 0.022350989282131195, "std_weight": 0.034130681306123734, "weighted_loss": 0.016725294291973114 }, { "avg_delta_l": 0.007919467985630035, "avg_loss_unweighted": 0.028108730912208557, "avg_weight": 0.5226108431816101, "std_delta_l": 0.027315255254507065, "std_weight": 0.03496699780225754, "weighted_loss": 0.01503946166485548 }, { "avg_delta_l": 0.004016587510704994, "avg_loss_unweighted": 0.031566835939884186, "avg_weight": 0.5249668955802917, "std_delta_l": 0.02647855132818222, "std_weight": 0.04011552035808563, "weighted_loss": 0.017071684822440147 }, { "avg_delta_l": 0.006625744979828596, "avg_loss_unweighted": 0.03141187131404877, "avg_weight": 0.5262794494628906, "std_delta_l": 0.0277144405990839, "std_weight": 0.03477250412106514, "weighted_loss": 0.01709679700434208 }, { "avg_delta_l": 0.008712222799658775, "avg_loss_unweighted": 0.02525898814201355, "avg_weight": 0.5161210894584656, "std_delta_l": 0.018890293315052986, "std_weight": 0.02226427011191845, "weighted_loss": 0.013404757715761662 }, { "avg_delta_l": 0.005550491623580456, "avg_loss_unweighted": 0.02778392657637596, "avg_weight": 0.5195234417915344, "std_delta_l": 0.021031973883509636, "std_weight": 0.030266979709267616, "weighted_loss": 0.014896771870553493 }, { "avg_delta_l": 0.002060086000710726, "avg_loss_unweighted": 0.034098368138074875, "avg_weight": 0.5318828821182251, "std_delta_l": 0.02454783208668232, "std_weight": 0.035908378660678864, "weighted_loss": 0.01861780881881714 }, { "avg_delta_l": 0.006907880771905184, "avg_loss_unweighted": 0.02916054055094719, "avg_weight": 0.5224606990814209, "std_delta_l": 0.024729445576667786, "std_weight": 0.032012805342674255, "weighted_loss": 0.015712227672338486 }, { "avg_delta_l": 0.006273357197642326, "avg_loss_unweighted": 0.02850138396024704, "avg_weight": 0.5180254578590393, "std_delta_l": 0.020054763182997704, "std_weight": 0.026769939810037613, "weighted_loss": 0.015160009264945984 }, { "avg_delta_l": 0.004785592667758465, "avg_loss_unweighted": 0.02872200682759285, "avg_weight": 0.5153874158859253, "std_delta_l": 0.019161663949489594, "std_weight": 0.02634255588054657, "weighted_loss": 0.01502703595906496 }, { "avg_delta_l": 0.001386886928230524, "avg_loss_unweighted": 0.032397374510765076, "avg_weight": 0.5237415432929993, "std_delta_l": 0.021381642669439316, "std_weight": 0.034389615058898926, "weighted_loss": 0.017386944964528084 }, { "avg_delta_l": -0.002579455729573965, "avg_loss_unweighted": 0.03429631143808365, "avg_weight": 0.5363684892654419, "std_delta_l": 0.02294369414448738, "std_weight": 0.04011668264865875, "weighted_loss": 0.019166775047779083 }, { "avg_delta_l": 0.0056259906850755215, "avg_loss_unweighted": 0.027123572304844856, "avg_weight": 0.5261708498001099, "std_delta_l": 0.026410410180687904, "std_weight": 0.03494209051132202, "weighted_loss": 0.01481519266963005 }, { "avg_delta_l": 0.004075366072356701, "avg_loss_unweighted": 0.03065824694931507, "avg_weight": 0.5308693051338196, "std_delta_l": 0.026202142238616943, "std_weight": 0.0440501943230629, "weighted_loss": 0.016858097165822983 }, { "avg_delta_l": 0.008806345984339714, "avg_loss_unweighted": 0.03181125223636627, "avg_weight": 0.521784245967865, "std_delta_l": 0.028115930035710335, "std_weight": 0.03915439546108246, "weighted_loss": 0.01722959242761135 }, { "avg_delta_l": -0.0006790788611397147, "avg_loss_unweighted": 0.02922314777970314, "avg_weight": 0.5287705659866333, "std_delta_l": 0.021665293723344803, "std_weight": 0.03344084694981575, "weighted_loss": 0.015891971066594124 }, { "avg_delta_l": -0.004305132664740086, "avg_loss_unweighted": 0.0311131551861763, "avg_weight": 0.5363298654556274, "std_delta_l": 0.021094130352139473, "std_weight": 0.034446753561496735, "weighted_loss": 0.01718619279563427 }, { "avg_delta_l": 0.0023904754780232906, "avg_loss_unweighted": 0.028323601931333542, "avg_weight": 0.5257049202919006, "std_delta_l": 0.02359924092888832, "std_weight": 0.03304241597652435, "weighted_loss": 0.015447027049958706 }, { "avg_delta_l": -0.00293395621702075, "avg_loss_unweighted": 0.031500011682510376, "avg_weight": 0.5374029278755188, "std_delta_l": 0.026931095868349075, "std_weight": 0.04525916278362274, "weighted_loss": 0.01757470890879631 }, { "avg_delta_l": 0.004429090302437544, "avg_loss_unweighted": 0.02874992974102497, "avg_weight": 0.5218919515609741, "std_delta_l": 0.020667225122451782, "std_weight": 0.029464170336723328, "weighted_loss": 0.015398065559566021 }, { "epoch": 13.8, "grad_norm": 0.42267360547837785, "learning_rate": 2.671261651102885e-06, "loss": 0.0326, "step": 800 }, { "avg_delta_l": 0.007586264982819557, "avg_loss_unweighted": 0.03127158060669899, "avg_weight": 0.5205981731414795, "std_delta_l": 0.02524680271744728, "std_weight": 0.03296876326203346, "weighted_loss": 0.016591578722000122 }, { "avg_delta_l": -0.0032613007351756096, "avg_loss_unweighted": 0.03607511892914772, "avg_weight": 0.5378285646438599, "std_delta_l": 0.025734977796673775, "std_weight": 0.04373667389154434, "weighted_loss": 0.02021593227982521 }, { "avg_delta_l": 0.0026191750075668097, "avg_loss_unweighted": 0.028893502429127693, "avg_weight": 0.5282503962516785, "std_delta_l": 0.025455664843320847, "std_weight": 0.039317406713962555, "weighted_loss": 0.0158088281750679 }, { "avg_delta_l": 0.0019082813523709774, "avg_loss_unweighted": 0.03455997258424759, "avg_weight": 0.5344803929328918, "std_delta_l": 0.030144592747092247, "std_weight": 0.04618578404188156, "weighted_loss": 0.019056597724556923 }, { "avg_delta_l": 0.002293095923960209, "avg_loss_unweighted": 0.02912968583405018, "avg_weight": 0.5234602093696594, "std_delta_l": 0.019668126478791237, "std_weight": 0.026622554287314415, "weighted_loss": 0.01551605761051178 }, { "avg_delta_l": -0.002880493877455592, "avg_loss_unweighted": 0.030627286061644554, "avg_weight": 0.5366392135620117, "std_delta_l": 0.024054203182458878, "std_weight": 0.03920266032218933, "weighted_loss": 0.016936665400862694 }, { "avg_delta_l": 0.0024030287750065327, "avg_loss_unweighted": 0.027507510036230087, "avg_weight": 0.5260494351387024, "std_delta_l": 0.02405647747218609, "std_weight": 0.03573433682322502, "weighted_loss": 0.014850585721433163 }, { "avg_delta_l": -0.001047685043886304, "avg_loss_unweighted": 0.02960350550711155, "avg_weight": 0.530761182308197, "std_delta_l": 0.023296574130654335, "std_weight": 0.04058624058961868, "weighted_loss": 0.016405127942562103 }, { "avg_delta_l": -0.0018030392238870263, "avg_loss_unweighted": 0.03185990825295448, "avg_weight": 0.5402849316596985, "std_delta_l": 0.02940998412668705, "std_weight": 0.04569205641746521, "weighted_loss": 0.018018169328570366 }, { "avg_delta_l": 0.0007827320951037109, "avg_loss_unweighted": 0.02803151309490204, "avg_weight": 0.531147837638855, "std_delta_l": 0.026326218619942665, "std_weight": 0.04005756601691246, "weighted_loss": 0.015469325706362724 }, { "avg_delta_l": -0.00405430793762207, "avg_loss_unweighted": 0.030736204236745834, "avg_weight": 0.5403574705123901, "std_delta_l": 0.023598967120051384, "std_weight": 0.043610963970422745, "weighted_loss": 0.017215285450220108 }, { "avg_delta_l": 0.0033809365704655647, "avg_loss_unweighted": 0.027393994852900505, "avg_weight": 0.5233604907989502, "std_delta_l": 0.019000327214598656, "std_weight": 0.024389799684286118, "weighted_loss": 0.014649048447608948 }, { "avg_delta_l": 0.003338126465678215, "avg_loss_unweighted": 0.028241565451025963, "avg_weight": 0.52637779712677, "std_delta_l": 0.02310742437839508, "std_weight": 0.034442294389009476, "weighted_loss": 0.015161802992224693 }, { "avg_delta_l": -0.005695585161447525, "avg_loss_unweighted": 0.03271304443478584, "avg_weight": 0.5360530018806458, "std_delta_l": 0.01973877288401127, "std_weight": 0.03614465892314911, "weighted_loss": 0.017992377281188965 }, { "avg_delta_l": -0.0035581449046730995, "avg_loss_unweighted": 0.033521685749292374, "avg_weight": 0.5370268225669861, "std_delta_l": 0.02409004047513008, "std_weight": 0.04180775582790375, "weighted_loss": 0.018718048930168152 }, { "avg_delta_l": -0.0027703172527253628, "avg_loss_unweighted": 0.034765828400850296, "avg_weight": 0.5387489795684814, "std_delta_l": 0.028050510212779045, "std_weight": 0.04618917405605316, "weighted_loss": 0.01932169497013092 }, { "avg_delta_l": -0.0008184931939467788, "avg_loss_unweighted": 0.03108515776693821, "avg_weight": 0.531829297542572, "std_delta_l": 0.022949041798710823, "std_weight": 0.041323281824588776, "weighted_loss": 0.017327332869172096 }, { "avg_delta_l": 0.005988292396068573, "avg_loss_unweighted": 0.028556473553180695, "avg_weight": 0.5312653183937073, "std_delta_l": 0.027924999594688416, "std_weight": 0.0390833355486393, "weighted_loss": 0.015956314280629158 }, { "avg_delta_l": -0.006842790171504021, "avg_loss_unweighted": 0.035098783671855927, "avg_weight": 0.5424867868423462, "std_delta_l": 0.023939067497849464, "std_weight": 0.04450875520706177, "weighted_loss": 0.019943317398428917 }, { "avg_delta_l": -0.00029261584859341383, "avg_loss_unweighted": 0.03563443943858147, "avg_weight": 0.5312431454658508, "std_delta_l": 0.026586100459098816, "std_weight": 0.04028352349996567, "weighted_loss": 0.01947527378797531 }, { "epoch": 13.97391304347826, "grad_norm": 0.41486954745592197, "learning_rate": 2.5391918430549635e-06, "loss": 0.0345, "step": 810 }, { "avg_delta_l": -0.00021701905643567443, "avg_loss_unweighted": 0.03089890070259571, "avg_weight": 0.5393850803375244, "std_delta_l": 0.030696669593453407, "std_weight": 0.04676812142133713, "weighted_loss": 0.017542438581585884 }, { "avg_delta_l": 0.006038941442966461, "avg_loss_unweighted": 0.028519436717033386, "avg_weight": 0.5238807797431946, "std_delta_l": 0.02409026212990284, "std_weight": 0.028666052967309952, "weighted_loss": 0.015426315367221832 }, { "avg_delta_l": 0.010386933572590351, "avg_loss_unweighted": 0.02197698876261711, "avg_weight": 0.5138540863990784, "std_delta_l": 0.020491598173975945, "std_weight": 0.016753461211919785, "weighted_loss": 0.011509345844388008 }, { "avg_delta_l": 0.006436248309910297, "avg_loss_unweighted": 0.0278153195977211, "avg_weight": 0.5131543874740601, "std_delta_l": 0.016478605568408966, "std_weight": 0.01965799182653427, "weighted_loss": 0.014526955783367157 }, { "avg_delta_l": 0.00806174986064434, "avg_loss_unweighted": 0.0235797967761755, "avg_weight": 0.5120318531990051, "std_delta_l": 0.020054079592227936, "std_weight": 0.019554803147912025, "weighted_loss": 0.012294591404497623 }, { "avg_delta_l": 0.0024356788489967585, "avg_loss_unweighted": 0.023612771183252335, "avg_weight": 0.5147045850753784, "std_delta_l": 0.014749136753380299, "std_weight": 0.021882666274905205, "weighted_loss": 0.012375697493553162 }, { "avg_delta_l": 0.005870671942830086, "avg_loss_unweighted": 0.021222760900855064, "avg_weight": 0.5104160904884338, "std_delta_l": 0.015651732683181763, "std_weight": 0.01995326578617096, "weighted_loss": 0.011018292978405952 }, { "avg_delta_l": 0.008809605613350868, "avg_loss_unweighted": 0.025638369843363762, "avg_weight": 0.5117085576057434, "std_delta_l": 0.016652369871735573, "std_weight": 0.01810244470834732, "weighted_loss": 0.013303624466061592 }, { "avg_delta_l": 0.005695787258446217, "avg_loss_unweighted": 0.024274606257677078, "avg_weight": 0.5076409578323364, "std_delta_l": 0.01278676651418209, "std_weight": 0.014853076077997684, "weighted_loss": 0.01239097397774458 }, { "avg_delta_l": 0.009225854650139809, "avg_loss_unweighted": 0.020541200414299965, "avg_weight": 0.5048776865005493, "std_delta_l": 0.0144344512373209, "std_weight": 0.007707372307777405, "weighted_loss": 0.010467272251844406 }, { "avg_delta_l": 0.006654705386608839, "avg_loss_unweighted": 0.023760473355650902, "avg_weight": 0.5078542232513428, "std_delta_l": 0.012031247839331627, "std_weight": 0.009919905103743076, "weighted_loss": 0.012146050110459328 }, { "avg_delta_l": 0.00391991576179862, "avg_loss_unweighted": 0.0249324981123209, "avg_weight": 0.5151374936103821, "std_delta_l": 0.017090681940317154, "std_weight": 0.022559301927685738, "weighted_loss": 0.01306871697306633 }, { "avg_delta_l": 0.006210461258888245, "avg_loss_unweighted": 0.029680153355002403, "avg_weight": 0.5152265429496765, "std_delta_l": 0.018214350566267967, "std_weight": 0.023785365745425224, "weighted_loss": 0.015492801554501057 }, { "avg_delta_l": 0.004250461235642433, "avg_loss_unweighted": 0.027256254106760025, "avg_weight": 0.5153092741966248, "std_delta_l": 0.017379069700837135, "std_weight": 0.023214014247059822, "weighted_loss": 0.014265311881899834 }, { "avg_delta_l": 0.005248298402875662, "avg_loss_unweighted": 0.020655537024140358, "avg_weight": 0.5142253041267395, "std_delta_l": 0.018166499212384224, "std_weight": 0.021132776513695717, "weighted_loss": 0.010970672592520714 }, { "avg_delta_l": 0.004783114418387413, "avg_loss_unweighted": 0.02452540583908558, "avg_weight": 0.5172795653343201, "std_delta_l": 0.021303284913301468, "std_weight": 0.031062932685017586, "weighted_loss": 0.013111802749335766 }, { "avg_delta_l": 0.006800836883485317, "avg_loss_unweighted": 0.02452097088098526, "avg_weight": 0.5134789943695068, "std_delta_l": 0.018385257571935654, "std_weight": 0.01857779175043106, "weighted_loss": 0.012808357365429401 }, { "avg_delta_l": 0.004465869627892971, "avg_loss_unweighted": 0.022963091731071472, "avg_weight": 0.5160488486289978, "std_delta_l": 0.018290473148226738, "std_weight": 0.026235852390527725, "weighted_loss": 0.012247342616319656 }, { "avg_delta_l": 0.0017086479347199202, "avg_loss_unweighted": 0.021756703034043312, "avg_weight": 0.5149482488632202, "std_delta_l": 0.012379957363009453, "std_weight": 0.01922762766480446, "weighted_loss": 0.011397802270948887 }, { "epoch": 14.139130434782608, "grad_norm": 0.3764011343696211, "learning_rate": 2.4093501814629867e-06, "loss": 0.0246, "step": 820 }, { "avg_delta_l": 0.005700241774320602, "avg_loss_unweighted": 0.026056012138724327, "avg_weight": 0.5131319165229797, "std_delta_l": 0.015945978462696075, "std_weight": 0.019336413592100143, "weighted_loss": 0.013519295491278172 }, { "avg_delta_l": 0.007151712663471699, "avg_loss_unweighted": 0.026031019166111946, "avg_weight": 0.5120704174041748, "std_delta_l": 0.019532974809408188, "std_weight": 0.022285550832748413, "weighted_loss": 0.013511589728295803 }, { "avg_delta_l": 0.00790691189467907, "avg_loss_unweighted": 0.02587784454226494, "avg_weight": 0.5103326439857483, "std_delta_l": 0.016484815627336502, "std_weight": 0.018660735338926315, "weighted_loss": 0.013339580036699772 }, { "avg_delta_l": 0.008345894515514374, "avg_loss_unweighted": 0.026452282443642616, "avg_weight": 0.5069016814231873, "std_delta_l": 0.01330373901873827, "std_weight": 0.010866554453969002, "weighted_loss": 0.01350521668791771 }, { "avg_delta_l": 0.004453964531421661, "avg_loss_unweighted": 0.027645651251077652, "avg_weight": 0.5172539353370667, "std_delta_l": 0.018870336934924126, "std_weight": 0.02751619927585125, "weighted_loss": 0.01462512742727995 }, { "avg_delta_l": 0.010352333076298237, "avg_loss_unweighted": 0.023339662700891495, "avg_weight": 0.5111643671989441, "std_delta_l": 0.01898587867617607, "std_weight": 0.015530863776803017, "weighted_loss": 0.01207535807043314 }, { "avg_delta_l": 0.006363534834235907, "avg_loss_unweighted": 0.022329580038785934, "avg_weight": 0.515343427658081, "std_delta_l": 0.018163418397307396, "std_weight": 0.02215433679521084, "weighted_loss": 0.011817960999906063 }, { "avg_delta_l": 0.005195447243750095, "avg_loss_unweighted": 0.027286451309919357, "avg_weight": 0.515741765499115, "std_delta_l": 0.01842910796403885, "std_weight": 0.02723783813416958, "weighted_loss": 0.014444618485867977 }, { "avg_delta_l": 0.0024177273735404015, "avg_loss_unweighted": 0.025280192494392395, "avg_weight": 0.5204297304153442, "std_delta_l": 0.020218398422002792, "std_weight": 0.032793737947940826, "weighted_loss": 0.013555456884205341 }, { "avg_delta_l": 0.0007113983738236129, "avg_loss_unweighted": 0.027302665635943413, "avg_weight": 0.5207186341285706, "std_delta_l": 0.01704094372689724, "std_weight": 0.027537334710359573, "weighted_loss": 0.014627419412136078 }, { "avg_delta_l": 0.007581315468996763, "avg_loss_unweighted": 0.02263282611966133, "avg_weight": 0.5117247700691223, "std_delta_l": 0.017857100814580917, "std_weight": 0.019663766026496887, "weighted_loss": 0.01174197904765606 }, { "avg_delta_l": 0.005957499612122774, "avg_loss_unweighted": 0.025384699925780296, "avg_weight": 0.5146675109863281, "std_delta_l": 0.017296181991696358, "std_weight": 0.01974938064813614, "weighted_loss": 0.013254581950604916 }, { "avg_delta_l": 0.005539121106266975, "avg_loss_unweighted": 0.02353125996887684, "avg_weight": 0.5137306451797485, "std_delta_l": 0.015973050147294998, "std_weight": 0.01994425617158413, "weighted_loss": 0.012320419773459435 }, { "avg_delta_l": 0.008213086053729057, "avg_loss_unweighted": 0.02370074950158596, "avg_weight": 0.5139777064323425, "std_delta_l": 0.019427625462412834, "std_weight": 0.02519853599369526, "weighted_loss": 0.012516549788415432 }, { "avg_delta_l": 0.0068447464145720005, "avg_loss_unweighted": 0.0242213923484087, "avg_weight": 0.5144924521446228, "std_delta_l": 0.01729154959321022, "std_weight": 0.022949587553739548, "weighted_loss": 0.012713308446109295 }, { "avg_delta_l": 0.0036175905261188745, "avg_loss_unweighted": 0.021080080419778824, "avg_weight": 0.5169863700866699, "std_delta_l": 0.01729109324514866, "std_weight": 0.026031143963336945, "weighted_loss": 0.011199385859072208 }, { "avg_delta_l": 0.005546939093619585, "avg_loss_unweighted": 0.026135025545954704, "avg_weight": 0.5185582637786865, "std_delta_l": 0.022573841735720634, "std_weight": 0.028091643005609512, "weighted_loss": 0.013681610114872456 }, { "avg_delta_l": 0.0035880061332136393, "avg_loss_unweighted": 0.02534761279821396, "avg_weight": 0.5221060514450073, "std_delta_l": 0.022509898990392685, "std_weight": 0.031196894124150276, "weighted_loss": 0.01342877745628357 }, { "avg_delta_l": 0.006723512429744005, "avg_loss_unweighted": 0.022189941257238388, "avg_weight": 0.5133349895477295, "std_delta_l": 0.01869174651801586, "std_weight": 0.022672735154628754, "weighted_loss": 0.01156417652964592 }, { "avg_delta_l": 0.0014699043240398169, "avg_loss_unweighted": 0.020733585581183434, "avg_weight": 0.5200498700141907, "std_delta_l": 0.018865326419472694, "std_weight": 0.03098405711352825, "weighted_loss": 0.011236649006605148 }, { "epoch": 14.31304347826087, "grad_norm": 0.4665863549023138, "learning_rate": 2.2818542318662617e-06, "loss": 0.0259, "step": 830 }, { "avg_delta_l": 0.002433181507512927, "avg_loss_unweighted": 0.022738788276910782, "avg_weight": 0.5192875266075134, "std_delta_l": 0.01805342361330986, "std_weight": 0.026813648641109467, "weighted_loss": 0.012164474464952946 }, { "avg_delta_l": 0.005384458228945732, "avg_loss_unweighted": 0.024104274809360504, "avg_weight": 0.5169423818588257, "std_delta_l": 0.020805660635232925, "std_weight": 0.027166444808244705, "weighted_loss": 0.01271020993590355 }, { "avg_delta_l": 0.004601978696882725, "avg_loss_unweighted": 0.024034850299358368, "avg_weight": 0.5167086124420166, "std_delta_l": 0.017772968858480453, "std_weight": 0.02740366756916046, "weighted_loss": 0.012895917519927025 }, { "avg_delta_l": 0.0031445843633264303, "avg_loss_unweighted": 0.02384338714182377, "avg_weight": 0.5203033685684204, "std_delta_l": 0.019995812326669693, "std_weight": 0.030678333714604378, "weighted_loss": 0.012863746844232082 }, { "avg_delta_l": 0.002316043945029378, "avg_loss_unweighted": 0.02378697693347931, "avg_weight": 0.5159375667572021, "std_delta_l": 0.016384562477469444, "std_weight": 0.025455238297581673, "weighted_loss": 0.012546068988740444 }, { "avg_delta_l": 0.004968528635799885, "avg_loss_unweighted": 0.02364174835383892, "avg_weight": 0.5146114230155945, "std_delta_l": 0.01788976974785328, "std_weight": 0.024014942348003387, "weighted_loss": 0.012408277951180935 }, { "avg_delta_l": 0.005848804023116827, "avg_loss_unweighted": 0.02423117496073246, "avg_weight": 0.5174063444137573, "std_delta_l": 0.020138509571552277, "std_weight": 0.025168608874082565, "weighted_loss": 0.012862782925367355 }, { "avg_delta_l": 0.0042410786263644695, "avg_loss_unweighted": 0.025237273424863815, "avg_weight": 0.5152809619903564, "std_delta_l": 0.014506297186017036, "std_weight": 0.019500482827425003, "weighted_loss": 0.01314905658364296 }, { "avg_delta_l": 0.004223943222314119, "avg_loss_unweighted": 0.021675636991858482, "avg_weight": 0.5161202549934387, "std_delta_l": 0.019178397953510284, "std_weight": 0.02503271959722042, "weighted_loss": 0.01154643390327692 }, { "avg_delta_l": 0.003776410361751914, "avg_loss_unweighted": 0.023434914648532867, "avg_weight": 0.517734944820404, "std_delta_l": 0.01928229257464409, "std_weight": 0.030251208692789078, "weighted_loss": 0.012452097609639168 }, { "avg_delta_l": 0.006048061419278383, "avg_loss_unweighted": 0.01842886209487915, "avg_weight": 0.5121982097625732, "std_delta_l": 0.016973664984107018, "std_weight": 0.018359458073973656, "weighted_loss": 0.009585557505488396 }, { "avg_delta_l": 0.0066454531624913216, "avg_loss_unweighted": 0.022562317550182343, "avg_weight": 0.5122649669647217, "std_delta_l": 0.017829319462180138, "std_weight": 0.01944543421268463, "weighted_loss": 0.011706640012562275 }, { "avg_delta_l": 0.0027713265735656023, "avg_loss_unweighted": 0.022873224690556526, "avg_weight": 0.5189910531044006, "std_delta_l": 0.018211154267191887, "std_weight": 0.024385496973991394, "weighted_loss": 0.012172915041446686 }, { "avg_delta_l": 0.00509366299957037, "avg_loss_unweighted": 0.023488523438572884, "avg_weight": 0.5163611173629761, "std_delta_l": 0.01867792196571827, "std_weight": 0.025681711733341217, "weighted_loss": 0.012501600198447704 }, { "avg_delta_l": 0.003718260210007429, "avg_loss_unweighted": 0.02638678252696991, "avg_weight": 0.5168783664703369, "std_delta_l": 0.018334565684199333, "std_weight": 0.02922472357749939, "weighted_loss": 0.013843581080436707 }, { "avg_delta_l": 0.004446176812052727, "avg_loss_unweighted": 0.025603029876947403, "avg_weight": 0.5236845016479492, "std_delta_l": 0.02364523895084858, "std_weight": 0.03250075876712799, "weighted_loss": 0.013779988512396812 }, { "avg_delta_l": 0.008381062187254429, "avg_loss_unweighted": 0.021538030356168747, "avg_weight": 0.5110770463943481, "std_delta_l": 0.01769060082733631, "std_weight": 0.018465731292963028, "weighted_loss": 0.011195141822099686 }, { "avg_delta_l": 0.0022643536794930696, "avg_loss_unweighted": 0.024843905121088028, "avg_weight": 0.5185621976852417, "std_delta_l": 0.01810476742684841, "std_weight": 0.030470797792077065, "weighted_loss": 0.013201085850596428 }, { "avg_delta_l": 0.005343456752598286, "avg_loss_unweighted": 0.025037482380867004, "avg_weight": 0.5170117616653442, "std_delta_l": 0.019590303301811218, "std_weight": 0.025116220116615295, "weighted_loss": 0.013258812949061394 }, { "avg_delta_l": 0.002165333367884159, "avg_loss_unweighted": 0.023323021829128265, "avg_weight": 0.5214672088623047, "std_delta_l": 0.0213850699365139, "std_weight": 0.0298386812210083, "weighted_loss": 0.01252271980047226 }, { "epoch": 14.486956521739131, "grad_norm": 0.3529300945461485, "learning_rate": 2.156819435871824e-06, "loss": 0.0249, "step": 840 }, { "avg_delta_l": 0.0013072877191007137, "avg_loss_unweighted": 0.022171955555677414, "avg_weight": 0.5179241299629211, "std_delta_l": 0.01782548427581787, "std_weight": 0.02904646284878254, "weighted_loss": 0.011709872633218765 }, { "avg_delta_l": 0.0055696661584079266, "avg_loss_unweighted": 0.02276940830051899, "avg_weight": 0.5100464820861816, "std_delta_l": 0.014097948558628559, "std_weight": 0.01885024644434452, "weighted_loss": 0.01166573166847229 }, { "avg_delta_l": 0.01021455880254507, "avg_loss_unweighted": 0.02277674712240696, "avg_weight": 0.5126543641090393, "std_delta_l": 0.016791513189673424, "std_weight": 0.017820755019783974, "weighted_loss": 0.011901249177753925 }, { "avg_delta_l": 0.004482741467654705, "avg_loss_unweighted": 0.023111503571271896, "avg_weight": 0.5189597606658936, "std_delta_l": 0.02131158672273159, "std_weight": 0.025096161291003227, "weighted_loss": 0.012305614538490772 }, { "avg_delta_l": 0.001224837964400649, "avg_loss_unweighted": 0.028706327080726624, "avg_weight": 0.5221050381660461, "std_delta_l": 0.018145672976970673, "std_weight": 0.031359873712062836, "weighted_loss": 0.015315534546971321 }, { "avg_delta_l": -0.00023022861569188535, "avg_loss_unweighted": 0.02190704084932804, "avg_weight": 0.5201753973960876, "std_delta_l": 0.016799841076135635, "std_weight": 0.02849237620830536, "weighted_loss": 0.011843102052807808 }, { "avg_delta_l": 0.0007628611056134105, "avg_loss_unweighted": 0.027137864381074905, "avg_weight": 0.5222450494766235, "std_delta_l": 0.01939486898481846, "std_weight": 0.031094806268811226, "weighted_loss": 0.01455595064908266 }, { "avg_delta_l": 0.007384626194834709, "avg_loss_unweighted": 0.024053730070590973, "avg_weight": 0.5128142833709717, "std_delta_l": 0.019195325672626495, "std_weight": 0.022727372124791145, "weighted_loss": 0.012623146176338196 }, { "avg_delta_l": 0.003958922810852528, "avg_loss_unweighted": 0.02346194162964821, "avg_weight": 0.5202484130859375, "std_delta_l": 0.02087452821433544, "std_weight": 0.02897944673895836, "weighted_loss": 0.012505185790359974 }, { "avg_delta_l": 0.003980673849582672, "avg_loss_unweighted": 0.02469327300786972, "avg_weight": 0.5194283127784729, "std_delta_l": 0.02017003670334816, "std_weight": 0.03231457620859146, "weighted_loss": 0.013131937012076378 }, { "avg_delta_l": 0.004528407007455826, "avg_loss_unweighted": 0.02134798653423786, "avg_weight": 0.5171082615852356, "std_delta_l": 0.019000012427568436, "std_weight": 0.02434721775352955, "weighted_loss": 0.011294437572360039 }, { "avg_delta_l": 0.003951637540012598, "avg_loss_unweighted": 0.022107353433966637, "avg_weight": 0.5221604704856873, "std_delta_l": 0.022243738174438477, "std_weight": 0.029973119497299194, "weighted_loss": 0.011937766335904598 }, { "avg_delta_l": 0.004138825926929712, "avg_loss_unweighted": 0.026198463514447212, "avg_weight": 0.5166816711425781, "std_delta_l": 0.01751095801591873, "std_weight": 0.0246390663087368, "weighted_loss": 0.013815016485750675 }, { "avg_delta_l": 0.003243424464017153, "avg_loss_unweighted": 0.023133017122745514, "avg_weight": 0.5176514983177185, "std_delta_l": 0.018912149593234062, "std_weight": 0.02836029604077339, "weighted_loss": 0.012287951074540615 }, { "avg_delta_l": 0.0022787447087466717, "avg_loss_unweighted": 0.02611604705452919, "avg_weight": 0.5204088687896729, "std_delta_l": 0.020790603011846542, "std_weight": 0.03083900175988674, "weighted_loss": 0.013881353661417961 }, { "avg_delta_l": -3.8416997995227575e-05, "avg_loss_unweighted": 0.02693418227136135, "avg_weight": 0.5269520878791809, "std_delta_l": 0.02172132581472397, "std_weight": 0.03347837179899216, "weighted_loss": 0.014572475105524063 }, { "avg_delta_l": -0.0010397800942882895, "avg_loss_unweighted": 0.026865888386964798, "avg_weight": 0.5280834436416626, "std_delta_l": 0.020703012123703957, "std_weight": 0.0327560231089592, "weighted_loss": 0.014608887024223804 }, { "avg_delta_l": 0.00041625008452683687, "avg_loss_unweighted": 0.026022592559456825, "avg_weight": 0.521450400352478, "std_delta_l": 0.017178382724523544, "std_weight": 0.028072336688637733, "weighted_loss": 0.013829343020915985 }, { "avg_delta_l": 0.00248293811455369, "avg_loss_unweighted": 0.024334995076060295, "avg_weight": 0.523112416267395, "std_delta_l": 0.020458364859223366, "std_weight": 0.03185074403882027, "weighted_loss": 0.0130641870200634 }, { "avg_delta_l": 0.0037137670442461967, "avg_loss_unweighted": 0.02296299859881401, "avg_weight": 0.5241339802742004, "std_delta_l": 0.022132454439997673, "std_weight": 0.02970583550632, "weighted_loss": 0.012413963675498962 }, { "epoch": 14.660869565217391, "grad_norm": 0.4012370492518503, "learning_rate": 2.0343590066274656e-06, "loss": 0.0259, "step": 850 }, { "avg_delta_l": 0.005237779580056667, "avg_loss_unweighted": 0.02347036823630333, "avg_weight": 0.5140032172203064, "std_delta_l": 0.016319211572408676, "std_weight": 0.0186980739235878, "weighted_loss": 0.012330381199717522 }, { "avg_delta_l": 0.004130691289901733, "avg_loss_unweighted": 0.028685297816991806, "avg_weight": 0.5240224003791809, "std_delta_l": 0.023597529157996178, "std_weight": 0.03152395784854889, "weighted_loss": 0.015385974198579788 }, { "avg_delta_l": 0.00036828801967203617, "avg_loss_unweighted": 0.027697499841451645, "avg_weight": 0.5257788896560669, "std_delta_l": 0.019149553030729294, "std_weight": 0.03155713528394699, "weighted_loss": 0.014966263435781002 }, { "avg_delta_l": 0.0020553353242576122, "avg_loss_unweighted": 0.02725844830274582, "avg_weight": 0.5235313177108765, "std_delta_l": 0.0209097471088171, "std_weight": 0.02853444404900074, "weighted_loss": 0.014645432122051716 }, { "avg_delta_l": 0.0007303101010620594, "avg_loss_unweighted": 0.02376876398921013, "avg_weight": 0.5225455164909363, "std_delta_l": 0.015272075310349464, "std_weight": 0.023171890527009964, "weighted_loss": 0.012688535265624523 }, { "avg_delta_l": 0.003238012082874775, "avg_loss_unweighted": 0.02386980503797531, "avg_weight": 0.524042010307312, "std_delta_l": 0.020703228190541267, "std_weight": 0.031523432582616806, "weighted_loss": 0.012943658977746964 }, { "avg_delta_l": -0.0004810128593817353, "avg_loss_unweighted": 0.023800991475582123, "avg_weight": 0.524174690246582, "std_delta_l": 0.01944940723478794, "std_weight": 0.03352555260062218, "weighted_loss": 0.012848646380007267 }, { "avg_delta_l": 0.0015649946872144938, "avg_loss_unweighted": 0.023945756256580353, "avg_weight": 0.5234392881393433, "std_delta_l": 0.021647924557328224, "std_weight": 0.03602838143706322, "weighted_loss": 0.012947706505656242 }, { "avg_delta_l": 0.0029900390654802322, "avg_loss_unweighted": 0.02436654455959797, "avg_weight": 0.5203312635421753, "std_delta_l": 0.01790802553296089, "std_weight": 0.022355249151587486, "weighted_loss": 0.012975060380995274 }, { "avg_delta_l": 0.004221441224217415, "avg_loss_unweighted": 0.024996783584356308, "avg_weight": 0.5246632099151611, "std_delta_l": 0.022988712415099144, "std_weight": 0.032705679535865784, "weighted_loss": 0.013675411231815815 }, { "avg_delta_l": 0.00433532427996397, "avg_loss_unweighted": 0.02290724776685238, "avg_weight": 0.5217901468276978, "std_delta_l": 0.02202625945210457, "std_weight": 0.02998500131070614, "weighted_loss": 0.012530311942100525 }, { "avg_delta_l": 0.002230412792414427, "avg_loss_unweighted": 0.02482721582055092, "avg_weight": 0.5222054719924927, "std_delta_l": 0.021765831857919693, "std_weight": 0.0281792301684618, "weighted_loss": 0.013335390947759151 }, { "avg_delta_l": 0.0010484782978892326, "avg_loss_unweighted": 0.023229680955410004, "avg_weight": 0.5260939002037048, "std_delta_l": 0.020206820219755173, "std_weight": 0.02872117981314659, "weighted_loss": 0.01268720906227827 }, { "avg_delta_l": 0.0026617515832185745, "avg_loss_unweighted": 0.02299428917467594, "avg_weight": 0.5231078267097473, "std_delta_l": 0.023026004433631897, "std_weight": 0.03371494635939598, "weighted_loss": 0.012474508956074715 }, { "avg_delta_l": 0.0005247258231975138, "avg_loss_unweighted": 0.01934673637151718, "avg_weight": 0.5206865668296814, "std_delta_l": 0.017115550115704536, "std_weight": 0.02571210078895092, "weighted_loss": 0.010423257015645504 }, { "avg_delta_l": 0.0019195172935724258, "avg_loss_unweighted": 0.025831472128629684, "avg_weight": 0.5276245474815369, "std_delta_l": 0.024265751242637634, "std_weight": 0.03651627525687218, "weighted_loss": 0.014151455834507942 }, { "avg_delta_l": -0.0071171061135828495, "avg_loss_unweighted": 0.02760993130505085, "avg_weight": 0.5403532385826111, "std_delta_l": 0.02202995866537094, "std_weight": 0.04348376393318176, "weighted_loss": 0.015670429915189743 }, { "avg_delta_l": -0.006464798003435135, "avg_loss_unweighted": 0.02510088123381138, "avg_weight": 0.5373933911323547, "std_delta_l": 0.02119576185941696, "std_weight": 0.041782863438129425, "weighted_loss": 0.014073936268687248 }, { "avg_delta_l": -0.003778429701924324, "avg_loss_unweighted": 0.02694731019437313, "avg_weight": 0.5318759679794312, "std_delta_l": 0.018767833709716797, "std_weight": 0.03448763117194176, "weighted_loss": 0.01483383309096098 }, { "avg_delta_l": 0.003949686419218779, "avg_loss_unweighted": 0.022201644256711006, "avg_weight": 0.5171269774436951, "std_delta_l": 0.017403626814484596, "std_weight": 0.022011173889040947, "weighted_loss": 0.011737730354070663 }, { "epoch": 14.834782608695653, "grad_norm": 0.34918374531547913, "learning_rate": 1.9145838263125427e-06, "loss": 0.0267, "step": 860 }, { "avg_delta_l": -0.0040125660598278046, "avg_loss_unweighted": 0.027548380196094513, "avg_weight": 0.5307472944259644, "std_delta_l": 0.018209289759397507, "std_weight": 0.0356217697262764, "weighted_loss": 0.014959149062633514 }, { "avg_delta_l": -0.001753749093040824, "avg_loss_unweighted": 0.024403227493166924, "avg_weight": 0.5265558958053589, "std_delta_l": 0.019552364945411682, "std_weight": 0.0323745459318161, "weighted_loss": 0.013314113020896912 }, { "avg_delta_l": 0.002723819576203823, "avg_loss_unweighted": 0.022165710106492043, "avg_weight": 0.5253661870956421, "std_delta_l": 0.023673973977565765, "std_weight": 0.03544865548610687, "weighted_loss": 0.012013966217637062 }, { "avg_delta_l": -0.0003323043929412961, "avg_loss_unweighted": 0.02657284401357174, "avg_weight": 0.5236548185348511, "std_delta_l": 0.017930857837200165, "std_weight": 0.026822227984666824, "weighted_loss": 0.014313287101686 }, { "avg_delta_l": 0.004387415945529938, "avg_loss_unweighted": 0.022269630804657936, "avg_weight": 0.5187487602233887, "std_delta_l": 0.020420962944626808, "std_weight": 0.030673323199152946, "weighted_loss": 0.011912093497812748 }, { "avg_delta_l": -0.0018906468758359551, "avg_loss_unweighted": 0.02838788367807865, "avg_weight": 0.5266245007514954, "std_delta_l": 0.018982507288455963, "std_weight": 0.03376130387187004, "weighted_loss": 0.01522844098508358 }, { "avg_delta_l": -0.0025223689153790474, "avg_loss_unweighted": 0.026556137949228287, "avg_weight": 0.5286065936088562, "std_delta_l": 0.01785183697938919, "std_weight": 0.03006059303879738, "weighted_loss": 0.014447024092078209 }, { "avg_delta_l": 0.00271143508143723, "avg_loss_unweighted": 0.02405988983809948, "avg_weight": 0.5258493423461914, "std_delta_l": 0.025045638903975487, "std_weight": 0.036986254155635834, "weighted_loss": 0.013243259862065315 }, { "avg_delta_l": -0.0001833751448430121, "avg_loss_unweighted": 0.02340865321457386, "avg_weight": 0.5241771936416626, "std_delta_l": 0.01963876001536846, "std_weight": 0.02971547096967697, "weighted_loss": 0.012685284949839115 }, { "avg_delta_l": 0.001349069643765688, "avg_loss_unweighted": 0.023316942155361176, "avg_weight": 0.5230486989021301, "std_delta_l": 0.016775403171777725, "std_weight": 0.026377804577350616, "weighted_loss": 0.012601001188158989 }, { "avg_delta_l": 0.004592857323586941, "avg_loss_unweighted": 0.02134755812585354, "avg_weight": 0.518307626247406, "std_delta_l": 0.015559544786810875, "std_weight": 0.02007334865629673, "weighted_loss": 0.01129088643938303 }, { "avg_delta_l": -0.0008947981405071914, "avg_loss_unweighted": 0.025771092623472214, "avg_weight": 0.5281050801277161, "std_delta_l": 0.02221549302339554, "std_weight": 0.03574269264936447, "weighted_loss": 0.01408053282648325 }, { "avg_delta_l": 0.004593278281390667, "avg_loss_unweighted": 0.024861697107553482, "avg_weight": 0.521151065826416, "std_delta_l": 0.02342122420668602, "std_weight": 0.03031948395073414, "weighted_loss": 0.013248786330223083 }, { "avg_delta_l": -0.0030315592885017395, "avg_loss_unweighted": 0.026821952313184738, "avg_weight": 0.531511127948761, "std_delta_l": 0.021153276786208153, "std_weight": 0.0374455600976944, "weighted_loss": 0.014761285856366158 }, { "avg_delta_l": -0.0016208505257964134, "avg_loss_unweighted": 0.025059770792722702, "avg_weight": 0.5349702835083008, "std_delta_l": 0.025525614619255066, "std_weight": 0.03835441172122955, "weighted_loss": 0.013983040116727352 }, { "avg_delta_l": 0.002361397258937359, "avg_loss_unweighted": 0.023896342143416405, "avg_weight": 0.5278804302215576, "std_delta_l": 0.026149598881602287, "std_weight": 0.04061368107795715, "weighted_loss": 0.01317885983735323 }, { "avg_delta_l": -0.0009969630045816302, "avg_loss_unweighted": 0.023127352818846703, "avg_weight": 0.5238269567489624, "std_delta_l": 0.018644006922841072, "std_weight": 0.029728179797530174, "weighted_loss": 0.012523781508207321 }, { "avg_delta_l": 0.0022370778024196625, "avg_loss_unweighted": 0.022306347265839577, "avg_weight": 0.5248356461524963, "std_delta_l": 0.02123195305466652, "std_weight": 0.027942746877670288, "weighted_loss": 0.012170031666755676 }, { "avg_delta_l": 0.0009909969521686435, "avg_loss_unweighted": 0.025563791394233704, "avg_weight": 0.5242376923561096, "std_delta_l": 0.019748210906982422, "std_weight": 0.028808319941163063, "weighted_loss": 0.013795615173876286 }, { "epoch": 15.0, "grad_norm": 0.36394698278360144, "learning_rate": 1.797602345739352e-06, "loss": 0.0254, "step": 870 }, { "avg_delta_l": 0.004690113943070173, "avg_loss_unweighted": 0.02192593924701214, "avg_weight": 0.5121392011642456, "std_delta_l": 0.015248763374984264, "std_weight": 0.021101998165249825, "weighted_loss": 0.011424820870161057 }, { "avg_delta_l": 0.0043268511071801186, "avg_loss_unweighted": 0.01786632090806961, "avg_weight": 0.5115770101547241, "std_delta_l": 0.016967173665761948, "std_weight": 0.017833411693572998, "weighted_loss": 0.009323066100478172 }, { "avg_delta_l": 0.00425204960629344, "avg_loss_unweighted": 0.02036936581134796, "avg_weight": 0.5088469982147217, "std_delta_l": 0.01141718402504921, "std_weight": 0.012846973724663258, "weighted_loss": 0.010488475672900677 }, { "avg_delta_l": 0.0037239608354866505, "avg_loss_unweighted": 0.020009882748126984, "avg_weight": 0.5105654001235962, "std_delta_l": 0.013178292661905289, "std_weight": 0.019883424043655396, "weighted_loss": 0.01036941260099411 }, { "avg_delta_l": 0.004133214708417654, "avg_loss_unweighted": 0.015564857050776482, "avg_weight": 0.5072110295295715, "std_delta_l": 0.011440498754382133, "std_weight": 0.013365881517529488, "weighted_loss": 0.007951765321195126 }, { "avg_delta_l": 0.005388753954321146, "avg_loss_unweighted": 0.02062552236020565, "avg_weight": 0.5092496275901794, "std_delta_l": 0.013873152434825897, "std_weight": 0.01750076748430729, "weighted_loss": 0.010677628219127655 }, { "avg_delta_l": 0.007773749530315399, "avg_loss_unweighted": 0.016507528722286224, "avg_weight": 0.505255401134491, "std_delta_l": 0.012920818291604519, "std_weight": 0.01023907121270895, "weighted_loss": 0.008429866284132004 }, { "avg_delta_l": 0.001000434160232544, "avg_loss_unweighted": 0.0220537930727005, "avg_weight": 0.5134861469268799, "std_delta_l": 0.012313595041632652, "std_weight": 0.02151453122496605, "weighted_loss": 0.011410638689994812 }, { "avg_delta_l": 0.0030321627855300903, "avg_loss_unweighted": 0.021216461434960365, "avg_weight": 0.5137786865234375, "std_delta_l": 0.014978419989347458, "std_weight": 0.019285939633846283, "weighted_loss": 0.011082633398473263 }, { "avg_delta_l": 0.0008753939182497561, "avg_loss_unweighted": 0.022703783586621284, "avg_weight": 0.5134586095809937, "std_delta_l": 0.012760454788804054, "std_weight": 0.021377043798565865, "weighted_loss": 0.011845963075757027 }, { "avg_delta_l": 0.0029261813033372164, "avg_loss_unweighted": 0.02154981717467308, "avg_weight": 0.5146622657775879, "std_delta_l": 0.016611017286777496, "std_weight": 0.021759994328022003, "weighted_loss": 0.011355534195899963 }, { "avg_delta_l": 0.0038955744821578264, "avg_loss_unweighted": 0.018712373450398445, "avg_weight": 0.5128529667854309, "std_delta_l": 0.013320542871952057, "std_weight": 0.016939913854002953, "weighted_loss": 0.009795106947422028 }, { "avg_delta_l": 0.003455623285844922, "avg_loss_unweighted": 0.020662259310483932, "avg_weight": 0.5135029554367065, "std_delta_l": 0.016074690967798233, "std_weight": 0.02337963879108429, "weighted_loss": 0.010809018276631832 }, { "avg_delta_l": 0.0031849718652665615, "avg_loss_unweighted": 0.021779032424092293, "avg_weight": 0.5145935416221619, "std_delta_l": 0.015689965337514877, "std_weight": 0.020502259954810143, "weighted_loss": 0.011403120122849941 }, { "avg_delta_l": 5.940083065070212e-05, "avg_loss_unweighted": 0.022509796544909477, "avg_weight": 0.518034815788269, "std_delta_l": 0.01621696539223194, "std_weight": 0.02930562198162079, "weighted_loss": 0.011908294633030891 }, { "avg_delta_l": 0.004107546526938677, "avg_loss_unweighted": 0.022057026624679565, "avg_weight": 0.5128145217895508, "std_delta_l": 0.01725858263671398, "std_weight": 0.02297113835811615, "weighted_loss": 0.011498023755848408 }, { "avg_delta_l": 0.0030098871793597937, "avg_loss_unweighted": 0.018118029460310936, "avg_weight": 0.5106572508811951, "std_delta_l": 0.013434398919343948, "std_weight": 0.019083280116319656, "weighted_loss": 0.009400714188814163 }, { "avg_delta_l": 0.005739906802773476, "avg_loss_unweighted": 0.019949011504650116, "avg_weight": 0.5118695497512817, "std_delta_l": 0.016450831666588783, "std_weight": 0.020271064713597298, "weighted_loss": 0.010456419549882412 }, { "avg_delta_l": 0.008387016132473946, "avg_loss_unweighted": 0.014704396948218346, "avg_weight": 0.5065183639526367, "std_delta_l": 0.013065065257251263, "std_weight": 0.009713355451822281, "weighted_loss": 0.00753701850771904 }, { "avg_delta_l": 0.004618958570063114, "avg_loss_unweighted": 0.015353742986917496, "avg_weight": 0.5119580030441284, "std_delta_l": 0.015748629346489906, "std_weight": 0.020655330270528793, "weighted_loss": 0.008064494468271732 }, { "epoch": 15.173913043478262, "grad_norm": 0.3034436936836824, "learning_rate": 1.6835204861560068e-06, "loss": 0.0205, "step": 880 }, { "avg_delta_l": 0.004326885566115379, "avg_loss_unweighted": 0.015525109134614468, "avg_weight": 0.5100046396255493, "std_delta_l": 0.014554825611412525, "std_weight": 0.018127523362636566, "weighted_loss": 0.008028192445635796 }, { "avg_delta_l": 0.005141166038811207, "avg_loss_unweighted": 0.018631871789693832, "avg_weight": 0.5077270269393921, "std_delta_l": 0.012173076160252094, "std_weight": 0.011036871001124382, "weighted_loss": 0.009500112384557724 }, { "avg_delta_l": 0.0026280172169208527, "avg_loss_unweighted": 0.017532186582684517, "avg_weight": 0.5169122219085693, "std_delta_l": 0.01723100058734417, "std_weight": 0.02724481001496315, "weighted_loss": 0.009318712167441845 }, { "avg_delta_l": 0.002673343988135457, "avg_loss_unweighted": 0.019643696025013924, "avg_weight": 0.5115509629249573, "std_delta_l": 0.013918040320277214, "std_weight": 0.020714882761240005, "weighted_loss": 0.010206944309175014 }, { "avg_delta_l": 0.0037806055042892694, "avg_loss_unweighted": 0.02146216481924057, "avg_weight": 0.514313280582428, "std_delta_l": 0.015237321145832539, "std_weight": 0.020163943991065025, "weighted_loss": 0.011137491092085838 }, { "avg_delta_l": 0.003293663030490279, "avg_loss_unweighted": 0.022360289469361305, "avg_weight": 0.5111106038093567, "std_delta_l": 0.011784866452217102, "std_weight": 0.01437333319336176, "weighted_loss": 0.011516792699694633 }, { "avg_delta_l": 0.0030509179923683405, "avg_loss_unweighted": 0.017862211912870407, "avg_weight": 0.5125849843025208, "std_delta_l": 0.013651006855070591, "std_weight": 0.02064514346420765, "weighted_loss": 0.009400931186974049 }, { "avg_delta_l": 0.003145193215459585, "avg_loss_unweighted": 0.02121308632194996, "avg_weight": 0.5120396614074707, "std_delta_l": 0.013067127205431461, "std_weight": 0.018497779965400696, "weighted_loss": 0.010995089076459408 }, { "avg_delta_l": -0.001671234960667789, "avg_loss_unweighted": 0.02394982986152172, "avg_weight": 0.5237363576889038, "std_delta_l": 0.01749291829764843, "std_weight": 0.030644472688436508, "weighted_loss": 0.012926293537020683 }, { "avg_delta_l": 0.0032790792174637318, "avg_loss_unweighted": 0.019278956577181816, "avg_weight": 0.5112557411193848, "std_delta_l": 0.013475819490849972, "std_weight": 0.01671060174703598, "weighted_loss": 0.01001634169369936 }, { "avg_delta_l": 0.005283011589199305, "avg_loss_unweighted": 0.020213311538100243, "avg_weight": 0.5107378363609314, "std_delta_l": 0.014210049994289875, "std_weight": 0.018621739000082016, "weighted_loss": 0.010480819270014763 }, { "avg_delta_l": 0.004849454388022423, "avg_loss_unweighted": 0.017055628821253777, "avg_weight": 0.5091197490692139, "std_delta_l": 0.012427844107151031, "std_weight": 0.012900596484541893, "weighted_loss": 0.008740974590182304 }, { "avg_delta_l": 0.004577756393700838, "avg_loss_unweighted": 0.020922312512993813, "avg_weight": 0.5112206339836121, "std_delta_l": 0.01311964076012373, "std_weight": 0.01693611964583397, "weighted_loss": 0.010945339687168598 }, { "avg_delta_l": 0.004557460080832243, "avg_loss_unweighted": 0.017906228080391884, "avg_weight": 0.5081586837768555, "std_delta_l": 0.01282200776040554, "std_weight": 0.01340022124350071, "weighted_loss": 0.00924293790012598 }, { "avg_delta_l": 0.0010723727755248547, "avg_loss_unweighted": 0.018991224467754364, "avg_weight": 0.5192292928695679, "std_delta_l": 0.018231641501188278, "std_weight": 0.030405467376112938, "weighted_loss": 0.010277227498590946 }, { "avg_delta_l": 0.0028259798418730497, "avg_loss_unweighted": 0.019853878766298294, "avg_weight": 0.5179018974304199, "std_delta_l": 0.019032875075936317, "std_weight": 0.026028046384453773, "weighted_loss": 0.01053329836577177 }, { "avg_delta_l": 0.004939711652696133, "avg_loss_unweighted": 0.018697943538427353, "avg_weight": 0.5155746936798096, "std_delta_l": 0.020336657762527466, "std_weight": 0.023487690836191177, "weighted_loss": 0.009954671375453472 }, { "avg_delta_l": 0.0015180859481915832, "avg_loss_unweighted": 0.018698373809456825, "avg_weight": 0.518947422504425, "std_delta_l": 0.017151938751339912, "std_weight": 0.02499237470328808, "weighted_loss": 0.009915592148900032 }, { "avg_delta_l": 0.0016240018885582685, "avg_loss_unweighted": 0.018991444259881973, "avg_weight": 0.5144928693771362, "std_delta_l": 0.014542060904204845, "std_weight": 0.025000760331749916, "weighted_loss": 0.01008842047303915 }, { "avg_delta_l": 0.005496377125382423, "avg_loss_unweighted": 0.019034655764698982, "avg_weight": 0.5090119242668152, "std_delta_l": 0.014018462039530277, "std_weight": 0.015559171326458454, "weighted_loss": 0.009806064888834953 }, { "epoch": 15.347826086956522, "grad_norm": 0.31231127732941527, "learning_rate": 1.5724415433397066e-06, "loss": 0.0203, "step": 890 }, { "avg_delta_l": 0.0018945860210806131, "avg_loss_unweighted": 0.02453846111893654, "avg_weight": 0.5166962146759033, "std_delta_l": 0.015507612377405167, "std_weight": 0.02374393306672573, "weighted_loss": 0.012952442280948162 }, { "avg_delta_l": 0.0028832138050347567, "avg_loss_unweighted": 0.020133960992097855, "avg_weight": 0.5117161870002747, "std_delta_l": 0.012044571340084076, "std_weight": 0.01649191416800022, "weighted_loss": 0.01051126979291439 }, { "avg_delta_l": 0.0005329279229044914, "avg_loss_unweighted": 0.021257899701595306, "avg_weight": 0.5168911218643188, "std_delta_l": 0.015440745279192924, "std_weight": 0.026943445205688477, "weighted_loss": 0.011208634823560715 }, { "avg_delta_l": 0.004930289927870035, "avg_loss_unweighted": 0.018164996057748795, "avg_weight": 0.5134355425834656, "std_delta_l": 0.015806810930371284, "std_weight": 0.02061322145164013, "weighted_loss": 0.00948464497923851 }, { "avg_delta_l": 0.0013249765615910292, "avg_loss_unweighted": 0.023115411400794983, "avg_weight": 0.5190441608428955, "std_delta_l": 0.01689530722796917, "std_weight": 0.025598494336009026, "weighted_loss": 0.012247621081769466 }, { "avg_delta_l": 0.0021677829790860415, "avg_loss_unweighted": 0.021371543407440186, "avg_weight": 0.5172196626663208, "std_delta_l": 0.018405061215162277, "std_weight": 0.031102901324629784, "weighted_loss": 0.01138300821185112 }, { "avg_delta_l": 0.0009474389953538775, "avg_loss_unweighted": 0.021234707906842232, "avg_weight": 0.5169219374656677, "std_delta_l": 0.012344873510301113, "std_weight": 0.017407022416591644, "weighted_loss": 0.011148370802402496 }, { "avg_delta_l": 0.002266558352857828, "avg_loss_unweighted": 0.022695574909448624, "avg_weight": 0.5204032063484192, "std_delta_l": 0.021623581647872925, "std_weight": 0.033527884632349014, "weighted_loss": 0.012152343988418579 }, { "avg_delta_l": 0.0013184158597141504, "avg_loss_unweighted": 0.022586727514863014, "avg_weight": 0.5197912454605103, "std_delta_l": 0.01718674786388874, "std_weight": 0.025567084550857544, "weighted_loss": 0.011957383714616299 }, { "avg_delta_l": 0.0009246327681466937, "avg_loss_unweighted": 0.02070886641740799, "avg_weight": 0.516242504119873, "std_delta_l": 0.014828677289187908, "std_weight": 0.025075498968362808, "weighted_loss": 0.010916958563029766 }, { "avg_delta_l": 0.0017563170986250043, "avg_loss_unweighted": 0.02208581380546093, "avg_weight": 0.5151357650756836, "std_delta_l": 0.013755235821008682, "std_weight": 0.020409520715475082, "weighted_loss": 0.01150538120418787 }, { "avg_delta_l": 0.0024829322937875986, "avg_loss_unweighted": 0.01838548481464386, "avg_weight": 0.5158125162124634, "std_delta_l": 0.013951580971479416, "std_weight": 0.01771301031112671, "weighted_loss": 0.009697006084024906 }, { "avg_delta_l": 0.0078122434206306934, "avg_loss_unweighted": 0.01746637560427189, "avg_weight": 0.508172869682312, "std_delta_l": 0.015658102929592133, "std_weight": 0.014750508591532707, "weighted_loss": 0.008972018025815487 }, { "avg_delta_l": 0.0005996514810249209, "avg_loss_unweighted": 0.02104191482067108, "avg_weight": 0.5185102820396423, "std_delta_l": 0.016531003639101982, "std_weight": 0.02840660884976387, "weighted_loss": 0.01118873618543148 }, { "avg_delta_l": 0.00447657098993659, "avg_loss_unweighted": 0.01970500685274601, "avg_weight": 0.5157848596572876, "std_delta_l": 0.017817422747612, "std_weight": 0.025038812309503555, "weighted_loss": 0.01041974313557148 }, { "avg_delta_l": 0.002231789752840996, "avg_loss_unweighted": 0.017496487125754356, "avg_weight": 0.5153607130050659, "std_delta_l": 0.01604847051203251, "std_weight": 0.026651188731193542, "weighted_loss": 0.00929510872811079 }, { "avg_delta_l": -0.0007149731391109526, "avg_loss_unweighted": 0.024387115612626076, "avg_weight": 0.523887038230896, "std_delta_l": 0.017706211656332016, "std_weight": 0.028901726007461548, "weighted_loss": 0.01307747233659029 }, { "avg_delta_l": 0.0002171135856769979, "avg_loss_unweighted": 0.022569358348846436, "avg_weight": 0.5214952230453491, "std_delta_l": 0.01797850988805294, "std_weight": 0.02730998955667019, "weighted_loss": 0.012033728882670403 }, { "avg_delta_l": 0.0021622725762426853, "avg_loss_unweighted": 0.018275460228323936, "avg_weight": 0.5153253674507141, "std_delta_l": 0.014842175878584385, "std_weight": 0.02260098047554493, "weighted_loss": 0.009615369141101837 }, { "avg_delta_l": 0.004026579670608044, "avg_loss_unweighted": 0.01958831213414669, "avg_weight": 0.5126476287841797, "std_delta_l": 0.0154389888048172, "std_weight": 0.020469527691602707, "weighted_loss": 0.01020083948969841 }, { "epoch": 15.521739130434783, "grad_norm": 0.31373673938306584, "learning_rate": 1.4644660940672628e-06, "loss": 0.022, "step": 900 }, { "avg_delta_l": 0.0013940164353698492, "avg_loss_unweighted": 0.01899326965212822, "avg_weight": 0.5176061391830444, "std_delta_l": 0.015429076738655567, "std_weight": 0.026184674352407455, "weighted_loss": 0.010126622393727303 }, { "avg_delta_l": 0.0015389432664960623, "avg_loss_unweighted": 0.018093913793563843, "avg_weight": 0.5181937217712402, "std_delta_l": 0.017774874344468117, "std_weight": 0.02785726636648178, "weighted_loss": 0.009643806144595146 }, { "avg_delta_l": 0.005635260138660669, "avg_loss_unweighted": 0.018664808943867683, "avg_weight": 0.5128695368766785, "std_delta_l": 0.01718386635184288, "std_weight": 0.019886527210474014, "weighted_loss": 0.009746083058416843 }, { "avg_delta_l": -0.0008784602978266776, "avg_loss_unweighted": 0.02144555374979973, "avg_weight": 0.5229560136795044, "std_delta_l": 0.01694527268409729, "std_weight": 0.03226224333047867, "weighted_loss": 0.011574100703001022 }, { "avg_delta_l": 0.0008316901512444019, "avg_loss_unweighted": 0.023228909820318222, "avg_weight": 0.5223427414894104, "std_delta_l": 0.01999545469880104, "std_weight": 0.03246607631444931, "weighted_loss": 0.012555848807096481 }, { "avg_delta_l": 0.0014977396931499243, "avg_loss_unweighted": 0.019391415640711784, "avg_weight": 0.5173465013504028, "std_delta_l": 0.015684479847550392, "std_weight": 0.0216236375272274, "weighted_loss": 0.010275442153215408 }, { "avg_delta_l": 0.00362210301682353, "avg_loss_unweighted": 0.018589673563838005, "avg_weight": 0.5171179175376892, "std_delta_l": 0.017569031566381454, "std_weight": 0.026585783809423447, "weighted_loss": 0.009931903332471848 }, { "avg_delta_l": 0.0003685886040329933, "avg_loss_unweighted": 0.018801622092723846, "avg_weight": 0.5201413035392761, "std_delta_l": 0.016708245500922203, "std_weight": 0.025668710470199585, "weighted_loss": 0.010033832862973213 }, { "avg_delta_l": 0.00048660809989087284, "avg_loss_unweighted": 0.019178712740540504, "avg_weight": 0.521035373210907, "std_delta_l": 0.016429023817181587, "std_weight": 0.024171672761440277, "weighted_loss": 0.01035253144800663 }, { "avg_delta_l": -0.000507015036419034, "avg_loss_unweighted": 0.02236834168434143, "avg_weight": 0.5163020491600037, "std_delta_l": 0.013487671501934528, "std_weight": 0.022423993796110153, "weighted_loss": 0.011653082445263863 }, { "avg_delta_l": 0.0003147660754621029, "avg_loss_unweighted": 0.020129645243287086, "avg_weight": 0.5217928886413574, "std_delta_l": 0.018314553424715996, "std_weight": 0.02722414955496788, "weighted_loss": 0.010796604678034782 }, { "avg_delta_l": 0.0017470091115683317, "avg_loss_unweighted": 0.021008865907788277, "avg_weight": 0.520759105682373, "std_delta_l": 0.016143741086125374, "std_weight": 0.023471103981137276, "weighted_loss": 0.011243375018239021 }, { "avg_delta_l": 0.004582534544169903, "avg_loss_unweighted": 0.019545473158359528, "avg_weight": 0.515443742275238, "std_delta_l": 0.017959902063012123, "std_weight": 0.0228403490036726, "weighted_loss": 0.010275899432599545 }, { "avg_delta_l": 0.00013999186921864748, "avg_loss_unweighted": 0.02127433754503727, "avg_weight": 0.5221587419509888, "std_delta_l": 0.018417885527014732, "std_weight": 0.028482861816883087, "weighted_loss": 0.011458959430456161 }, { "avg_delta_l": 0.0007070485153235495, "avg_loss_unweighted": 0.01781109906733036, "avg_weight": 0.5228923559188843, "std_delta_l": 0.020802481099963188, "std_weight": 0.03356335312128067, "weighted_loss": 0.009683472104370594 }, { "avg_delta_l": 0.00023572909412905574, "avg_loss_unweighted": 0.020789597183465958, "avg_weight": 0.5250157713890076, "std_delta_l": 0.02064051479101181, "std_weight": 0.03295953944325447, "weighted_loss": 0.011212476529181004 }, { "avg_delta_l": -0.0016758125275373459, "avg_loss_unweighted": 0.01905922032892704, "avg_weight": 0.5202425122261047, "std_delta_l": 0.01477315928786993, "std_weight": 0.028291424736380577, "weighted_loss": 0.010183168575167656 }, { "avg_delta_l": 0.003595862304791808, "avg_loss_unweighted": 0.019978897646069527, "avg_weight": 0.5148242712020874, "std_delta_l": 0.016921959817409515, "std_weight": 0.021714508533477783, "weighted_loss": 0.010490021668374538 }, { "avg_delta_l": 0.0007878508185967803, "avg_loss_unweighted": 0.018234865739941597, "avg_weight": 0.5168612003326416, "std_delta_l": 0.013974669389426708, "std_weight": 0.020835503935813904, "weighted_loss": 0.009685358963906765 }, { "avg_delta_l": -0.0011274886783212423, "avg_loss_unweighted": 0.02046387642621994, "avg_weight": 0.5247162580490112, "std_delta_l": 0.01887364499270916, "std_weight": 0.03200644999742508, "weighted_loss": 0.011055218987166882 }, { "epoch": 15.695652173913043, "grad_norm": 0.3350500036874474, "learning_rate": 1.359691905047527e-06, "loss": 0.0212, "step": 910 }, { "avg_delta_l": 0.004061304498463869, "avg_loss_unweighted": 0.018157124519348145, "avg_weight": 0.5137883424758911, "std_delta_l": 0.015591196715831757, "std_weight": 0.018629057332873344, "weighted_loss": 0.009464964270591736 }, { "avg_delta_l": -0.003450620686635375, "avg_loss_unweighted": 0.022935358807444572, "avg_weight": 0.5255196690559387, "std_delta_l": 0.015134996734559536, "std_weight": 0.028514614328742027, "weighted_loss": 0.012384156696498394 }, { "avg_delta_l": -0.0027676515746861696, "avg_loss_unweighted": 0.022033851593732834, "avg_weight": 0.525229275226593, "std_delta_l": 0.0168205164372921, "std_weight": 0.032059088349342346, "weighted_loss": 0.011966515332460403 }, { "avg_delta_l": -0.0010234988294541836, "avg_loss_unweighted": 0.02032344415783882, "avg_weight": 0.5217001438140869, "std_delta_l": 0.015513589605689049, "std_weight": 0.023374494165182114, "weighted_loss": 0.010852334089577198 }, { "avg_delta_l": 0.0012215753085911274, "avg_loss_unweighted": 0.023634744808077812, "avg_weight": 0.5212749242782593, "std_delta_l": 0.018311593681573868, "std_weight": 0.028000224381685257, "weighted_loss": 0.012640969827771187 }, { "avg_delta_l": -0.0007429656106978655, "avg_loss_unweighted": 0.02116873860359192, "avg_weight": 0.5240675806999207, "std_delta_l": 0.015756605193018913, "std_weight": 0.02696751244366169, "weighted_loss": 0.011473585851490498 }, { "avg_delta_l": -0.0016281579155474901, "avg_loss_unweighted": 0.022746186703443527, "avg_weight": 0.5242373943328857, "std_delta_l": 0.016662761569023132, "std_weight": 0.02852075919508934, "weighted_loss": 0.012247107923030853 }, { "avg_delta_l": 0.00074991793371737, "avg_loss_unweighted": 0.01994858682155609, "avg_weight": 0.5228366851806641, "std_delta_l": 0.01997794210910797, "std_weight": 0.032132089138031006, "weighted_loss": 0.010730169713497162 }, { "avg_delta_l": 0.001020863652229309, "avg_loss_unweighted": 0.018836744129657745, "avg_weight": 0.5237405896186829, "std_delta_l": 0.02026209607720375, "std_weight": 0.029975198209285736, "weighted_loss": 0.010257849469780922 }, { "avg_delta_l": 0.0039788964204490185, "avg_loss_unweighted": 0.01753615215420723, "avg_weight": 0.512056827545166, "std_delta_l": 0.014930976554751396, "std_weight": 0.019298972561955452, "weighted_loss": 0.009082155302166939 }, { "avg_delta_l": 0.0017033369513228536, "avg_loss_unweighted": 0.018640462309122086, "avg_weight": 0.5189487338066101, "std_delta_l": 0.017524465918540955, "std_weight": 0.027936942875385284, "weighted_loss": 0.009912216104567051 }, { "avg_delta_l": 0.0036374451592564583, "avg_loss_unweighted": 0.01891789585351944, "avg_weight": 0.5167516469955444, "std_delta_l": 0.01761460117995739, "std_weight": 0.02576032280921936, "weighted_loss": 0.01010357029736042 }, { "avg_delta_l": -0.0013286108151078224, "avg_loss_unweighted": 0.02321961522102356, "avg_weight": 0.5238926410675049, "std_delta_l": 0.017768969759345055, "std_weight": 0.028519418090581894, "weighted_loss": 0.01257537305355072 }, { "avg_delta_l": -0.001358194975182414, "avg_loss_unweighted": 0.020298685878515244, "avg_weight": 0.5235987305641174, "std_delta_l": 0.016378888860344887, "std_weight": 0.028015688061714172, "weighted_loss": 0.011155758053064346 }, { "avg_delta_l": -0.0027774928603321314, "avg_loss_unweighted": 0.020778201520442963, "avg_weight": 0.5267404317855835, "std_delta_l": 0.015103807672858238, "std_weight": 0.027949519455432892, "weighted_loss": 0.011392282322049141 }, { "avg_delta_l": 0.0006999342003837228, "avg_loss_unweighted": 0.02085133083164692, "avg_weight": 0.5207288861274719, "std_delta_l": 0.01861618272960186, "std_weight": 0.030321087688207626, "weighted_loss": 0.011245961301028728 }, { "avg_delta_l": 0.0031372630037367344, "avg_loss_unweighted": 0.018211621791124344, "avg_weight": 0.5189570784568787, "std_delta_l": 0.019962072372436523, "std_weight": 0.028111478313803673, "weighted_loss": 0.009737218730151653 }, { "avg_delta_l": 0.0010279400739818811, "avg_loss_unweighted": 0.021444838494062424, "avg_weight": 0.5219014883041382, "std_delta_l": 0.015648139640688896, "std_weight": 0.024064410477876663, "weighted_loss": 0.01163501013070345 }, { "avg_delta_l": -0.00029588007600978017, "avg_loss_unweighted": 0.020703230053186417, "avg_weight": 0.5242760181427002, "std_delta_l": 0.01997663453221321, "std_weight": 0.036147888749837875, "weighted_loss": 0.011304649524390697 }, { "avg_delta_l": -0.002199434209614992, "avg_loss_unweighted": 0.02101883664727211, "avg_weight": 0.5254149436950684, "std_delta_l": 0.018466809764504433, "std_weight": 0.03209993615746498, "weighted_loss": 0.011336266063153744 }, { "epoch": 15.869565217391305, "grad_norm": 0.3463977995084705, "learning_rate": 1.258213844398226e-06, "loss": 0.0221, "step": 920 }, { "avg_delta_l": 0.00271393870934844, "avg_loss_unweighted": 0.016733800992369652, "avg_weight": 0.5192806720733643, "std_delta_l": 0.018705155700445175, "std_weight": 0.025760218501091003, "weighted_loss": 0.008979607373476028 }, { "avg_delta_l": 0.0011483568232506514, "avg_loss_unweighted": 0.019170530140399933, "avg_weight": 0.519675076007843, "std_delta_l": 0.01799160987138748, "std_weight": 0.026655040681362152, "weighted_loss": 0.010166401043534279 }, { "avg_delta_l": 0.0026116191875189543, "avg_loss_unweighted": 0.01686159148812294, "avg_weight": 0.5182825922966003, "std_delta_l": 0.01680750772356987, "std_weight": 0.022990809753537178, "weighted_loss": 0.009066406637430191 }, { "avg_delta_l": -0.0017303016502410173, "avg_loss_unweighted": 0.023950498551130295, "avg_weight": 0.5227311849594116, "std_delta_l": 0.016331057995557785, "std_weight": 0.02839021012187004, "weighted_loss": 0.012810438871383667 }, { "avg_delta_l": 0.0011295868316665292, "avg_loss_unweighted": 0.020506680011749268, "avg_weight": 0.5214318633079529, "std_delta_l": 0.018002960830926895, "std_weight": 0.025793924927711487, "weighted_loss": 0.01100701279938221 }, { "avg_delta_l": 0.0013457158347591758, "avg_loss_unweighted": 0.022332195192575455, "avg_weight": 0.5240743160247803, "std_delta_l": 0.021690845489501953, "std_weight": 0.03746962174773216, "weighted_loss": 0.012078830972313881 }, { "avg_delta_l": 0.0054052057676017284, "avg_loss_unweighted": 0.017867231741547585, "avg_weight": 0.513954222202301, "std_delta_l": 0.01601225882768631, "std_weight": 0.02195427194237709, "weighted_loss": 0.009431984275579453 }, { "avg_delta_l": 0.0034687635488808155, "avg_loss_unweighted": 0.02073568105697632, "avg_weight": 0.5171297788619995, "std_delta_l": 0.015405778773128986, "std_weight": 0.021767208352684975, "weighted_loss": 0.010941311717033386 }, { "avg_delta_l": -0.0016347737982869148, "avg_loss_unweighted": 0.021252062171697617, "avg_weight": 0.5243372917175293, "std_delta_l": 0.01631506346166134, "std_weight": 0.02541697584092617, "weighted_loss": 0.011376092210412025 }, { "avg_delta_l": -0.0005225756904110312, "avg_loss_unweighted": 0.019482018426060677, "avg_weight": 0.5261124968528748, "std_delta_l": 0.022299302741885185, "std_weight": 0.03515128418803215, "weighted_loss": 0.010730513371527195 }, { "avg_delta_l": 0.0006095987046137452, "avg_loss_unweighted": 0.018821852281689644, "avg_weight": 0.5224267244338989, "std_delta_l": 0.01785990037024021, "std_weight": 0.027151914313435555, "weighted_loss": 0.010220940224826336 }, { "avg_delta_l": 0.0025445919018238783, "avg_loss_unweighted": 0.018710292875766754, "avg_weight": 0.5188571214675903, "std_delta_l": 0.017663901671767235, "std_weight": 0.025065375491976738, "weighted_loss": 0.00993289053440094 }, { "avg_delta_l": -0.001769122201949358, "avg_loss_unweighted": 0.023912906646728516, "avg_weight": 0.5293063521385193, "std_delta_l": 0.018929289653897285, "std_weight": 0.03144034370779991, "weighted_loss": 0.013123634271323681 }, { "avg_delta_l": -0.0037879454903304577, "avg_loss_unweighted": 0.020398296415805817, "avg_weight": 0.53061842918396, "std_delta_l": 0.01919589191675186, "std_weight": 0.03242383897304535, "weighted_loss": 0.01116352342069149 }, { "avg_delta_l": 0.0038520274683833122, "avg_loss_unweighted": 0.01782761700451374, "avg_weight": 0.5171269178390503, "std_delta_l": 0.01618373394012451, "std_weight": 0.022542797029018402, "weighted_loss": 0.009541163221001625 }, { "avg_delta_l": 0.0030313939787447453, "avg_loss_unweighted": 0.016457192599773407, "avg_weight": 0.508060097694397, "std_delta_l": 0.011098822578787804, "std_weight": 0.014384904876351357, "weighted_loss": 0.008442540653049946 }, { "avg_delta_l": 0.0018528795335441828, "avg_loss_unweighted": 0.016749270260334015, "avg_weight": 0.5105131268501282, "std_delta_l": 0.011948523111641407, "std_weight": 0.01650635525584221, "weighted_loss": 0.008698568679392338 }, { "avg_delta_l": 0.0008268409874290228, "avg_loss_unweighted": 0.01830175705254078, "avg_weight": 0.5166245102882385, "std_delta_l": 0.01595974713563919, "std_weight": 0.02591911517083645, "weighted_loss": 0.009762406349182129 }, { "avg_delta_l": 0.0030910936184227467, "avg_loss_unweighted": 0.014769432134926319, "avg_weight": 0.5087864398956299, "std_delta_l": 0.010843303985893726, "std_weight": 0.010440356098115444, "weighted_loss": 0.007584592327475548 }, { "epoch": 16.034782608695654, "grad_norm": 0.280270450285988, "learning_rate": 1.16012379574733e-06, "loss": 0.0195, "step": 930 }, { "avg_delta_l": 0.002756161382421851, "avg_loss_unweighted": 0.01632426120340824, "avg_weight": 0.5096110105514526, "std_delta_l": 0.012662648223340511, "std_weight": 0.015181073918938637, "weighted_loss": 0.008415519259870052 }, { "avg_delta_l": 0.001688922755420208, "avg_loss_unweighted": 0.016835933551192284, "avg_weight": 0.507392168045044, "std_delta_l": 0.009228168055415154, "std_weight": 0.012766041792929173, "weighted_loss": 0.008624027483165264 }, { "avg_delta_l": 0.00798279233276844, "avg_loss_unweighted": 0.013537788763642311, "avg_weight": 0.502612829208374, "std_delta_l": 0.011503459885716438, "std_weight": 0.00395279610529542, "weighted_loss": 0.006825513206422329 }, { "avg_delta_l": 0.0019040381303057075, "avg_loss_unweighted": 0.017764292657375336, "avg_weight": 0.5078644752502441, "std_delta_l": 0.008745756931602955, "std_weight": 0.012145712040364742, "weighted_loss": 0.009109009057283401 }, { "avg_delta_l": 0.002514849416911602, "avg_loss_unweighted": 0.0217963308095932, "avg_weight": 0.5097654461860657, "std_delta_l": 0.010306200012564659, "std_weight": 0.012865599244832993, "weighted_loss": 0.01121537946164608 }, { "avg_delta_l": 0.002991043496876955, "avg_loss_unweighted": 0.01806066744029522, "avg_weight": 0.5054657459259033, "std_delta_l": 0.006819083821028471, "std_weight": 0.006299953907728195, "weighted_loss": 0.009149981662631035 }, { "avg_delta_l": 0.0005021883407607675, "avg_loss_unweighted": 0.016917502507567406, "avg_weight": 0.5131964087486267, "std_delta_l": 0.012015845626592636, "std_weight": 0.020636549219489098, "weighted_loss": 0.00884932279586792 }, { "avg_delta_l": 0.002564248163253069, "avg_loss_unweighted": 0.021031895652413368, "avg_weight": 0.5117013454437256, "std_delta_l": 0.013512138277292252, "std_weight": 0.020032433792948723, "weighted_loss": 0.010885804891586304 }, { "avg_delta_l": 0.0018740335945039988, "avg_loss_unweighted": 0.016075916588306427, "avg_weight": 0.513536274433136, "std_delta_l": 0.013388128019869328, "std_weight": 0.019310420379042625, "weighted_loss": 0.00846879743039608 }, { "avg_delta_l": 0.0021640390623360872, "avg_loss_unweighted": 0.017167937010526657, "avg_weight": 0.5108725428581238, "std_delta_l": 0.010799416340887547, "std_weight": 0.013701281510293484, "weighted_loss": 0.008879072964191437 }, { "avg_delta_l": 0.003279689699411392, "avg_loss_unweighted": 0.014963774010539055, "avg_weight": 0.5086478590965271, "std_delta_l": 0.011897879652678967, "std_weight": 0.01438883412629366, "weighted_loss": 0.007733928505331278 }, { "avg_delta_l": 0.0025787760969251394, "avg_loss_unweighted": 0.017635663971304893, "avg_weight": 0.5104041695594788, "std_delta_l": 0.011107666417956352, "std_weight": 0.013870161958038807, "weighted_loss": 0.009153793565928936 }, { "avg_delta_l": 0.002160194329917431, "avg_loss_unweighted": 0.014586076140403748, "avg_weight": 0.5093068480491638, "std_delta_l": 0.012213604524731636, "std_weight": 0.016925403848290443, "weighted_loss": 0.007578500546514988 }, { "avg_delta_l": 0.001991087105125189, "avg_loss_unweighted": 0.020149165764451027, "avg_weight": 0.5122666358947754, "std_delta_l": 0.011426559649407864, "std_weight": 0.01566186174750328, "weighted_loss": 0.010523904114961624 }, { "avg_delta_l": 0.0022852544207125902, "avg_loss_unweighted": 0.018737534061074257, "avg_weight": 0.5087897181510925, "std_delta_l": 0.008848468773066998, "std_weight": 0.01061891857534647, "weighted_loss": 0.0096554234623909 }, { "avg_delta_l": 0.0016983312088996172, "avg_loss_unweighted": 0.02024991810321808, "avg_weight": 0.5129332542419434, "std_delta_l": 0.013525179587304592, "std_weight": 0.021930785849690437, "weighted_loss": 0.010551992803812027 }, { "avg_delta_l": 0.00153742718975991, "avg_loss_unweighted": 0.019983302801847458, "avg_weight": 0.51145339012146, "std_delta_l": 0.011577334254980087, "std_weight": 0.01657026819884777, "weighted_loss": 0.010344533249735832 }, { "avg_delta_l": 0.0015816978411749005, "avg_loss_unweighted": 0.019212882965803146, "avg_weight": 0.511019766330719, "std_delta_l": 0.011945140548050404, "std_weight": 0.01722383126616478, "weighted_loss": 0.009963074699044228 }, { "avg_delta_l": -0.00054071907652542, "avg_loss_unweighted": 0.01625683344900608, "avg_weight": 0.5151482820510864, "std_delta_l": 0.012407123111188412, "std_weight": 0.021368855610489845, "weighted_loss": 0.008630817756056786 }, { "avg_delta_l": 0.0010822892654687166, "avg_loss_unweighted": 0.01871507801115513, "avg_weight": 0.5107126235961914, "std_delta_l": 0.01025393046438694, "std_weight": 0.015263168141245842, "weighted_loss": 0.009660162031650543 }, { "epoch": 16.208695652173912, "grad_norm": 0.27106560554948006, "learning_rate": 1.0655105750367245e-06, "loss": 0.0184, "step": 940 }, { "avg_delta_l": 0.0023932638578116894, "avg_loss_unweighted": 0.02019691839814186, "avg_weight": 0.5121749639511108, "std_delta_l": 0.01239200308918953, "std_weight": 0.017463546246290207, "weighted_loss": 0.010492496192455292 }, { "avg_delta_l": 0.0016489842673763633, "avg_loss_unweighted": 0.01767164282500744, "avg_weight": 0.5089872479438782, "std_delta_l": 0.010227741673588753, "std_weight": 0.016371499747037888, "weighted_loss": 0.009100126102566719 }, { "avg_delta_l": 0.0015179375186562538, "avg_loss_unweighted": 0.018212471157312393, "avg_weight": 0.515365481376648, "std_delta_l": 0.014102790504693985, "std_weight": 0.02069273591041565, "weighted_loss": 0.009544470347464085 }, { "avg_delta_l": 0.003383045084774494, "avg_loss_unweighted": 0.01828909106552601, "avg_weight": 0.510924220085144, "std_delta_l": 0.012129608541727066, "std_weight": 0.015519064851105213, "weighted_loss": 0.009455175139009953 }, { "avg_delta_l": 0.003224768675863743, "avg_loss_unweighted": 0.017573853954672813, "avg_weight": 0.5094790458679199, "std_delta_l": 0.012048611417412758, "std_weight": 0.016944557428359985, "weighted_loss": 0.009067915380001068 }, { "avg_delta_l": 0.0035399377811700106, "avg_loss_unweighted": 0.014301103539764881, "avg_weight": 0.5079295635223389, "std_delta_l": 0.011260082945227623, "std_weight": 0.01466347835958004, "weighted_loss": 0.007323156110942364 }, { "avg_delta_l": 0.0031250384636223316, "avg_loss_unweighted": 0.018010782077908516, "avg_weight": 0.5136401057243347, "std_delta_l": 0.014569194987416267, "std_weight": 0.020210668444633484, "weighted_loss": 0.009449385106563568 }, { "avg_delta_l": -0.0004895374295301735, "avg_loss_unweighted": 0.019609039649367332, "avg_weight": 0.5150004625320435, "std_delta_l": 0.012344482354819775, "std_weight": 0.023997383192181587, "weighted_loss": 0.010289670899510384 }, { "avg_delta_l": 0.001679133391007781, "avg_loss_unweighted": 0.016481861472129822, "avg_weight": 0.5127766132354736, "std_delta_l": 0.013065453618764877, "std_weight": 0.021492093801498413, "weighted_loss": 0.008609097450971603 }, { "avg_delta_l": 0.0012568830279633403, "avg_loss_unweighted": 0.019252147525548935, "avg_weight": 0.5093687176704407, "std_delta_l": 0.010481122881174088, "std_weight": 0.015507247298955917, "weighted_loss": 0.009896762669086456 }, { "avg_delta_l": 0.004338636063039303, "avg_loss_unweighted": 0.014313762076199055, "avg_weight": 0.5097732543945312, "std_delta_l": 0.013203638605773449, "std_weight": 0.01474824734032154, "weighted_loss": 0.007370014674961567 }, { "avg_delta_l": 0.00042465259321033955, "avg_loss_unweighted": 0.01673314720392227, "avg_weight": 0.5143863558769226, "std_delta_l": 0.013955104164779186, "std_weight": 0.02300095744431019, "weighted_loss": 0.00881447084248066 }, { "avg_delta_l": 0.0026212420780211687, "avg_loss_unweighted": 0.017593322321772575, "avg_weight": 0.5133838057518005, "std_delta_l": 0.014216003939509392, "std_weight": 0.021890966221690178, "weighted_loss": 0.00920606404542923 }, { "avg_delta_l": 0.004198574461042881, "avg_loss_unweighted": 0.013918126001954079, "avg_weight": 0.5093144774436951, "std_delta_l": 0.012396082282066345, "std_weight": 0.014777462929487228, "weighted_loss": 0.007201514672487974 }, { "avg_delta_l": 0.004857126623392105, "avg_loss_unweighted": 0.01615373231470585, "avg_weight": 0.5060045719146729, "std_delta_l": 0.010087183676660061, "std_weight": 0.010644350200891495, "weighted_loss": 0.008253656327724457 }, { "avg_delta_l": 0.0015611319104209542, "avg_loss_unweighted": 0.01893230527639389, "avg_weight": 0.5161491632461548, "std_delta_l": 0.01523839496076107, "std_weight": 0.0245792455971241, "weighted_loss": 0.009992234408855438 }, { "avg_delta_l": 0.00258639152161777, "avg_loss_unweighted": 0.016750479117035866, "avg_weight": 0.5113065838813782, "std_delta_l": 0.012863497249782085, "std_weight": 0.017533497884869576, "weighted_loss": 0.00865522213280201 }, { "avg_delta_l": -0.001152528333477676, "avg_loss_unweighted": 0.018888505175709724, "avg_weight": 0.5181565284729004, "std_delta_l": 0.013707129284739494, "std_weight": 0.024765485897660255, "weighted_loss": 0.010016215965151787 }, { "avg_delta_l": 0.003136141924187541, "avg_loss_unweighted": 0.01824996992945671, "avg_weight": 0.5116901397705078, "std_delta_l": 0.014087891206145287, "std_weight": 0.01895040273666382, "weighted_loss": 0.009432245045900345 }, { "avg_delta_l": 0.004348048474639654, "avg_loss_unweighted": 0.017114706337451935, "avg_weight": 0.5123304724693298, "std_delta_l": 0.015350041911005974, "std_weight": 0.020531389862298965, "weighted_loss": 0.008976219221949577 }, { "epoch": 16.382608695652173, "grad_norm": 0.2753966291450714, "learning_rate": 9.744598501035468e-07, "loss": 0.0181, "step": 950 }, { "avg_delta_l": 0.000464198732515797, "avg_loss_unweighted": 0.017090098932385445, "avg_weight": 0.5164493322372437, "std_delta_l": 0.01460331678390503, "std_weight": 0.025341618806123734, "weighted_loss": 0.009093066677451134 }, { "avg_delta_l": 0.0024565563071519136, "avg_loss_unweighted": 0.01588759571313858, "avg_weight": 0.5114567875862122, "std_delta_l": 0.013250716961920261, "std_weight": 0.01825837977230549, "weighted_loss": 0.008266419172286987 }, { "avg_delta_l": -0.0009186926181428134, "avg_loss_unweighted": 0.01877441257238388, "avg_weight": 0.5169666409492493, "std_delta_l": 0.01313912682235241, "std_weight": 0.02452962100505829, "weighted_loss": 0.009950836189091206 }, { "avg_delta_l": 0.0012820919509977102, "avg_loss_unweighted": 0.018147775903344154, "avg_weight": 0.5153676271438599, "std_delta_l": 0.01492065005004406, "std_weight": 0.021711058914661407, "weighted_loss": 0.009523141197860241 }, { "avg_delta_l": -0.0007071216241456568, "avg_loss_unweighted": 0.01796029880642891, "avg_weight": 0.5172950029373169, "std_delta_l": 0.013354332186281681, "std_weight": 0.023258037865161896, "weighted_loss": 0.009575012139976025 }, { "avg_delta_l": 0.005999566987156868, "avg_loss_unweighted": 0.015566055662930012, "avg_weight": 0.5084227919578552, "std_delta_l": 0.011410088278353214, "std_weight": 0.01349897775799036, "weighted_loss": 0.008018347434699535 }, { "avg_delta_l": 0.00013346006744541228, "avg_loss_unweighted": 0.016336271539330482, "avg_weight": 0.5154673457145691, "std_delta_l": 0.01296256110072136, "std_weight": 0.02268024906516075, "weighted_loss": 0.008625742979347706 }, { "avg_delta_l": 0.003503000596538186, "avg_loss_unweighted": 0.014942958950996399, "avg_weight": 0.5084421634674072, "std_delta_l": 0.009849945083260536, "std_weight": 0.01175575703382492, "weighted_loss": 0.0076573072001338005 }, { "avg_delta_l": 0.0041455840691924095, "avg_loss_unweighted": 0.015250218100845814, "avg_weight": 0.5126445293426514, "std_delta_l": 0.015060344710946083, "std_weight": 0.019026018679142, "weighted_loss": 0.008017700165510178 }, { "avg_delta_l": 0.005009640008211136, "avg_loss_unweighted": 0.016156142577528954, "avg_weight": 0.5083238482475281, "std_delta_l": 0.013077514246106148, "std_weight": 0.016523543745279312, "weighted_loss": 0.008315753191709518 }, { "avg_delta_l": 0.0002576152328401804, "avg_loss_unweighted": 0.016824740916490555, "avg_weight": 0.5155296325683594, "std_delta_l": 0.01330646127462387, "std_weight": 0.021443834528326988, "weighted_loss": 0.008890809491276741 }, { "avg_delta_l": -0.0003246357664465904, "avg_loss_unweighted": 0.017731672152876854, "avg_weight": 0.5166964530944824, "std_delta_l": 0.012522684410214424, "std_weight": 0.021254345774650574, "weighted_loss": 0.00940551795065403 }, { "avg_delta_l": 0.0035319356247782707, "avg_loss_unweighted": 0.016275808215141296, "avg_weight": 0.5102887153625488, "std_delta_l": 0.01246813777834177, "std_weight": 0.015287603251636028, "weighted_loss": 0.008450128138065338 }, { "avg_delta_l": 0.006640541832894087, "avg_loss_unweighted": 0.015268415212631226, "avg_weight": 0.510662317276001, "std_delta_l": 0.015902237966656685, "std_weight": 0.016916904598474503, "weighted_loss": 0.007921567186713219 }, { "avg_delta_l": -0.0006662009982392192, "avg_loss_unweighted": 0.018017824739217758, "avg_weight": 0.5153892636299133, "std_delta_l": 0.011393939144909382, "std_weight": 0.019528307020664215, "weighted_loss": 0.009525970555841923 }, { "avg_delta_l": 0.001346335164271295, "avg_loss_unweighted": 0.018827438354492188, "avg_weight": 0.5153460502624512, "std_delta_l": 0.014981625601649284, "std_weight": 0.023644469678401947, "weighted_loss": 0.009910515509545803 }, { "avg_delta_l": -0.0002559757558628917, "avg_loss_unweighted": 0.01756523735821247, "avg_weight": 0.5154657363891602, "std_delta_l": 0.012425864115357399, "std_weight": 0.01873813197016716, "weighted_loss": 0.009249736554920673 }, { "avg_delta_l": -0.0005832749884575605, "avg_loss_unweighted": 0.01918902061879635, "avg_weight": 0.5173063278198242, "std_delta_l": 0.013516683131456375, "std_weight": 0.022016262635588646, "weighted_loss": 0.010129708796739578 }, { "avg_delta_l": 0.0007393269333988428, "avg_loss_unweighted": 0.016042517498135567, "avg_weight": 0.5169001817703247, "std_delta_l": 0.01500055193901062, "std_weight": 0.02220328152179718, "weighted_loss": 0.008510109037160873 }, { "avg_delta_l": 0.005745766684412956, "avg_loss_unweighted": 0.015717221423983574, "avg_weight": 0.5081480145454407, "std_delta_l": 0.013757571578025818, "std_weight": 0.015755388885736465, "weighted_loss": 0.008079515770077705 }, { "epoch": 16.556521739130435, "grad_norm": 0.25972798576196343, "learning_rate": 8.870540631119667e-07, "loss": 0.0177, "step": 960 }, { "avg_delta_l": -0.0005618429277092218, "avg_loss_unweighted": 0.018142355605959892, "avg_weight": 0.5143978595733643, "std_delta_l": 0.011499369516968727, "std_weight": 0.02015882171690464, "weighted_loss": 0.00950412917882204 }, { "avg_delta_l": 0.0027388520538806915, "avg_loss_unweighted": 0.014864606782793999, "avg_weight": 0.5131934881210327, "std_delta_l": 0.015872091054916382, "std_weight": 0.021014409139752388, "weighted_loss": 0.007803791202604771 }, { "avg_delta_l": 0.003510743146762252, "avg_loss_unweighted": 0.016836224123835564, "avg_weight": 0.5154799818992615, "std_delta_l": 0.014454193413257599, "std_weight": 0.021866945549845695, "weighted_loss": 0.008978196419775486 }, { "avg_delta_l": 0.0014265398494899273, "avg_loss_unweighted": 0.017769385129213333, "avg_weight": 0.518140435218811, "std_delta_l": 0.0156796183437109, "std_weight": 0.023927530273795128, "weighted_loss": 0.009380154311656952 }, { "avg_delta_l": 0.0031183501705527306, "avg_loss_unweighted": 0.01814286969602108, "avg_weight": 0.5182219743728638, "std_delta_l": 0.01871638372540474, "std_weight": 0.027005955576896667, "weighted_loss": 0.009627830237150192 }, { "avg_delta_l": 0.00041331141255795956, "avg_loss_unweighted": 0.018110398203134537, "avg_weight": 0.5185195207595825, "std_delta_l": 0.014638884924352169, "std_weight": 0.022996703162789345, "weighted_loss": 0.009647206403315067 }, { "avg_delta_l": 0.002706388244405389, "avg_loss_unweighted": 0.01797734759747982, "avg_weight": 0.513041615486145, "std_delta_l": 0.014789493754506111, "std_weight": 0.019489508122205734, "weighted_loss": 0.009385412558913231 }, { "avg_delta_l": -0.0009908813517540693, "avg_loss_unweighted": 0.01706971600651741, "avg_weight": 0.5176069140434265, "std_delta_l": 0.013590382412075996, "std_weight": 0.02639438584446907, "weighted_loss": 0.008975655771791935 }, { "avg_delta_l": 0.0026412555016577244, "avg_loss_unweighted": 0.01624486967921257, "avg_weight": 0.5121405720710754, "std_delta_l": 0.012813728302717209, "std_weight": 0.017158903181552887, "weighted_loss": 0.008474627509713173 }, { "avg_delta_l": 0.0020374630112200975, "avg_loss_unweighted": 0.015716563910245895, "avg_weight": 0.5167787075042725, "std_delta_l": 0.015192786231637001, "std_weight": 0.020934371277689934, "weighted_loss": 0.008304529823362827 }, { "avg_delta_l": 0.0009722727118059993, "avg_loss_unweighted": 0.017081957310438156, "avg_weight": 0.5186498165130615, "std_delta_l": 0.013873763382434845, "std_weight": 0.019645927473902702, "weighted_loss": 0.009029950015246868 }, { "avg_delta_l": 0.002468401100486517, "avg_loss_unweighted": 0.018584076315164566, "avg_weight": 0.5163536667823792, "std_delta_l": 0.016896406188607216, "std_weight": 0.026790577918291092, "weighted_loss": 0.009920299984514713 }, { "avg_delta_l": -0.0021690279245376587, "avg_loss_unweighted": 0.021090472117066383, "avg_weight": 0.5224126577377319, "std_delta_l": 0.013562479987740517, "std_weight": 0.026067912578582764, "weighted_loss": 0.011217865161597729 }, { "avg_delta_l": -0.002302877139300108, "avg_loss_unweighted": 0.017038648948073387, "avg_weight": 0.5183456540107727, "std_delta_l": 0.012475891038775444, "std_weight": 0.023636003956198692, "weighted_loss": 0.00909013394266367 }, { "avg_delta_l": 0.0030129258520901203, "avg_loss_unweighted": 0.015692586079239845, "avg_weight": 0.5129709839820862, "std_delta_l": 0.014592500403523445, "std_weight": 0.016794700175523758, "weighted_loss": 0.00823782104998827 }, { "avg_delta_l": 0.0032864902168512344, "avg_loss_unweighted": 0.017212845385074615, "avg_weight": 0.5145787596702576, "std_delta_l": 0.015611381269991398, "std_weight": 0.02186746522784233, "weighted_loss": 0.009045501239597797 }, { "avg_delta_l": -0.0018891686340793967, "avg_loss_unweighted": 0.018488135188817978, "avg_weight": 0.5233811736106873, "std_delta_l": 0.016957957297563553, "std_weight": 0.02700965665280819, "weighted_loss": 0.009931370615959167 }, { "avg_delta_l": 0.0015849852934479713, "avg_loss_unweighted": 0.014496934600174427, "avg_weight": 0.5148517489433289, "std_delta_l": 0.013667989522218704, "std_weight": 0.020701950415968895, "weighted_loss": 0.007619985844939947 }, { "avg_delta_l": 0.001104984199628234, "avg_loss_unweighted": 0.01730123534798622, "avg_weight": 0.5212680101394653, "std_delta_l": 0.01621648296713829, "std_weight": 0.02298872172832489, "weighted_loss": 0.009292714297771454 }, { "avg_delta_l": -0.00190419377759099, "avg_loss_unweighted": 0.01999897137284279, "avg_weight": 0.5267012119293213, "std_delta_l": 0.019928818568587303, "std_weight": 0.03324027732014656, "weighted_loss": 0.010934569872915745 }, { "epoch": 16.730434782608697, "grad_norm": 0.3589528256418049, "learning_rate": 8.0337235590568e-07, "loss": 0.0184, "step": 970 }, { "avg_delta_l": 0.0022362619638442993, "avg_loss_unweighted": 0.014942772686481476, "avg_weight": 0.5122965574264526, "std_delta_l": 0.011423518881201744, "std_weight": 0.015339572913944721, "weighted_loss": 0.007775731850415468 }, { "avg_delta_l": -0.0006502033211290836, "avg_loss_unweighted": 0.018921412527561188, "avg_weight": 0.5190403461456299, "std_delta_l": 0.015521764755249023, "std_weight": 0.025596238672733307, "weighted_loss": 0.010028472170233727 }, { "avg_delta_l": 0.002487117424607277, "avg_loss_unweighted": 0.01514872070401907, "avg_weight": 0.5166293382644653, "std_delta_l": 0.01704028993844986, "std_weight": 0.02620156668126583, "weighted_loss": 0.008102558553218842 }, { "avg_delta_l": 0.00015979583258740604, "avg_loss_unweighted": 0.017181452363729477, "avg_weight": 0.5192691087722778, "std_delta_l": 0.015205471776425838, "std_weight": 0.022832300513982773, "weighted_loss": 0.009161878377199173 }, { "avg_delta_l": -0.001405605347827077, "avg_loss_unweighted": 0.021727250888943672, "avg_weight": 0.5213291645050049, "std_delta_l": 0.015279744751751423, "std_weight": 0.025671042501926422, "weighted_loss": 0.011587902903556824 }, { "avg_delta_l": -0.0005996730760671198, "avg_loss_unweighted": 0.01553682703524828, "avg_weight": 0.5179315209388733, "std_delta_l": 0.01327644195407629, "std_weight": 0.02261277660727501, "weighted_loss": 0.008262718096375465 }, { "avg_delta_l": 0.0028626862913370132, "avg_loss_unweighted": 0.014690298587083817, "avg_weight": 0.5150579810142517, "std_delta_l": 0.014404533430933952, "std_weight": 0.018227821215987206, "weighted_loss": 0.007746977265924215 }, { "avg_delta_l": 0.0005650765961036086, "avg_loss_unweighted": 0.016764486208558083, "avg_weight": 0.5168023705482483, "std_delta_l": 0.014441332779824734, "std_weight": 0.0226084366440773, "weighted_loss": 0.008889328688383102 }, { "avg_delta_l": -0.0004641501000151038, "avg_loss_unweighted": 0.019565051421523094, "avg_weight": 0.5173773169517517, "std_delta_l": 0.012959406711161137, "std_weight": 0.021952202543616295, "weighted_loss": 0.01029361505061388 }, { "avg_delta_l": 0.0012483862228691578, "avg_loss_unweighted": 0.01842862367630005, "avg_weight": 0.5175518989562988, "std_delta_l": 0.01338442973792553, "std_weight": 0.020041873678565025, "weighted_loss": 0.00965158548206091 }, { "avg_delta_l": 0.0005523789441213012, "avg_loss_unweighted": 0.017351029440760612, "avg_weight": 0.5189388394355774, "std_delta_l": 0.016590427607297897, "std_weight": 0.02587047405540943, "weighted_loss": 0.009235376492142677 }, { "avg_delta_l": 0.0006117273587733507, "avg_loss_unweighted": 0.019463665783405304, "avg_weight": 0.5168573260307312, "std_delta_l": 0.014972042292356491, "std_weight": 0.02374337613582611, "weighted_loss": 0.010335348546504974 }, { "avg_delta_l": -0.0009173662401735783, "avg_loss_unweighted": 0.015910930931568146, "avg_weight": 0.5212984681129456, "std_delta_l": 0.015517988242208958, "std_weight": 0.025778474286198616, "weighted_loss": 0.008584891445934772 }, { "avg_delta_l": 0.002454789588227868, "avg_loss_unweighted": 0.017158789560198784, "avg_weight": 0.5172015428543091, "std_delta_l": 0.01605980098247528, "std_weight": 0.022123055532574654, "weighted_loss": 0.00912568625062704 }, { "avg_delta_l": 0.002779175993055105, "avg_loss_unweighted": 0.015109985135495663, "avg_weight": 0.5138098001480103, "std_delta_l": 0.014615385793149471, "std_weight": 0.0193791426718235, "weighted_loss": 0.007969073951244354 }, { "avg_delta_l": 0.0027502584271132946, "avg_loss_unweighted": 0.018140239641070366, "avg_weight": 0.5155394077301025, "std_delta_l": 0.015189751982688904, "std_weight": 0.02112770639359951, "weighted_loss": 0.009613921865820885 }, { "avg_delta_l": 0.00015252630691975355, "avg_loss_unweighted": 0.01647462509572506, "avg_weight": 0.5218042135238647, "std_delta_l": 0.016842493787407875, "std_weight": 0.02510901167988777, "weighted_loss": 0.008957979269325733 }, { "avg_delta_l": -9.384457371197641e-06, "avg_loss_unweighted": 0.01726565696299076, "avg_weight": 0.5195217728614807, "std_delta_l": 0.014029123820364475, "std_weight": 0.01957685314118862, "weighted_loss": 0.009208611212670803 }, { "avg_delta_l": 0.00018926779739558697, "avg_loss_unweighted": 0.016518989577889442, "avg_weight": 0.5210398435592651, "std_delta_l": 0.0161360502243042, "std_weight": 0.02492581121623516, "weighted_loss": 0.00895884819328785 }, { "avg_delta_l": 0.0012184546794742346, "avg_loss_unweighted": 0.017424941062927246, "avg_weight": 0.5176334381103516, "std_delta_l": 0.015470752492547035, "std_weight": 0.021323973312973976, "weighted_loss": 0.009359537623822689 }, { "epoch": 16.904347826086955, "grad_norm": 0.2803539621961668, "learning_rate": 7.234904983486668e-07, "loss": 0.0183, "step": 980 }, { "avg_delta_l": 0.0016299675917252898, "avg_loss_unweighted": 0.01757109723985195, "avg_weight": 0.5195282697677612, "std_delta_l": 0.01773145981132984, "std_weight": 0.02429365925490856, "weighted_loss": 0.009341124445199966 }, { "avg_delta_l": 0.0038119538221508265, "avg_loss_unweighted": 0.01661565527319908, "avg_weight": 0.5172209143638611, "std_delta_l": 0.01877865567803383, "std_weight": 0.024427035823464394, "weighted_loss": 0.008818092755973339 }, { "avg_delta_l": 0.0003119230386801064, "avg_loss_unweighted": 0.019010746851563454, "avg_weight": 0.5202476382255554, "std_delta_l": 0.017376847565174103, "std_weight": 0.027450956404209137, "weighted_loss": 0.010180555284023285 }, { "avg_delta_l": -0.002451330656185746, "avg_loss_unweighted": 0.019731681793928146, "avg_weight": 0.524478018283844, "std_delta_l": 0.016278326511383057, "std_weight": 0.027700813487172127, "weighted_loss": 0.010619297623634338 }, { "avg_delta_l": 0.0019893983844667673, "avg_loss_unweighted": 0.015540511347353458, "avg_weight": 0.5130964517593384, "std_delta_l": 0.014077769592404366, "std_weight": 0.018270330503582954, "weighted_loss": 0.008134535513818264 }, { "avg_delta_l": -0.0006902299937792122, "avg_loss_unweighted": 0.015530777163803577, "avg_weight": 0.5240730047225952, "std_delta_l": 0.018220597878098488, "std_weight": 0.027940651401877403, "weighted_loss": 0.00846666656434536 }, { "avg_delta_l": -0.002666171407327056, "avg_loss_unweighted": 0.01833500526845455, "avg_weight": 0.5232138633728027, "std_delta_l": 0.014864780940115452, "std_weight": 0.028460755944252014, "weighted_loss": 0.00992524717003107 }, { "avg_delta_l": -0.002016346203163266, "avg_loss_unweighted": 0.02125697396695614, "avg_weight": 0.524587869644165, "std_delta_l": 0.01564997248351574, "std_weight": 0.0290373582392931, "weighted_loss": 0.011512653902173042 }, { "avg_delta_l": 0.0005118421395309269, "avg_loss_unweighted": 0.01805984415113926, "avg_weight": 0.5224819183349609, "std_delta_l": 0.017672860994935036, "std_weight": 0.0312067698687315, "weighted_loss": 0.009765686467289925 }, { "avg_delta_l": 0.0007589515880681574, "avg_loss_unweighted": 0.016357067972421646, "avg_weight": 0.5178879499435425, "std_delta_l": 0.014589336700737476, "std_weight": 0.02380974590778351, "weighted_loss": 0.00868661142885685 }, { "avg_delta_l": -0.00044762089964933693, "avg_loss_unweighted": 0.014735667034983635, "avg_weight": 0.5207532644271851, "std_delta_l": 0.01503837015479803, "std_weight": 0.023715155199170113, "weighted_loss": 0.007974264211952686 }, { "avg_delta_l": 0.001442858250811696, "avg_loss_unweighted": 0.014669036492705345, "avg_weight": 0.5059553384780884, "std_delta_l": 0.007150521036237478, "std_weight": 0.009364800527691841, "weighted_loss": 0.007504018489271402 }, { "avg_delta_l": 0.002418950665742159, "avg_loss_unweighted": 0.015692144632339478, "avg_weight": 0.5079072713851929, "std_delta_l": 0.011415394023060799, "std_weight": 0.011988556012511253, "weighted_loss": 0.008071289397776127 }, { "avg_delta_l": 0.0020376131869852543, "avg_loss_unweighted": 0.014789817854762077, "avg_weight": 0.5066837072372437, "std_delta_l": 0.008853393606841564, "std_weight": 0.01206368487328291, "weighted_loss": 0.007567360997200012 }, { "avg_delta_l": 0.0007788136135786772, "avg_loss_unweighted": 0.014869378879666328, "avg_weight": 0.5095392465591431, "std_delta_l": 0.010024836286902428, "std_weight": 0.016439765691757202, "weighted_loss": 0.007753415033221245 }, { "avg_delta_l": 0.0020208247005939484, "avg_loss_unweighted": 0.01510846707969904, "avg_weight": 0.5113247036933899, "std_delta_l": 0.011925255879759789, "std_weight": 0.018226809799671173, "weighted_loss": 0.007839992642402649 }, { "avg_delta_l": 0.005099042318761349, "avg_loss_unweighted": 0.014954598620533943, "avg_weight": 0.5074192881584167, "std_delta_l": 0.011454746127128601, "std_weight": 0.01193860825151205, "weighted_loss": 0.007673837710171938 }, { "avg_delta_l": 0.001564557896926999, "avg_loss_unweighted": 0.014411957003176212, "avg_weight": 0.5076483488082886, "std_delta_l": 0.009337102063000202, "std_weight": 0.013586881570518017, "weighted_loss": 0.007427618373185396 }, { "avg_delta_l": -0.0011254942510277033, "avg_loss_unweighted": 0.016180191189050674, "avg_weight": 0.5115136504173279, "std_delta_l": 0.008264558389782906, "std_weight": 0.014693262986838818, "weighted_loss": 0.00840223953127861 }, { "epoch": 17.069565217391304, "grad_norm": 0.23446771328945706, "learning_rate": 6.474808197191401e-07, "loss": 0.0166, "step": 990 }, { "avg_delta_l": 0.0031338671687990427, "avg_loss_unweighted": 0.015245018526911736, "avg_weight": 0.5079995393753052, "std_delta_l": 0.011160568334162235, "std_weight": 0.014559528790414333, "weighted_loss": 0.00791489239782095 }, { "avg_delta_l": 0.0022087846882641315, "avg_loss_unweighted": 0.015515260398387909, "avg_weight": 0.507634699344635, "std_delta_l": 0.010239722207188606, "std_weight": 0.013928264379501343, "weighted_loss": 0.007946975529193878 }, { "avg_delta_l": 0.002443511737510562, "avg_loss_unweighted": 0.018394406884908676, "avg_weight": 0.5104352235794067, "std_delta_l": 0.012524670921266079, "std_weight": 0.015294821001589298, "weighted_loss": 0.009510241448879242 }, { "avg_delta_l": 8.933965000323951e-05, "avg_loss_unweighted": 0.01592087745666504, "avg_weight": 0.5102226734161377, "std_delta_l": 0.009565340355038643, "std_weight": 0.015633966773748398, "weighted_loss": 0.008318735286593437 }, { "avg_delta_l": 0.001710766227915883, "avg_loss_unweighted": 0.01489319372922182, "avg_weight": 0.5116299986839294, "std_delta_l": 0.012708447873592377, "std_weight": 0.01918954588472843, "weighted_loss": 0.007754914462566376 }, { "avg_delta_l": 0.0012113091070204973, "avg_loss_unweighted": 0.01642575114965439, "avg_weight": 0.5071548819541931, "std_delta_l": 0.007806950248777866, "std_weight": 0.01282656379044056, "weighted_loss": 0.00842054933309555 }, { "avg_delta_l": 0.0019144490361213684, "avg_loss_unweighted": 0.01665722206234932, "avg_weight": 0.5086112022399902, "std_delta_l": 0.010347466915845871, "std_weight": 0.015388188883662224, "weighted_loss": 0.008590670302510262 }, { "avg_delta_l": 8.03027069196105e-05, "avg_loss_unweighted": 0.015251756645739079, "avg_weight": 0.5130192041397095, "std_delta_l": 0.012893946841359138, "std_weight": 0.021039944142103195, "weighted_loss": 0.007993627339601517 }, { "avg_delta_l": 0.0009201699867844582, "avg_loss_unweighted": 0.015309532172977924, "avg_weight": 0.5125681161880493, "std_delta_l": 0.011584858410060406, "std_weight": 0.017776664346456528, "weighted_loss": 0.007995382882654667 }, { "avg_delta_l": 0.0003456797858234495, "avg_loss_unweighted": 0.013712769374251366, "avg_weight": 0.5104022026062012, "std_delta_l": 0.00867936760187149, "std_weight": 0.012152375653386116, "weighted_loss": 0.007110957521945238 }, { "avg_delta_l": 0.0030736876651644707, "avg_loss_unweighted": 0.015326532535254955, "avg_weight": 0.5075958967208862, "std_delta_l": 0.01072855293750763, "std_weight": 0.012892544269561768, "weighted_loss": 0.007911236956715584 }, { "avg_delta_l": 0.002763621276244521, "avg_loss_unweighted": 0.014423102140426636, "avg_weight": 0.5072774887084961, "std_delta_l": 0.008816584944725037, "std_weight": 0.010132716968655586, "weighted_loss": 0.007382511161267757 }, { "avg_delta_l": 0.002734229201450944, "avg_loss_unweighted": 0.012495799921452999, "avg_weight": 0.5061396360397339, "std_delta_l": 0.009463726542890072, "std_weight": 0.01107771322131157, "weighted_loss": 0.006413280498236418 }, { "avg_delta_l": -9.20047314139083e-05, "avg_loss_unweighted": 0.01240855548530817, "avg_weight": 0.5082297325134277, "std_delta_l": 0.006928922608494759, "std_weight": 0.011499244719743729, "weighted_loss": 0.006369329057633877 }, { "avg_delta_l": 0.00011928696767427027, "avg_loss_unweighted": 0.016557591035962105, "avg_weight": 0.51187664270401, "std_delta_l": 0.00937384832650423, "std_weight": 0.014196733944118023, "weighted_loss": 0.008626130409538746 }, { "avg_delta_l": 0.0029912458267062902, "avg_loss_unweighted": 0.016403453424572945, "avg_weight": 0.5066582560539246, "std_delta_l": 0.009670114144682884, "std_weight": 0.012090632691979408, "weighted_loss": 0.008374673314392567 }, { "avg_delta_l": -0.0007314521935768425, "avg_loss_unweighted": 0.015350391156971455, "avg_weight": 0.5125612020492554, "std_delta_l": 0.009621703997254372, "std_weight": 0.01594782993197441, "weighted_loss": 0.00805521011352539 }, { "avg_delta_l": 0.002353802789002657, "avg_loss_unweighted": 0.01455097459256649, "avg_weight": 0.5111854076385498, "std_delta_l": 0.014103977009654045, "std_weight": 0.019408462569117546, "weighted_loss": 0.0075907898135483265 }, { "avg_delta_l": 0.001501955557614565, "avg_loss_unweighted": 0.014253847301006317, "avg_weight": 0.5091708898544312, "std_delta_l": 0.01059708558022976, "std_weight": 0.013735863380134106, "weighted_loss": 0.007360890507698059 }, { "avg_delta_l": 0.004738754592835903, "avg_loss_unweighted": 0.01312834769487381, "avg_weight": 0.5036846399307251, "std_delta_l": 0.008867628872394562, "std_weight": 0.007166985888034105, "weighted_loss": 0.006637114100158215 }, { "epoch": 17.243478260869566, "grad_norm": 0.24465519030295269, "learning_rate": 5.754121432187681e-07, "loss": 0.0156, "step": 1000 }, { "avg_delta_l": 0.001960660796612501, "avg_loss_unweighted": 0.017768455669283867, "avg_weight": 0.5123658776283264, "std_delta_l": 0.01302239578217268, "std_weight": 0.01896796189248562, "weighted_loss": 0.009245985187590122 }, { "avg_delta_l": 0.0015226793475449085, "avg_loss_unweighted": 0.01315609272569418, "avg_weight": 0.5113490223884583, "std_delta_l": 0.009245066903531551, "std_weight": 0.010364960879087448, "weighted_loss": 0.006891146767884493 }, { "avg_delta_l": 0.0018783584237098694, "avg_loss_unweighted": 0.013129712082445621, "avg_weight": 0.5129897594451904, "std_delta_l": 0.01432205643504858, "std_weight": 0.019140996038913727, "weighted_loss": 0.006956622004508972 }, { "avg_delta_l": 0.0037468071095645428, "avg_loss_unweighted": 0.016162138432264328, "avg_weight": 0.5073950290679932, "std_delta_l": 0.01048650685697794, "std_weight": 0.013156291097402573, "weighted_loss": 0.008241849951446056 }, { "avg_delta_l": 0.0052309962920844555, "avg_loss_unweighted": 0.015545646660029888, "avg_weight": 0.5078431367874146, "std_delta_l": 0.012280208058655262, "std_weight": 0.009913269430398941, "weighted_loss": 0.00797156523913145 }, { "avg_delta_l": 0.00048163143219426274, "avg_loss_unweighted": 0.015555061399936676, "avg_weight": 0.5117940902709961, "std_delta_l": 0.011223988607525826, "std_weight": 0.020734533667564392, "weighted_loss": 0.00811171717941761 }, { "avg_delta_l": -0.0008705687941983342, "avg_loss_unweighted": 0.013686040416359901, "avg_weight": 0.5126467943191528, "std_delta_l": 0.00999538041651249, "std_weight": 0.020394429564476013, "weighted_loss": 0.007176557555794716 }, { "avg_delta_l": 0.0023481466341763735, "avg_loss_unweighted": 0.013404695317149162, "avg_weight": 0.5103357434272766, "std_delta_l": 0.012374508194625378, "std_weight": 0.018003413453698158, "weighted_loss": 0.007009350229054689 }, { "avg_delta_l": -0.0007342754979617894, "avg_loss_unweighted": 0.015140201896429062, "avg_weight": 0.5155513286590576, "std_delta_l": 0.012166622094810009, "std_weight": 0.02195575460791588, "weighted_loss": 0.008000968024134636 }, { "avg_delta_l": 0.0021463464945554733, "avg_loss_unweighted": 0.0157551858574152, "avg_weight": 0.5120149850845337, "std_delta_l": 0.012434565462172031, "std_weight": 0.018541380763053894, "weighted_loss": 0.008204901590943336 }, { "avg_delta_l": 0.0020475108176469803, "avg_loss_unweighted": 0.01657501421868801, "avg_weight": 0.5131394863128662, "std_delta_l": 0.014237871393561363, "std_weight": 0.019145473837852478, "weighted_loss": 0.008628111332654953 }, { "avg_delta_l": -0.0013975797919556499, "avg_loss_unweighted": 0.01707167737185955, "avg_weight": 0.5169652700424194, "std_delta_l": 0.012868526391685009, "std_weight": 0.023755501955747604, "weighted_loss": 0.009113024920225143 }, { "avg_delta_l": 0.0012673622695729136, "avg_loss_unweighted": 0.01753847301006317, "avg_weight": 0.5138543248176575, "std_delta_l": 0.013771901838481426, "std_weight": 0.019478922709822655, "weighted_loss": 0.009234301745891571 }, { "avg_delta_l": -0.0026901764795184135, "avg_loss_unweighted": 0.018421346321702003, "avg_weight": 0.5191446542739868, "std_delta_l": 0.012388695031404495, "std_weight": 0.023509588092565536, "weighted_loss": 0.009776086546480656 }, { "avg_delta_l": 0.0007551489397883415, "avg_loss_unweighted": 0.01725277677178383, "avg_weight": 0.5128670334815979, "std_delta_l": 0.012691437266767025, "std_weight": 0.019294878467917442, "weighted_loss": 0.008991699665784836 }, { "avg_delta_l": 0.002003809902817011, "avg_loss_unweighted": 0.016646133735775948, "avg_weight": 0.5118465423583984, "std_delta_l": 0.013065054081380367, "std_weight": 0.019433844834566116, "weighted_loss": 0.008642551489174366 }, { "avg_delta_l": 0.003345032688230276, "avg_loss_unweighted": 0.012423541396856308, "avg_weight": 0.511430025100708, "std_delta_l": 0.013914323411881924, "std_weight": 0.01789742149412632, "weighted_loss": 0.0065713906660676 }, { "avg_delta_l": 0.0005795275792479515, "avg_loss_unweighted": 0.016043147072196007, "avg_weight": 0.5127406120300293, "std_delta_l": 0.012505227699875832, "std_weight": 0.01966269500553608, "weighted_loss": 0.0083400784060359 }, { "avg_delta_l": 0.002382345264777541, "avg_loss_unweighted": 0.015021143481135368, "avg_weight": 0.5122048854827881, "std_delta_l": 0.01443915069103241, "std_weight": 0.019832748919725418, "weighted_loss": 0.007855899631977081 }, { "avg_delta_l": 0.0015506613999605179, "avg_loss_unweighted": 0.01619028113782406, "avg_weight": 0.5105252265930176, "std_delta_l": 0.009515984915196896, "std_weight": 0.014898027293384075, "weighted_loss": 0.00833708606660366 }, { "epoch": 17.417391304347827, "grad_norm": 0.251277599233262, "learning_rate": 5.073497236564973e-07, "loss": 0.0163, "step": 1010 }, { "avg_delta_l": 3.5878620110452175e-05, "avg_loss_unweighted": 0.01623803749680519, "avg_weight": 0.51555997133255, "std_delta_l": 0.012711473740637302, "std_weight": 0.02025189995765686, "weighted_loss": 0.008565016090869904 }, { "avg_delta_l": 0.0008018077351152897, "avg_loss_unweighted": 0.01485157199203968, "avg_weight": 0.5125020742416382, "std_delta_l": 0.012175369076430798, "std_weight": 0.019963642582297325, "weighted_loss": 0.007729881908744574 }, { "avg_delta_l": 0.0027350089512765408, "avg_loss_unweighted": 0.014439699240028858, "avg_weight": 0.5096362233161926, "std_delta_l": 0.011792540550231934, "std_weight": 0.013380852527916431, "weighted_loss": 0.007479660678654909 }, { "avg_delta_l": 0.0007337552378885448, "avg_loss_unweighted": 0.016601068899035454, "avg_weight": 0.5155375599861145, "std_delta_l": 0.013861147686839104, "std_weight": 0.022436564788222313, "weighted_loss": 0.008740435354411602 }, { "avg_delta_l": 0.002717189257964492, "avg_loss_unweighted": 0.015076709911227226, "avg_weight": 0.5078680515289307, "std_delta_l": 0.010520236566662788, "std_weight": 0.014597050845623016, "weighted_loss": 0.007766378577798605 }, { "avg_delta_l": 0.000700397533364594, "avg_loss_unweighted": 0.015246011316776276, "avg_weight": 0.5143011808395386, "std_delta_l": 0.013880872167646885, "std_weight": 0.02314789406955242, "weighted_loss": 0.008052870631217957 }, { "avg_delta_l": 0.0010450119152665138, "avg_loss_unweighted": 0.015397639013826847, "avg_weight": 0.5116381645202637, "std_delta_l": 0.01118131447583437, "std_weight": 0.015619276091456413, "weighted_loss": 0.00796952098608017 }, { "avg_delta_l": 0.0014316581655293703, "avg_loss_unweighted": 0.014715788885951042, "avg_weight": 0.5119805932044983, "std_delta_l": 0.011642435565590858, "std_weight": 0.017854461446404457, "weighted_loss": 0.007683773525059223 }, { "avg_delta_l": -0.00012225151294842362, "avg_loss_unweighted": 0.01681351289153099, "avg_weight": 0.5138153433799744, "std_delta_l": 0.013085036538541317, "std_weight": 0.02221440151333809, "weighted_loss": 0.00891558825969696 }, { "avg_delta_l": 0.00012860549031756818, "avg_loss_unweighted": 0.0152665413916111, "avg_weight": 0.516878604888916, "std_delta_l": 0.014566843397915363, "std_weight": 0.023755770176649094, "weighted_loss": 0.008099235594272614 }, { "avg_delta_l": -0.0021167462691664696, "avg_loss_unweighted": 0.014992610551416874, "avg_weight": 0.5156332850456238, "std_delta_l": 0.011452310718595982, "std_weight": 0.021766094490885735, "weighted_loss": 0.007874242030084133 }, { "avg_delta_l": 0.0011741225607693195, "avg_loss_unweighted": 0.013584007509052753, "avg_weight": 0.5133529305458069, "std_delta_l": 0.013787928968667984, "std_weight": 0.019858820363879204, "weighted_loss": 0.00715566985309124 }, { "avg_delta_l": 0.0023016962222754955, "avg_loss_unweighted": 0.013665130361914635, "avg_weight": 0.5093733668327332, "std_delta_l": 0.010700765997171402, "std_weight": 0.015050484798848629, "weighted_loss": 0.007057138253003359 }, { "avg_delta_l": 0.001326443045400083, "avg_loss_unweighted": 0.01644848845899105, "avg_weight": 0.5133978128433228, "std_delta_l": 0.01426080334931612, "std_weight": 0.02216934598982334, "weighted_loss": 0.008595932275056839 }, { "avg_delta_l": 0.001595561159774661, "avg_loss_unweighted": 0.014394293539226055, "avg_weight": 0.5128915309906006, "std_delta_l": 0.011723470874130726, "std_weight": 0.014570691622793674, "weighted_loss": 0.007542246952652931 }, { "avg_delta_l": 0.0008690344402566552, "avg_loss_unweighted": 0.013567297719419003, "avg_weight": 0.5105563402175903, "std_delta_l": 0.009303831495344639, "std_weight": 0.015446692705154419, "weighted_loss": 0.007076675537973642 }, { "avg_delta_l": -0.0011646943166851997, "avg_loss_unweighted": 0.018106166273355484, "avg_weight": 0.5167227983474731, "std_delta_l": 0.011642269790172577, "std_weight": 0.018803803250193596, "weighted_loss": 0.009538665413856506 }, { "avg_delta_l": 0.00011969672050327063, "avg_loss_unweighted": 0.014373240061104298, "avg_weight": 0.516090989112854, "std_delta_l": 0.013190858997404575, "std_weight": 0.01910722441971302, "weighted_loss": 0.007607915438711643 }, { "avg_delta_l": -0.0001168327871710062, "avg_loss_unweighted": 0.016552597284317017, "avg_weight": 0.5184468626976013, "std_delta_l": 0.016297539696097374, "std_weight": 0.026700112968683243, "weighted_loss": 0.008818509057164192 }, { "avg_delta_l": 0.0009832045761868358, "avg_loss_unweighted": 0.016269292682409286, "avg_weight": 0.515618085861206, "std_delta_l": 0.014173283241689205, "std_weight": 0.018555667251348495, "weighted_loss": 0.008567043580114841 }, { "epoch": 17.591304347826085, "grad_norm": 0.24559719880107664, "learning_rate": 4.433551883633719e-07, "loss": 0.0161, "step": 1020 }, { "avg_delta_l": 0.0018028027843683958, "avg_loss_unweighted": 0.015698369592428207, "avg_weight": 0.5157625079154968, "std_delta_l": 0.01539900153875351, "std_weight": 0.02311747893691063, "weighted_loss": 0.008265325799584389 }, { "avg_delta_l": -0.001076226937584579, "avg_loss_unweighted": 0.017008941620588303, "avg_weight": 0.514970064163208, "std_delta_l": 0.010742910206317902, "std_weight": 0.01844925247132778, "weighted_loss": 0.008884873241186142 }, { "avg_delta_l": -0.0017969952896237373, "avg_loss_unweighted": 0.015564238652586937, "avg_weight": 0.5173996686935425, "std_delta_l": 0.010531519539654255, "std_weight": 0.020297614857554436, "weighted_loss": 0.008178873918950558 }, { "avg_delta_l": 0.0022219778038561344, "avg_loss_unweighted": 0.01473273430019617, "avg_weight": 0.5114591717720032, "std_delta_l": 0.01207448448985815, "std_weight": 0.014246728271245956, "weighted_loss": 0.007668713573366404 }, { "avg_delta_l": -0.0003582738572731614, "avg_loss_unweighted": 0.017480118200182915, "avg_weight": 0.5191684365272522, "std_delta_l": 0.01402714941650629, "std_weight": 0.02309272810816765, "weighted_loss": 0.009298987686634064 }, { "avg_delta_l": -0.0013001187471672893, "avg_loss_unweighted": 0.01656254567205906, "avg_weight": 0.5171124339103699, "std_delta_l": 0.01133247371762991, "std_weight": 0.01803124137222767, "weighted_loss": 0.008800709620118141 }, { "avg_delta_l": -0.0021401364356279373, "avg_loss_unweighted": 0.015932302922010422, "avg_weight": 0.5211947560310364, "std_delta_l": 0.015561857260763645, "std_weight": 0.030757656320929527, "weighted_loss": 0.008626454509794712 }, { "avg_delta_l": 0.00011483803973533213, "avg_loss_unweighted": 0.0169546939432621, "avg_weight": 0.5180301666259766, "std_delta_l": 0.013882300816476345, "std_weight": 0.021947382017970085, "weighted_loss": 0.009043091908097267 }, { "avg_delta_l": 0.0014887070283293724, "avg_loss_unweighted": 0.014567411504685879, "avg_weight": 0.514758288860321, "std_delta_l": 0.014502966776490211, "std_weight": 0.024627529084682465, "weighted_loss": 0.007742489222437143 }, { "avg_delta_l": 0.004461281932890415, "avg_loss_unweighted": 0.013502061367034912, "avg_weight": 0.5091148018836975, "std_delta_l": 0.013090146705508232, "std_weight": 0.013927977532148361, "weighted_loss": 0.006958117708563805 }, { "avg_delta_l": -0.0003743223496712744, "avg_loss_unweighted": 0.015380005352199078, "avg_weight": 0.5176209807395935, "std_delta_l": 0.01469884067773819, "std_weight": 0.022700855508446693, "weighted_loss": 0.008159839548170567 }, { "avg_delta_l": 0.0015880464343354106, "avg_loss_unweighted": 0.016620086506009102, "avg_weight": 0.5171169638633728, "std_delta_l": 0.01585443690419197, "std_weight": 0.02242802083492279, "weighted_loss": 0.008823116309940815 }, { "avg_delta_l": -0.0018589000683277845, "avg_loss_unweighted": 0.01735982671380043, "avg_weight": 0.5213356614112854, "std_delta_l": 0.014722649939358234, "std_weight": 0.02714504674077034, "weighted_loss": 0.009375562891364098 }, { "avg_delta_l": -0.002265757182613015, "avg_loss_unweighted": 0.019210895523428917, "avg_weight": 0.5210953950881958, "std_delta_l": 0.01468344870954752, "std_weight": 0.02602570131421089, "weighted_loss": 0.010334481485188007 }, { "avg_delta_l": -0.0015618641627952456, "avg_loss_unweighted": 0.016641588881611824, "avg_weight": 0.5192754864692688, "std_delta_l": 0.014056823216378689, "std_weight": 0.02493620663881302, "weighted_loss": 0.008918436244130135 }, { "avg_delta_l": -0.0016369866207242012, "avg_loss_unweighted": 0.018117910251021385, "avg_weight": 0.5204439163208008, "std_delta_l": 0.013914680108428001, "std_weight": 0.02619227021932602, "weighted_loss": 0.009689254686236382 }, { "avg_delta_l": -0.0023618668783456087, "avg_loss_unweighted": 0.016443736851215363, "avg_weight": 0.5213178992271423, "std_delta_l": 0.014566109515726566, "std_weight": 0.02590291202068329, "weighted_loss": 0.008879565633833408 }, { "avg_delta_l": 0.00046290879254229367, "avg_loss_unweighted": 0.014617775566875935, "avg_weight": 0.5151819586753845, "std_delta_l": 0.01344766654074192, "std_weight": 0.020613474771380424, "weighted_loss": 0.007736474275588989 }, { "avg_delta_l": -0.0008463846752420068, "avg_loss_unweighted": 0.016861548647284508, "avg_weight": 0.5166494250297546, "std_delta_l": 0.012804955244064331, "std_weight": 0.02154608443379402, "weighted_loss": 0.008918365463614464 }, { "avg_delta_l": -0.0002659243473317474, "avg_loss_unweighted": 0.016630368307232857, "avg_weight": 0.5159698128700256, "std_delta_l": 0.012251954525709152, "std_weight": 0.020154645666480064, "weighted_loss": 0.008756497874855995 }, { "epoch": 17.765217391304347, "grad_norm": 0.2823503664794058, "learning_rate": 3.8348648139188206e-07, "loss": 0.0173, "step": 1030 }, { "avg_delta_l": -0.0023547224700450897, "avg_loss_unweighted": 0.016423489898443222, "avg_weight": 0.5202850699424744, "std_delta_l": 0.012003512121737003, "std_weight": 0.021748363971710205, "weighted_loss": 0.008735000155866146 }, { "avg_delta_l": -0.0006551106343977153, "avg_loss_unweighted": 0.017564846202731133, "avg_weight": 0.5188287496566772, "std_delta_l": 0.01461301650851965, "std_weight": 0.02432899922132492, "weighted_loss": 0.009310286492109299 }, { "avg_delta_l": 0.0005276785814203322, "avg_loss_unweighted": 0.015060633420944214, "avg_weight": 0.5192481279373169, "std_delta_l": 0.016077542677521706, "std_weight": 0.028178509324789047, "weighted_loss": 0.008100251667201519 }, { "avg_delta_l": 5.3944793762639165e-05, "avg_loss_unweighted": 0.017544854432344437, "avg_weight": 0.5135877132415771, "std_delta_l": 0.009736305102705956, "std_weight": 0.014596723020076752, "weighted_loss": 0.009225751273334026 }, { "avg_delta_l": 0.0006314214551821351, "avg_loss_unweighted": 0.015955490991473198, "avg_weight": 0.5192503333091736, "std_delta_l": 0.016070056706666946, "std_weight": 0.02648266963660717, "weighted_loss": 0.008564702235162258 }, { "avg_delta_l": 0.0010196918155997992, "avg_loss_unweighted": 0.01627279259264469, "avg_weight": 0.5174038410186768, "std_delta_l": 0.01462831161916256, "std_weight": 0.02132708951830864, "weighted_loss": 0.008611850440502167 }, { "avg_delta_l": -0.0030147917568683624, "avg_loss_unweighted": 0.019333261996507645, "avg_weight": 0.5214747786521912, "std_delta_l": 0.013868698850274086, "std_weight": 0.026545273140072823, "weighted_loss": 0.01036995742470026 }, { "avg_delta_l": 0.0009574211435392499, "avg_loss_unweighted": 0.016370810568332672, "avg_weight": 0.5159419775009155, "std_delta_l": 0.014679977670311928, "std_weight": 0.02107958309352398, "weighted_loss": 0.00873809028416872 }, { "avg_delta_l": -0.004427929874509573, "avg_loss_unweighted": 0.017356859520077705, "avg_weight": 0.5236957669258118, "std_delta_l": 0.014620410278439522, "std_weight": 0.0298798605799675, "weighted_loss": 0.00939987227320671 }, { "avg_delta_l": 0.000940781319513917, "avg_loss_unweighted": 0.014519688673317432, "avg_weight": 0.5196554064750671, "std_delta_l": 0.016617070883512497, "std_weight": 0.026380568742752075, "weighted_loss": 0.007868653163313866 }, { "avg_delta_l": 0.0003222968662157655, "avg_loss_unweighted": 0.014688512310385704, "avg_weight": 0.5181907415390015, "std_delta_l": 0.015187997370958328, "std_weight": 0.024210233241319656, "weighted_loss": 0.00784890167415142 }, { "avg_delta_l": 0.0029148724861443043, "avg_loss_unweighted": 0.01177668571472168, "avg_weight": 0.5115137100219727, "std_delta_l": 0.011719350703060627, "std_weight": 0.015275415033102036, "weighted_loss": 0.006164586171507835 }, { "avg_delta_l": 0.0017747014062479138, "avg_loss_unweighted": 0.014999360777437687, "avg_weight": 0.5156161189079285, "std_delta_l": 0.013894526287913322, "std_weight": 0.021170388907194138, "weighted_loss": 0.007923849858343601 }, { "avg_delta_l": -8.344629895873368e-05, "avg_loss_unweighted": 0.01690758764743805, "avg_weight": 0.5187017917633057, "std_delta_l": 0.01452938187867403, "std_weight": 0.021100152283906937, "weighted_loss": 0.00892590545117855 }, { "avg_delta_l": 0.0012351591140031815, "avg_loss_unweighted": 0.014026844874024391, "avg_weight": 0.5158743858337402, "std_delta_l": 0.013990010134875774, "std_weight": 0.020151976495981216, "weighted_loss": 0.007429240737110376 }, { "avg_delta_l": -0.006134417373687029, "avg_loss_unweighted": 0.02051285095512867, "avg_weight": 0.5294241309165955, "std_delta_l": 0.015546401962637901, "std_weight": 0.03363867104053497, "weighted_loss": 0.011281631886959076 }, { "avg_delta_l": -0.0004925359971821308, "avg_loss_unweighted": 0.01693020388484001, "avg_weight": 0.5215296745300293, "std_delta_l": 0.01732204481959343, "std_weight": 0.027034396305680275, "weighted_loss": 0.009073707275092602 }, { "avg_delta_l": 0.0004297267587389797, "avg_loss_unweighted": 0.014139216393232346, "avg_weight": 0.5178548097610474, "std_delta_l": 0.015012112446129322, "std_weight": 0.02373966947197914, "weighted_loss": 0.007612126879394054 }, { "avg_delta_l": -0.0010080402716994286, "avg_loss_unweighted": 0.018990667536854744, "avg_weight": 0.5215912461280823, "std_delta_l": 0.014526378363370895, "std_weight": 0.025955453515052795, "weighted_loss": 0.01018291525542736 }, { "avg_delta_l": -0.0022650696337223053, "avg_loss_unweighted": 0.019894424825906754, "avg_weight": 0.523745596408844, "std_delta_l": 0.015381700359284878, "std_weight": 0.025477463379502296, "weighted_loss": 0.01070443820208311 }, { "epoch": 17.93913043478261, "grad_norm": 0.27355041469007135, "learning_rate": 3.277978110503377e-07, "loss": 0.0176, "step": 1040 }, { "avg_delta_l": 0.0031363810412585735, "avg_loss_unweighted": 0.014265662990510464, "avg_weight": 0.513776421546936, "std_delta_l": 0.013912664726376534, "std_weight": 0.020336776971817017, "weighted_loss": 0.0075138891115784645 }, { "avg_delta_l": -0.0002786502009257674, "avg_loss_unweighted": 0.01571456529200077, "avg_weight": 0.5208030939102173, "std_delta_l": 0.01615701988339424, "std_weight": 0.026958569884300232, "weighted_loss": 0.008436590433120728 }, { "avg_delta_l": -0.0019248307216912508, "avg_loss_unweighted": 0.017215216532349586, "avg_weight": 0.522868275642395, "std_delta_l": 0.01581469364464283, "std_weight": 0.025543611496686935, "weighted_loss": 0.009197848848998547 }, { "avg_delta_l": -0.006240916904062033, "avg_loss_unweighted": 0.01929318532347679, "avg_weight": 0.5275587439537048, "std_delta_l": 0.011811569333076477, "std_weight": 0.025883018970489502, "weighted_loss": 0.01042972318828106 }, { "avg_delta_l": 0.0011040015378966928, "avg_loss_unweighted": 0.014793523587286472, "avg_weight": 0.5196369290351868, "std_delta_l": 0.01503632590174675, "std_weight": 0.020832648500800133, "weighted_loss": 0.007899531163275242 }, { "avg_delta_l": 0.0015197007451206446, "avg_loss_unweighted": 0.015164517797529697, "avg_weight": 0.5164334774017334, "std_delta_l": 0.015097212046384811, "std_weight": 0.02477359212934971, "weighted_loss": 0.008078786544501781 }, { "avg_delta_l": 0.002982612932100892, "avg_loss_unweighted": 0.013994576409459114, "avg_weight": 0.514598548412323, "std_delta_l": 0.014857245609164238, "std_weight": 0.019653374329209328, "weighted_loss": 0.007411165162920952 }, { "avg_delta_l": 0.002060337457805872, "avg_loss_unweighted": 0.014144554734230042, "avg_weight": 0.5048654079437256, "std_delta_l": 0.008026638068258762, "std_weight": 0.0078093004412949085, "weighted_loss": 0.007190346252173185 }, { "avg_delta_l": 0.001071087783202529, "avg_loss_unweighted": 0.013558683916926384, "avg_weight": 0.5067232847213745, "std_delta_l": 0.008924809284508228, "std_weight": 0.012252544984221458, "weighted_loss": 0.006939546205103397 }, { "avg_delta_l": -2.374392352066934e-05, "avg_loss_unweighted": 0.016009889543056488, "avg_weight": 0.5096990466117859, "std_delta_l": 0.00982622243463993, "std_weight": 0.016653141006827354, "weighted_loss": 0.008300269022583961 }, { "avg_delta_l": -0.0017434777691960335, "avg_loss_unweighted": 0.016324378550052643, "avg_weight": 0.5119341611862183, "std_delta_l": 0.008341819047927856, "std_weight": 0.015917230397462845, "weighted_loss": 0.00848699826747179 }, { "avg_delta_l": 0.0002496616216376424, "avg_loss_unweighted": 0.014062582515180111, "avg_weight": 0.5086707472801208, "std_delta_l": 0.009623789228498936, "std_weight": 0.01544984895735979, "weighted_loss": 0.007279715035110712 }, { "avg_delta_l": 0.0011676813010126352, "avg_loss_unweighted": 0.01606217958033085, "avg_weight": 0.5135651230812073, "std_delta_l": 0.013607212342321873, "std_weight": 0.022390229627490044, "weighted_loss": 0.008488522842526436 }, { "avg_delta_l": 0.0022827896755188704, "avg_loss_unweighted": 0.013436880894005299, "avg_weight": 0.5064849853515625, "std_delta_l": 0.00958067923784256, "std_weight": 0.01263509877026081, "weighted_loss": 0.006887170020490885 }, { "avg_delta_l": 0.0014734062133356929, "avg_loss_unweighted": 0.014927305281162262, "avg_weight": 0.5077676773071289, "std_delta_l": 0.008460355922579765, "std_weight": 0.011155102401971817, "weighted_loss": 0.0076510170474648476 }, { "avg_delta_l": 0.0018740276573225856, "avg_loss_unweighted": 0.01540521252900362, "avg_weight": 0.5087313055992126, "std_delta_l": 0.010582396760582924, "std_weight": 0.01406131498515606, "weighted_loss": 0.007923110388219357 }, { "avg_delta_l": 0.00036973506212234497, "avg_loss_unweighted": 0.013144079595804214, "avg_weight": 0.5109426975250244, "std_delta_l": 0.01058177649974823, "std_weight": 0.017063964158296585, "weighted_loss": 0.0068518901243805885 }, { "avg_delta_l": 0.0016460188198834658, "avg_loss_unweighted": 0.015884464606642723, "avg_weight": 0.5092266798019409, "std_delta_l": 0.011446302756667137, "std_weight": 0.014924189075827599, "weighted_loss": 0.008197066374123096 }, { "avg_delta_l": 0.00014906741853337735, "avg_loss_unweighted": 0.014126339927315712, "avg_weight": 0.5119156837463379, "std_delta_l": 0.011276690289378166, "std_weight": 0.016354523599147797, "weighted_loss": 0.007321632467210293 }, { "epoch": 18.104347826086958, "grad_norm": 0.24495616008922252, "learning_rate": 2.763396008197833e-07, "loss": 0.015, "step": 1050 }, { "avg_delta_l": 0.0009295610943809152, "avg_loss_unweighted": 0.016897886991500854, "avg_weight": 0.5103798508644104, "std_delta_l": 0.011636217124760151, "std_weight": 0.017649421468377113, "weighted_loss": 0.008731139823794365 }, { "avg_delta_l": -0.0005610486841760576, "avg_loss_unweighted": 0.014441930688917637, "avg_weight": 0.5105249881744385, "std_delta_l": 0.00955281499773264, "std_weight": 0.0162650216370821, "weighted_loss": 0.007484396919608116 }, { "avg_delta_l": 7.724478200543672e-05, "avg_loss_unweighted": 0.014709324575960636, "avg_weight": 0.5109835863113403, "std_delta_l": 0.009020605124533176, "std_weight": 0.012299158610403538, "weighted_loss": 0.007647381164133549 }, { "avg_delta_l": 0.0010550797451287508, "avg_loss_unweighted": 0.0142636364325881, "avg_weight": 0.5095971822738647, "std_delta_l": 0.009934733621776104, "std_weight": 0.012389886192977428, "weighted_loss": 0.007391403894871473 }, { "avg_delta_l": -1.6986392438411713e-05, "avg_loss_unweighted": 0.01660241186618805, "avg_weight": 0.5153846740722656, "std_delta_l": 0.013639121316373348, "std_weight": 0.02150493487715721, "weighted_loss": 0.008731711655855179 }, { "avg_delta_l": -0.0002573896199464798, "avg_loss_unweighted": 0.014447253197431564, "avg_weight": 0.5134151577949524, "std_delta_l": 0.01254977099597454, "std_weight": 0.02144971489906311, "weighted_loss": 0.007632027380168438 }, { "avg_delta_l": 5.408006836660206e-05, "avg_loss_unweighted": 0.015313755720853806, "avg_weight": 0.5125942230224609, "std_delta_l": 0.011951147578656673, "std_weight": 0.0202559232711792, "weighted_loss": 0.008033832535147667 }, { "avg_delta_l": 0.004026384558528662, "avg_loss_unweighted": 0.012657418847084045, "avg_weight": 0.50710129737854, "std_delta_l": 0.011564134620130062, "std_weight": 0.01159397792071104, "weighted_loss": 0.006498037837445736 }, { "avg_delta_l": 0.0006382983410730958, "avg_loss_unweighted": 0.015397630631923676, "avg_weight": 0.5094448328018188, "std_delta_l": 0.009740234352648258, "std_weight": 0.01547941379249096, "weighted_loss": 0.008029560558497906 }, { "avg_delta_l": 0.0014990880154073238, "avg_loss_unweighted": 0.015229087322950363, "avg_weight": 0.5087113380432129, "std_delta_l": 0.010117837227880955, "std_weight": 0.015904687345027924, "weighted_loss": 0.007852882146835327 }, { "avg_delta_l": 0.0024432879872620106, "avg_loss_unweighted": 0.016263756901025772, "avg_weight": 0.5070504546165466, "std_delta_l": 0.009449257515370846, "std_weight": 0.012352537363767624, "weighted_loss": 0.008305875584483147 }, { "avg_delta_l": 0.0012038564309477806, "avg_loss_unweighted": 0.016879459843039513, "avg_weight": 0.5119020938873291, "std_delta_l": 0.012482939288020134, "std_weight": 0.017853286117315292, "weighted_loss": 0.008722010999917984 }, { "avg_delta_l": 0.0024504410102963448, "avg_loss_unweighted": 0.013062446378171444, "avg_weight": 0.509273111820221, "std_delta_l": 0.01148068904876709, "std_weight": 0.015788214281201363, "weighted_loss": 0.0067550488747656345 }, { "avg_delta_l": 0.001170838950201869, "avg_loss_unweighted": 0.014188612811267376, "avg_weight": 0.5047614574432373, "std_delta_l": 0.006159683223813772, "std_weight": 0.008289272896945477, "weighted_loss": 0.007181954570114613 }, { "avg_delta_l": -0.0009347581071779132, "avg_loss_unweighted": 0.014698350802063942, "avg_weight": 0.5126971006393433, "std_delta_l": 0.010384139604866505, "std_weight": 0.018986713141202927, "weighted_loss": 0.007666630204766989 }, { "avg_delta_l": 0.0018109853845089674, "avg_loss_unweighted": 0.015774408355355263, "avg_weight": 0.5084558725357056, "std_delta_l": 0.010829797945916653, "std_weight": 0.013295642100274563, "weighted_loss": 0.008113786578178406 }, { "avg_delta_l": 0.00014483495033346117, "avg_loss_unweighted": 0.01454873662441969, "avg_weight": 0.5132032632827759, "std_delta_l": 0.011467824690043926, "std_weight": 0.01668083667755127, "weighted_loss": 0.007610586937516928 }, { "avg_delta_l": 0.001867835409939289, "avg_loss_unweighted": 0.013698779046535492, "avg_weight": 0.5078129768371582, "std_delta_l": 0.009363537654280663, "std_weight": 0.013184481300413609, "weighted_loss": 0.00701707461848855 }, { "avg_delta_l": 0.001051545375958085, "avg_loss_unweighted": 0.012613697908818722, "avg_weight": 0.5097012519836426, "std_delta_l": 0.010460044257342815, "std_weight": 0.012900019064545631, "weighted_loss": 0.006578175351023674 }, { "avg_delta_l": 0.002992139896377921, "avg_loss_unweighted": 0.012471472844481468, "avg_weight": 0.5064610242843628, "std_delta_l": 0.009845957159996033, "std_weight": 0.009195067919790745, "weighted_loss": 0.006374905817210674 }, { "epoch": 18.278260869565216, "grad_norm": 0.22427444063899146, "learning_rate": 2.2915844369790164e-07, "loss": 0.0152, "step": 1060 }, { "avg_delta_l": 0.0006942623294889927, "avg_loss_unweighted": 0.016232706606388092, "avg_weight": 0.5094860792160034, "std_delta_l": 0.0093489745631814, "std_weight": 0.014620737172663212, "weighted_loss": 0.008338815532624722 }, { "avg_delta_l": -0.000717607734259218, "avg_loss_unweighted": 0.017437800765037537, "avg_weight": 0.5151843428611755, "std_delta_l": 0.012104110792279243, "std_weight": 0.01887352205812931, "weighted_loss": 0.009053242392838001 }, { "avg_delta_l": 0.0013516130857169628, "avg_loss_unweighted": 0.012645382434129715, "avg_weight": 0.5111120939254761, "std_delta_l": 0.0114197488874197, "std_weight": 0.01776498556137085, "weighted_loss": 0.006684357766062021 }, { "avg_delta_l": -0.0010135063203051686, "avg_loss_unweighted": 0.01734241470694542, "avg_weight": 0.515230655670166, "std_delta_l": 0.012823461554944515, "std_weight": 0.02298627607524395, "weighted_loss": 0.009100226685404778 }, { "avg_delta_l": -0.0008862719405442476, "avg_loss_unweighted": 0.015640178695321083, "avg_weight": 0.5128586888313293, "std_delta_l": 0.010549289174377918, "std_weight": 0.019084805622696877, "weighted_loss": 0.008155731484293938 }, { "avg_delta_l": 0.0018124936614185572, "avg_loss_unweighted": 0.016470978036522865, "avg_weight": 0.5122114419937134, "std_delta_l": 0.012301760725677013, "std_weight": 0.017587749287486076, "weighted_loss": 0.008559796959161758 }, { "avg_delta_l": 0.0007920074858702719, "avg_loss_unweighted": 0.016258297488093376, "avg_weight": 0.5081579089164734, "std_delta_l": 0.009131771512329578, "std_weight": 0.014483541250228882, "weighted_loss": 0.008313127793371677 }, { "avg_delta_l": 0.002955096773803234, "avg_loss_unweighted": 0.01462809182703495, "avg_weight": 0.5076515078544617, "std_delta_l": 0.010520584881305695, "std_weight": 0.012868047691881657, "weighted_loss": 0.00753331882879138 }, { "avg_delta_l": -0.0013802641769871116, "avg_loss_unweighted": 0.017380548641085625, "avg_weight": 0.5163125395774841, "std_delta_l": 0.012365548871457577, "std_weight": 0.023097822442650795, "weighted_loss": 0.009125360287725925 }, { "avg_delta_l": 0.0016627674922347069, "avg_loss_unweighted": 0.015452851541340351, "avg_weight": 0.509880542755127, "std_delta_l": 0.010076512582600117, "std_weight": 0.016103966161608696, "weighted_loss": 0.007977667264640331 }, { "avg_delta_l": 1.4780263882130384e-05, "avg_loss_unweighted": 0.013353286311030388, "avg_weight": 0.5130598545074463, "std_delta_l": 0.011772679165005684, "std_weight": 0.021239949390292168, "weighted_loss": 0.006986347958445549 }, { "avg_delta_l": 0.0025571102742105722, "avg_loss_unweighted": 0.012085476890206337, "avg_weight": 0.5052714347839355, "std_delta_l": 0.00865483470261097, "std_weight": 0.009766900911927223, "weighted_loss": 0.00616228673607111 }, { "avg_delta_l": 0.0012450384674593806, "avg_loss_unweighted": 0.017138205468654633, "avg_weight": 0.510977029800415, "std_delta_l": 0.010635333135724068, "std_weight": 0.014679983258247375, "weighted_loss": 0.008845621719956398 }, { "avg_delta_l": 0.0012391717173159122, "avg_loss_unweighted": 0.014258362352848053, "avg_weight": 0.5100011229515076, "std_delta_l": 0.011427521705627441, "std_weight": 0.017798544839024544, "weighted_loss": 0.007474527228623629 }, { "avg_delta_l": 0.0019728392362594604, "avg_loss_unweighted": 0.013580931350588799, "avg_weight": 0.509482741355896, "std_delta_l": 0.010325281880795956, "std_weight": 0.014379412867128849, "weighted_loss": 0.007031605578958988 }, { "avg_delta_l": -0.0024675873573869467, "avg_loss_unweighted": 0.017573382705450058, "avg_weight": 0.5185451507568359, "std_delta_l": 0.011861204169690609, "std_weight": 0.019228147342801094, "weighted_loss": 0.00930414441972971 }, { "avg_delta_l": 0.001022255513817072, "avg_loss_unweighted": 0.013889546506106853, "avg_weight": 0.5149478316307068, "std_delta_l": 0.014321896247565746, "std_weight": 0.023610634729266167, "weighted_loss": 0.007299542892724276 }, { "avg_delta_l": 2.5173358153551817e-05, "avg_loss_unweighted": 0.016652749851346016, "avg_weight": 0.5137556791305542, "std_delta_l": 0.012188645079731941, "std_weight": 0.020343592390418053, "weighted_loss": 0.008683757856488228 }, { "avg_delta_l": -0.0017343999352306128, "avg_loss_unweighted": 0.015132879838347435, "avg_weight": 0.5141091346740723, "std_delta_l": 0.009635457769036293, "std_weight": 0.018409647047519684, "weighted_loss": 0.007953837513923645 }, { "avg_delta_l": -4.950528818881139e-05, "avg_loss_unweighted": 0.015748098492622375, "avg_weight": 0.5145960450172424, "std_delta_l": 0.011623580008745193, "std_weight": 0.018862782046198845, "weighted_loss": 0.00828518159687519 }, { "epoch": 18.452173913043477, "grad_norm": 0.23772468593226323, "learning_rate": 1.8629706001123681e-07, "loss": 0.0161, "step": 1070 }, { "avg_delta_l": 0.0033047841861844063, "avg_loss_unweighted": 0.012777211144566536, "avg_weight": 0.5073573589324951, "std_delta_l": 0.00965303834527731, "std_weight": 0.011851461604237556, "weighted_loss": 0.006542637944221497 }, { "avg_delta_l": 0.0032330304384231567, "avg_loss_unweighted": 0.012144310399889946, "avg_weight": 0.5091387629508972, "std_delta_l": 0.012065735645592213, "std_weight": 0.013352536596357822, "weighted_loss": 0.006281597539782524 }, { "avg_delta_l": -8.627670467831194e-05, "avg_loss_unweighted": 0.016020510345697403, "avg_weight": 0.5149744749069214, "std_delta_l": 0.013041933998465538, "std_weight": 0.021111417561769485, "weighted_loss": 0.008470059372484684 }, { "avg_delta_l": 3.584899241104722e-05, "avg_loss_unweighted": 0.016334956511855125, "avg_weight": 0.5149400234222412, "std_delta_l": 0.013509980402886868, "std_weight": 0.021958002820611, "weighted_loss": 0.008584663271903992 }, { "avg_delta_l": 0.00018031627405434847, "avg_loss_unweighted": 0.015743259340524673, "avg_weight": 0.5117356777191162, "std_delta_l": 0.010779843665659428, "std_weight": 0.018098410218954086, "weighted_loss": 0.008248668164014816 }, { "avg_delta_l": 0.0033526804763823748, "avg_loss_unweighted": 0.01397259533405304, "avg_weight": 0.5115171074867249, "std_delta_l": 0.014245684258639812, "std_weight": 0.018325509503483772, "weighted_loss": 0.007312327157706022 }, { "avg_delta_l": 0.0035748761147260666, "avg_loss_unweighted": 0.012878191657364368, "avg_weight": 0.5102379322052002, "std_delta_l": 0.013972761109471321, "std_weight": 0.015091993845999241, "weighted_loss": 0.006696943659335375 }, { "avg_delta_l": 0.0028933605644851923, "avg_loss_unweighted": 0.013203385286033154, "avg_weight": 0.5109050273895264, "std_delta_l": 0.012651616707444191, "std_weight": 0.017998075112700462, "weighted_loss": 0.0068906452506780624 }, { "avg_delta_l": -0.001927488367073238, "avg_loss_unweighted": 0.015531896613538265, "avg_weight": 0.5185560584068298, "std_delta_l": 0.014271032065153122, "std_weight": 0.026632778346538544, "weighted_loss": 0.008368932642042637 }, { "avg_delta_l": -0.0010310369543731213, "avg_loss_unweighted": 0.01589045859873295, "avg_weight": 0.5167478919029236, "std_delta_l": 0.012733394280076027, "std_weight": 0.02197844907641411, "weighted_loss": 0.00844342540949583 }, { "avg_delta_l": -0.0017333481227979064, "avg_loss_unweighted": 0.012559306807816029, "avg_weight": 0.5136104226112366, "std_delta_l": 0.009636606089770794, "std_weight": 0.017841629683971405, "weighted_loss": 0.006611517630517483 }, { "avg_delta_l": 0.0005068214377388358, "avg_loss_unweighted": 0.013945110142230988, "avg_weight": 0.5129203796386719, "std_delta_l": 0.01169788371771574, "std_weight": 0.017868714407086372, "weighted_loss": 0.007302448153495789 }, { "avg_delta_l": -0.0004286860057618469, "avg_loss_unweighted": 0.013370751403272152, "avg_weight": 0.5157468318939209, "std_delta_l": 0.014506043866276741, "std_weight": 0.023149212822318077, "weighted_loss": 0.007098287343978882 }, { "avg_delta_l": 0.00023147242609411478, "avg_loss_unweighted": 0.016922123730182648, "avg_weight": 0.5123138427734375, "std_delta_l": 0.011355044320225716, "std_weight": 0.01863437332212925, "weighted_loss": 0.008828158490359783 }, { "avg_delta_l": 0.00295649073086679, "avg_loss_unweighted": 0.011190874502062798, "avg_weight": 0.5092976093292236, "std_delta_l": 0.011152287945151329, "std_weight": 0.012288440018892288, "weighted_loss": 0.005786965135484934 }, { "avg_delta_l": 0.0009625360835343599, "avg_loss_unweighted": 0.013855799101293087, "avg_weight": 0.5129815936088562, "std_delta_l": 0.013403925113379955, "std_weight": 0.022034242749214172, "weighted_loss": 0.007285906467586756 }, { "avg_delta_l": -0.0015565331559628248, "avg_loss_unweighted": 0.01380727719515562, "avg_weight": 0.5160110592842102, "std_delta_l": 0.009774542413651943, "std_weight": 0.016567520797252655, "weighted_loss": 0.0073046330362558365 }, { "avg_delta_l": -0.0016791194211691618, "avg_loss_unweighted": 0.01606765016913414, "avg_weight": 0.5179156064987183, "std_delta_l": 0.013087166473269463, "std_weight": 0.02407591976225376, "weighted_loss": 0.008528065867722034 }, { "avg_delta_l": 0.0003739715612027794, "avg_loss_unweighted": 0.01438182219862938, "avg_weight": 0.5121127963066101, "std_delta_l": 0.010357345454394817, "std_weight": 0.0149428965523839, "weighted_loss": 0.007492925506085157 }, { "avg_delta_l": -0.003370590042322874, "avg_loss_unweighted": 0.017398938536643982, "avg_weight": 0.5192188620567322, "std_delta_l": 0.01135186105966568, "std_weight": 0.023994503542780876, "weighted_loss": 0.009206258691847324 }, { "epoch": 18.62608695652174, "grad_norm": 0.2414071497607875, "learning_rate": 1.477942587339426e-07, "loss": 0.0151, "step": 1080 }, { "avg_delta_l": -0.001587033737450838, "avg_loss_unweighted": 0.01568078249692917, "avg_weight": 0.5180652141571045, "std_delta_l": 0.011957734823226929, "std_weight": 0.02048603445291519, "weighted_loss": 0.008280194364488125 }, { "avg_delta_l": -0.00015913817333057523, "avg_loss_unweighted": 0.01813446544110775, "avg_weight": 0.5148199200630188, "std_delta_l": 0.011829286813735962, "std_weight": 0.020439233630895615, "weighted_loss": 0.009476177394390106 }, { "avg_delta_l": 0.0008448096341453493, "avg_loss_unweighted": 0.01646103523671627, "avg_weight": 0.5109447240829468, "std_delta_l": 0.010588368400931358, "std_weight": 0.015775933861732483, "weighted_loss": 0.00851435586810112 }, { "avg_delta_l": 0.0032317396253347397, "avg_loss_unweighted": 0.012791397050023079, "avg_weight": 0.5122028589248657, "std_delta_l": 0.014125416986644268, "std_weight": 0.017372824251651764, "weighted_loss": 0.006693833041936159 }, { "avg_delta_l": 0.0015051080845296383, "avg_loss_unweighted": 0.013446499593555927, "avg_weight": 0.5144680738449097, "std_delta_l": 0.013690207153558731, "std_weight": 0.01966666989028454, "weighted_loss": 0.007110138423740864 }, { "avg_delta_l": -0.0007920673815533519, "avg_loss_unweighted": 0.014364880509674549, "avg_weight": 0.5157251954078674, "std_delta_l": 0.011844273656606674, "std_weight": 0.022389279678463936, "weighted_loss": 0.007618413306772709 }, { "avg_delta_l": -0.0002729489642661065, "avg_loss_unweighted": 0.017654022201895714, "avg_weight": 0.5148080587387085, "std_delta_l": 0.012277208268642426, "std_weight": 0.01808960922062397, "weighted_loss": 0.00929853692650795 }, { "avg_delta_l": 0.0020638194400817156, "avg_loss_unweighted": 0.014690372161567211, "avg_weight": 0.5159131288528442, "std_delta_l": 0.015375788323581219, "std_weight": 0.024587808176875114, "weighted_loss": 0.0077956439927220345 }, { "avg_delta_l": -0.00019582119421102107, "avg_loss_unweighted": 0.013854731805622578, "avg_weight": 0.5169286131858826, "std_delta_l": 0.013956747949123383, "std_weight": 0.025322265923023224, "weighted_loss": 0.007409235928207636 }, { "avg_delta_l": -0.0005123266018927097, "avg_loss_unweighted": 0.01529537420719862, "avg_weight": 0.5151970386505127, "std_delta_l": 0.011853407137095928, "std_weight": 0.021173061802983284, "weighted_loss": 0.00803434383124113 }, { "avg_delta_l": 0.001675385981798172, "avg_loss_unweighted": 0.014052401296794415, "avg_weight": 0.5137647986412048, "std_delta_l": 0.014502519741654396, "std_weight": 0.021424271166324615, "weighted_loss": 0.0074665057472884655 }, { "avg_delta_l": -0.002092079259455204, "avg_loss_unweighted": 0.017186187207698822, "avg_weight": 0.5221956968307495, "std_delta_l": 0.013548382557928562, "std_weight": 0.024155253544449806, "weighted_loss": 0.0092367734760046 }, { "avg_delta_l": 0.0002229445381090045, "avg_loss_unweighted": 0.013611756265163422, "avg_weight": 0.514375627040863, "std_delta_l": 0.013122104108333588, "std_weight": 0.022834090515971184, "weighted_loss": 0.007177762687206268 }, { "avg_delta_l": -0.0011623739264905453, "avg_loss_unweighted": 0.016033707186579704, "avg_weight": 0.5166860818862915, "std_delta_l": 0.011537513695657253, "std_weight": 0.020529041066765785, "weighted_loss": 0.008434608578681946 }, { "avg_delta_l": 0.0036836001090705395, "avg_loss_unweighted": 0.013476434163749218, "avg_weight": 0.5123331546783447, "std_delta_l": 0.014854814857244492, "std_weight": 0.019770005717873573, "weighted_loss": 0.00705461110919714 }, { "avg_delta_l": -5.1984417950734496e-05, "avg_loss_unweighted": 0.014720781706273556, "avg_weight": 0.5142053365707397, "std_delta_l": 0.012571732513606548, "std_weight": 0.022582409903407097, "weighted_loss": 0.00774375069886446 }, { "avg_delta_l": -0.0014716945588588715, "avg_loss_unweighted": 0.016292424872517586, "avg_weight": 0.5217352509498596, "std_delta_l": 0.01663711480796337, "std_weight": 0.028996115550398827, "weighted_loss": 0.008809003978967667 }, { "avg_delta_l": 0.0009869082132354379, "avg_loss_unweighted": 0.011228164657950401, "avg_weight": 0.5150707364082336, "std_delta_l": 0.013660667464137077, "std_weight": 0.021385271102190018, "weighted_loss": 0.005958939902484417 }, { "avg_delta_l": 0.002819251734763384, "avg_loss_unweighted": 0.012781920842826366, "avg_weight": 0.5121313333511353, "std_delta_l": 0.013278094120323658, "std_weight": 0.017088757827878, "weighted_loss": 0.006677420809864998 }, { "avg_delta_l": -0.0010761914309114218, "avg_loss_unweighted": 0.016245989128947258, "avg_weight": 0.5153980255126953, "std_delta_l": 0.010955380275845528, "std_weight": 0.018515024334192276, "weighted_loss": 0.008508953265845776 }, { "epoch": 18.8, "grad_norm": 0.2334958530832664, "learning_rate": 1.1368490234807295e-07, "loss": 0.0157, "step": 1090 }, { "avg_delta_l": -0.0005794519674964249, "avg_loss_unweighted": 0.014469701796770096, "avg_weight": 0.51772141456604, "std_delta_l": 0.013306200504302979, "std_weight": 0.022311309352517128, "weighted_loss": 0.0076983291655778885 }, { "avg_delta_l": 0.00019052514107897878, "avg_loss_unweighted": 0.01627359166741371, "avg_weight": 0.5181920528411865, "std_delta_l": 0.013832854107022285, "std_weight": 0.021500658243894577, "weighted_loss": 0.008631126955151558 }, { "avg_delta_l": -0.002713349647819996, "avg_loss_unweighted": 0.014828886836767197, "avg_weight": 0.5202436447143555, "std_delta_l": 0.012498236261308193, "std_weight": 0.02392926812171936, "weighted_loss": 0.007938843220472336 }, { "avg_delta_l": 0.0009244601824320853, "avg_loss_unweighted": 0.014390263706445694, "avg_weight": 0.5162597894668579, "std_delta_l": 0.01360766589641571, "std_weight": 0.021629782393574715, "weighted_loss": 0.007649117149412632 }, { "avg_delta_l": 0.0004268447810318321, "avg_loss_unweighted": 0.015530159696936607, "avg_weight": 0.5164521932601929, "std_delta_l": 0.013908924534916878, "std_weight": 0.020562179386615753, "weighted_loss": 0.008214021101593971 }, { "avg_delta_l": 0.0046164728701114655, "avg_loss_unweighted": 0.013740180060267448, "avg_weight": 0.5113266110420227, "std_delta_l": 0.014008532278239727, "std_weight": 0.01597476936876774, "weighted_loss": 0.007175726816058159 }, { "avg_delta_l": -0.004196408670395613, "avg_loss_unweighted": 0.01571808010339737, "avg_weight": 0.5214366912841797, "std_delta_l": 0.011515384539961815, "std_weight": 0.023110022768378258, "weighted_loss": 0.008324306458234787 }, { "avg_delta_l": -0.001713801990263164, "avg_loss_unweighted": 0.0172171201556921, "avg_weight": 0.522587239742279, "std_delta_l": 0.017006324604153633, "std_weight": 0.030869467183947563, "weighted_loss": 0.009365750476717949 }, { "avg_delta_l": -0.0016404527705162764, "avg_loss_unweighted": 0.015504393726587296, "avg_weight": 0.520861029624939, "std_delta_l": 0.01467978022992611, "std_weight": 0.027198804542422295, "weighted_loss": 0.00837229285389185 }, { "avg_delta_l": -0.0016906051896512508, "avg_loss_unweighted": 0.014595860615372658, "avg_weight": 0.5180950164794922, "std_delta_l": 0.012633186765015125, "std_weight": 0.021389713510870934, "weighted_loss": 0.007753907702863216 }, { "avg_delta_l": -0.00338695477694273, "avg_loss_unweighted": 0.01563446968793869, "avg_weight": 0.5230792164802551, "std_delta_l": 0.014173842035233974, "std_weight": 0.02422538585960865, "weighted_loss": 0.008437352254986763 }, { "avg_delta_l": 0.0003130411496385932, "avg_loss_unweighted": 0.015272573567926884, "avg_weight": 0.5164170265197754, "std_delta_l": 0.013504013419151306, "std_weight": 0.023618269711732864, "weighted_loss": 0.008099697530269623 }, { "avg_delta_l": 0.0016519335331395268, "avg_loss_unweighted": 0.014536217786371708, "avg_weight": 0.5153066515922546, "std_delta_l": 0.01436364185065031, "std_weight": 0.018063265830278397, "weighted_loss": 0.007603699807077646 }, { "avg_delta_l": 0.00021157102310098708, "avg_loss_unweighted": 0.014224065467715263, "avg_weight": 0.5192497372627258, "std_delta_l": 0.015735410153865814, "std_weight": 0.022322099655866623, "weighted_loss": 0.0076156072318553925 }, { "avg_delta_l": -0.0027420648839324713, "avg_loss_unweighted": 0.016240492463111877, "avg_weight": 0.5199952125549316, "std_delta_l": 0.012681062333285809, "std_weight": 0.02169138751924038, "weighted_loss": 0.00860682688653469 }, { "avg_delta_l": 0.003385777585208416, "avg_loss_unweighted": 0.014625772833824158, "avg_weight": 0.5156134366989136, "std_delta_l": 0.015704182907938957, "std_weight": 0.020726799964904785, "weighted_loss": 0.007751279976218939 }, { "avg_delta_l": 0.00037009321385994554, "avg_loss_unweighted": 0.015848591923713684, "avg_weight": 0.5188525319099426, "std_delta_l": 0.01449417695403099, "std_weight": 0.023922137916088104, "weighted_loss": 0.008432921953499317 }, { "avg_delta_l": -0.002002531662583351, "avg_loss_unweighted": 0.01593325473368168, "avg_weight": 0.5212770700454712, "std_delta_l": 0.012658118270337582, "std_weight": 0.019637392833828926, "weighted_loss": 0.00846369843930006 }, { "avg_delta_l": 2.096407115459442e-05, "avg_loss_unweighted": 0.017040561884641647, "avg_weight": 0.5171314477920532, "std_delta_l": 0.01446067076176405, "std_weight": 0.02314906194806099, "weighted_loss": 0.008997773751616478 }, { "avg_delta_l": 9.364460129290819e-05, "avg_loss_unweighted": 0.01262295339256525, "avg_weight": 0.5183345675468445, "std_delta_l": 0.015399538911879063, "std_weight": 0.02746223658323288, "weighted_loss": 0.006807398982346058 }, { "epoch": 18.973913043478262, "grad_norm": 0.2275067096610568, "learning_rate": 8.399987527723919e-08, "loss": 0.0162, "step": 1100 }, { "avg_delta_l": -0.002756170928478241, "avg_loss_unweighted": 0.017173197120428085, "avg_weight": 0.5209101438522339, "std_delta_l": 0.014429149217903614, "std_weight": 0.026431424543261528, "weighted_loss": 0.00919474195688963 }, { "avg_delta_l": 8.84352921275422e-05, "avg_loss_unweighted": 0.014826846309006214, "avg_weight": 0.5181043148040771, "std_delta_l": 0.013996602967381477, "std_weight": 0.023343348875641823, "weighted_loss": 0.007902194745838642 }, { "avg_delta_l": 0.00163283571600914, "avg_loss_unweighted": 0.012902014888823032, "avg_weight": 0.5164778232574463, "std_delta_l": 0.01588095724582672, "std_weight": 0.0214589424431324, "weighted_loss": 0.006846816744655371 }, { "avg_delta_l": -0.0005512832431122661, "avg_loss_unweighted": 0.01603270322084427, "avg_weight": 0.5141065120697021, "std_delta_l": 0.012802560813724995, "std_weight": 0.02073119953274727, "weighted_loss": 0.008456948213279247 }, { "avg_delta_l": -9.709058213047683e-05, "avg_loss_unweighted": 0.01304444670677185, "avg_weight": 0.5103659629821777, "std_delta_l": 0.009445812553167343, "std_weight": 0.017278868705034256, "weighted_loss": 0.006825876887887716 }, { "avg_delta_l": -0.0003270452725701034, "avg_loss_unweighted": 0.013091005384922028, "avg_weight": 0.5100464820861816, "std_delta_l": 0.00886674877256155, "std_weight": 0.01595933549106121, "weighted_loss": 0.006845065392553806 }, { "avg_delta_l": 0.003658573143184185, "avg_loss_unweighted": 0.012248337268829346, "avg_weight": 0.5057092308998108, "std_delta_l": 0.009049876593053341, "std_weight": 0.009883100166916847, "weighted_loss": 0.006252143066376448 }, { "avg_delta_l": 0.0016064001247286797, "avg_loss_unweighted": 0.01585170440375805, "avg_weight": 0.507940948009491, "std_delta_l": 0.009995192289352417, "std_weight": 0.014926308766007423, "weighted_loss": 0.008106510154902935 }, { "avg_delta_l": 0.00019633406191132963, "avg_loss_unweighted": 0.014070040546357632, "avg_weight": 0.5086718201637268, "std_delta_l": 0.008203106001019478, "std_weight": 0.013498254120349884, "weighted_loss": 0.007280196063220501 }, { "avg_delta_l": 0.0024922764860093594, "avg_loss_unweighted": 0.01349090225994587, "avg_weight": 0.5050816535949707, "std_delta_l": 0.009390318766236305, "std_weight": 0.008405402302742004, "weighted_loss": 0.0068424539640545845 }, { "avg_delta_l": 0.00040600780630484223, "avg_loss_unweighted": 0.013452598825097084, "avg_weight": 0.5083070993423462, "std_delta_l": 0.008881191723048687, "std_weight": 0.014086749404668808, "weighted_loss": 0.006970454007387161 }, { "avg_delta_l": 0.00333492667414248, "avg_loss_unweighted": 0.014273077249526978, "avg_weight": 0.5056716799736023, "std_delta_l": 0.008080187253654003, "std_weight": 0.009285679087042809, "weighted_loss": 0.0072736176662147045 }, { "avg_delta_l": 0.000893476651981473, "avg_loss_unweighted": 0.014135263860225677, "avg_weight": 0.5102855563163757, "std_delta_l": 0.011174606159329414, "std_weight": 0.01621882990002632, "weighted_loss": 0.007349826395511627 }, { "avg_delta_l": -0.001841312856413424, "avg_loss_unweighted": 0.017487673088908195, "avg_weight": 0.5138123631477356, "std_delta_l": 0.00985894538462162, "std_weight": 0.020447582006454468, "weighted_loss": 0.009159263223409653 }, { "avg_delta_l": -0.0005595018737949431, "avg_loss_unweighted": 0.01854035258293152, "avg_weight": 0.5138580799102783, "std_delta_l": 0.011137716472148895, "std_weight": 0.018738072365522385, "weighted_loss": 0.00973848532885313 }, { "avg_delta_l": 0.000942171027418226, "avg_loss_unweighted": 0.016181929036974907, "avg_weight": 0.5086692571640015, "std_delta_l": 0.008799243718385696, "std_weight": 0.011951549910008907, "weighted_loss": 0.00830154586583376 }, { "avg_delta_l": 0.0023870456498116255, "avg_loss_unweighted": 0.014668313786387444, "avg_weight": 0.5048640370368958, "std_delta_l": 0.009040099568665028, "std_weight": 0.009435655549168587, "weighted_loss": 0.007484615780413151 }, { "avg_delta_l": 0.0008546494063921273, "avg_loss_unweighted": 0.01569116860628128, "avg_weight": 0.511993944644928, "std_delta_l": 0.012370452284812927, "std_weight": 0.019790271297097206, "weighted_loss": 0.008144239895045757 }, { "avg_delta_l": -0.0019550251308828592, "avg_loss_unweighted": 0.018338724970817566, "avg_weight": 0.5105112195014954, "std_delta_l": 0.007247675210237503, "std_weight": 0.01526466105133295, "weighted_loss": 0.009436676278710365 }, { "epoch": 19.139130434782608, "grad_norm": 0.22408401647843224, "learning_rate": 5.8766055922211965e-08, "loss": 0.0148, "step": 1110 }, { "avg_delta_l": 0.000911901646759361, "avg_loss_unweighted": 0.012365929782390594, "avg_weight": 0.5075237154960632, "std_delta_l": 0.007454897277057171, "std_weight": 0.01164824329316616, "weighted_loss": 0.006374171935021877 }, { "avg_delta_l": 0.0030785466078668833, "avg_loss_unweighted": 0.013534893281757832, "avg_weight": 0.5063376426696777, "std_delta_l": 0.01046904269605875, "std_weight": 0.01142351608723402, "weighted_loss": 0.006921855267137289 }, { "avg_delta_l": 0.003923977725207806, "avg_loss_unweighted": 0.01380196027457714, "avg_weight": 0.5051265954971313, "std_delta_l": 0.009928698651492596, "std_weight": 0.00948670320212841, "weighted_loss": 0.007061162032186985 }, { "avg_delta_l": -0.000561484310310334, "avg_loss_unweighted": 0.0148339057341218, "avg_weight": 0.511136531829834, "std_delta_l": 0.00975308008491993, "std_weight": 0.016371585428714752, "weighted_loss": 0.00775638734921813 }, { "avg_delta_l": -0.0016763783060014248, "avg_loss_unweighted": 0.01836968958377838, "avg_weight": 0.5153518915176392, "std_delta_l": 0.011711828410625458, "std_weight": 0.020205577835440636, "weighted_loss": 0.009617687202990055 }, { "avg_delta_l": 0.001109208562411368, "avg_loss_unweighted": 0.013269414193928242, "avg_weight": 0.5080941915512085, "std_delta_l": 0.009255913086235523, "std_weight": 0.012373929843306541, "weighted_loss": 0.006827689707279205 }, { "avg_delta_l": -0.00038803822826594114, "avg_loss_unweighted": 0.013683803379535675, "avg_weight": 0.5123524069786072, "std_delta_l": 0.01062147319316864, "std_weight": 0.018593713641166687, "weighted_loss": 0.007153610233217478 }, { "avg_delta_l": 0.000750683480873704, "avg_loss_unweighted": 0.01475992426276207, "avg_weight": 0.509937584400177, "std_delta_l": 0.010683327913284302, "std_weight": 0.016234872862696648, "weighted_loss": 0.007639958523213863 }, { "avg_delta_l": 0.0012471249792724848, "avg_loss_unweighted": 0.01425109151750803, "avg_weight": 0.5089417695999146, "std_delta_l": 0.009776907041668892, "std_weight": 0.013329992070794106, "weighted_loss": 0.007351070176810026 }, { "avg_delta_l": 0.0031121266074478626, "avg_loss_unweighted": 0.011782744899392128, "avg_weight": 0.5065852403640747, "std_delta_l": 0.011017817072570324, "std_weight": 0.010660644620656967, "weighted_loss": 0.006027643568813801 }, { "avg_delta_l": -0.0016130729345604777, "avg_loss_unweighted": 0.013910407200455666, "avg_weight": 0.5160034894943237, "std_delta_l": 0.013464131392538548, "std_weight": 0.024073656648397446, "weighted_loss": 0.007420642301440239 }, { "avg_delta_l": 0.0006779331597499549, "avg_loss_unweighted": 0.013964397832751274, "avg_weight": 0.5086284279823303, "std_delta_l": 0.00932024884968996, "std_weight": 0.013173538260161877, "weighted_loss": 0.0071897367015480995 }, { "avg_delta_l": -0.0003122373018413782, "avg_loss_unweighted": 0.013358410447835922, "avg_weight": 0.5096625089645386, "std_delta_l": 0.008642789907753468, "std_weight": 0.013819253072142601, "weighted_loss": 0.006889185402542353 }, { "avg_delta_l": 0.0007543793180957437, "avg_loss_unweighted": 0.013739701360464096, "avg_weight": 0.511547327041626, "std_delta_l": 0.0111226262524724, "std_weight": 0.016996504738926888, "weighted_loss": 0.007158216089010239 }, { "avg_delta_l": -0.0009409241611137986, "avg_loss_unweighted": 0.013011747971177101, "avg_weight": 0.5116188526153564, "std_delta_l": 0.009722826071083546, "std_weight": 0.018111452460289, "weighted_loss": 0.006748752202838659 }, { "avg_delta_l": 1.4161865692585707e-05, "avg_loss_unweighted": 0.015236097387969494, "avg_weight": 0.5104141235351562, "std_delta_l": 0.009484140202403069, "std_weight": 0.01510492991656065, "weighted_loss": 0.007922272197902203 }, { "avg_delta_l": 0.001690233126282692, "avg_loss_unweighted": 0.014678644016385078, "avg_weight": 0.509674072265625, "std_delta_l": 0.01144931185990572, "std_weight": 0.014353263191878796, "weighted_loss": 0.007591203786432743 }, { "avg_delta_l": 0.001525591011159122, "avg_loss_unweighted": 0.011942341923713684, "avg_weight": 0.5089870095252991, "std_delta_l": 0.009380219504237175, "std_weight": 0.013401675969362259, "weighted_loss": 0.00622327346354723 }, { "avg_delta_l": -7.956335321068764e-05, "avg_loss_unweighted": 0.014187558554112911, "avg_weight": 0.5127167701721191, "std_delta_l": 0.011712661944329739, "std_weight": 0.01821914128959179, "weighted_loss": 0.007489132694900036 }, { "avg_delta_l": 0.001228498062118888, "avg_loss_unweighted": 0.015961242839694023, "avg_weight": 0.5114006400108337, "std_delta_l": 0.011188759468495846, "std_weight": 0.015767235308885574, "weighted_loss": 0.008246522396802902 }, { "epoch": 19.31304347826087, "grad_norm": 0.20764787718294236, "learning_rate": 3.80062923237895e-08, "loss": 0.0146, "step": 1120 }, { "avg_delta_l": -0.0014972304925322533, "avg_loss_unweighted": 0.016607990488409996, "avg_weight": 0.5146665573120117, "std_delta_l": 0.01081331167370081, "std_weight": 0.020671149715781212, "weighted_loss": 0.00866619497537613 }, { "avg_delta_l": -0.0006089682574383914, "avg_loss_unweighted": 0.015400601550936699, "avg_weight": 0.5154380202293396, "std_delta_l": 0.013986990787088871, "std_weight": 0.024146173149347305, "weighted_loss": 0.008170539513230324 }, { "avg_delta_l": -0.0026065288111567497, "avg_loss_unweighted": 0.0162163395434618, "avg_weight": 0.5144009590148926, "std_delta_l": 0.009780588559806347, "std_weight": 0.020946279168128967, "weighted_loss": 0.008476081304252148 }, { "avg_delta_l": -0.0008686125511303544, "avg_loss_unweighted": 0.016146663576364517, "avg_weight": 0.5160748958587646, "std_delta_l": 0.013070422224700451, "std_weight": 0.023872090503573418, "weighted_loss": 0.008505911566317081 }, { "avg_delta_l": -0.001172651769593358, "avg_loss_unweighted": 0.014409003779292107, "avg_weight": 0.5135245323181152, "std_delta_l": 0.01086585596203804, "std_weight": 0.02209882065653801, "weighted_loss": 0.007596092764288187 }, { "avg_delta_l": -0.0005711217527277768, "avg_loss_unweighted": 0.015443708747625351, "avg_weight": 0.516308605670929, "std_delta_l": 0.012995515950024128, "std_weight": 0.017834937199950218, "weighted_loss": 0.008191223256289959 }, { "avg_delta_l": 0.0014296903973445296, "avg_loss_unweighted": 0.014937790110707283, "avg_weight": 0.5092371702194214, "std_delta_l": 0.011078186333179474, "std_weight": 0.016421062871813774, "weighted_loss": 0.0077594188041985035 }, { "avg_delta_l": -0.0005911460611969233, "avg_loss_unweighted": 0.016002453863620758, "avg_weight": 0.5157152414321899, "std_delta_l": 0.01331657636910677, "std_weight": 0.022725718095898628, "weighted_loss": 0.008466945961117744 }, { "avg_delta_l": -0.0006468815263360739, "avg_loss_unweighted": 0.013968273997306824, "avg_weight": 0.5132925510406494, "std_delta_l": 0.011738883331418037, "std_weight": 0.020539842545986176, "weighted_loss": 0.007338335737586021 }, { "avg_delta_l": -0.0018592274282127619, "avg_loss_unweighted": 0.012785094790160656, "avg_weight": 0.5132794380187988, "std_delta_l": 0.009815485216677189, "std_weight": 0.01860317401587963, "weighted_loss": 0.006692078895866871 }, { "avg_delta_l": 0.00018740500672720373, "avg_loss_unweighted": 0.012569818645715714, "avg_weight": 0.5140298008918762, "std_delta_l": 0.012011820450425148, "std_weight": 0.02049773558974266, "weighted_loss": 0.006614316254854202 }, { "avg_delta_l": 0.0013587601715698838, "avg_loss_unweighted": 0.014995768666267395, "avg_weight": 0.5117010474205017, "std_delta_l": 0.011735955253243446, "std_weight": 0.015180839225649834, "weighted_loss": 0.0077926479279994965 }, { "avg_delta_l": 0.0010630835313349962, "avg_loss_unweighted": 0.014422950334846973, "avg_weight": 0.5075885057449341, "std_delta_l": 0.009156376123428345, "std_weight": 0.013029137626290321, "weighted_loss": 0.007364258635789156 }, { "avg_delta_l": 0.004599819425493479, "avg_loss_unweighted": 0.014985436573624611, "avg_weight": 0.5110034346580505, "std_delta_l": 0.013987841084599495, "std_weight": 0.017321093007922173, "weighted_loss": 0.007772757206112146 }, { "avg_delta_l": 0.004377215635031462, "avg_loss_unweighted": 0.012295627035200596, "avg_weight": 0.5056028962135315, "std_delta_l": 0.009794417768716812, "std_weight": 0.008125228807330132, "weighted_loss": 0.006310990080237389 }, { "avg_delta_l": 0.00023479360970668495, "avg_loss_unweighted": 0.016009081155061722, "avg_weight": 0.5125373005867004, "std_delta_l": 0.011081083677709103, "std_weight": 0.016755668446421623, "weighted_loss": 0.008366861380636692 }, { "avg_delta_l": 0.00015056796837598085, "avg_loss_unweighted": 0.01688251458108425, "avg_weight": 0.514807939529419, "std_delta_l": 0.013672750443220139, "std_weight": 0.02125965803861618, "weighted_loss": 0.008891684003174305 }, { "avg_delta_l": 5.930991028435528e-05, "avg_loss_unweighted": 0.014498040080070496, "avg_weight": 0.5133973360061646, "std_delta_l": 0.012118319980800152, "std_weight": 0.019287103787064552, "weighted_loss": 0.0075970119796693325 }, { "avg_delta_l": -0.0031755866948515177, "avg_loss_unweighted": 0.016671905294060707, "avg_weight": 0.5169544219970703, "std_delta_l": 0.011779680848121643, "std_weight": 0.025891944766044617, "weighted_loss": 0.008866996504366398 }, { "avg_delta_l": -0.0005937227979302406, "avg_loss_unweighted": 0.013609284535050392, "avg_weight": 0.5132803916931152, "std_delta_l": 0.011070312932133675, "std_weight": 0.017369575798511505, "weighted_loss": 0.007150427438318729 }, { "epoch": 19.48695652173913, "grad_norm": 0.23245723213459848, "learning_rate": 2.1739381474966124e-08, "loss": 0.0157, "step": 1130 }, { "avg_delta_l": -0.0001075564359780401, "avg_loss_unweighted": 0.013858765363693237, "avg_weight": 0.5144530534744263, "std_delta_l": 0.012211722321808338, "std_weight": 0.018663259223103523, "weighted_loss": 0.007290301378816366 }, { "avg_delta_l": -0.0017994000809267163, "avg_loss_unweighted": 0.016165386885404587, "avg_weight": 0.5176420211791992, "std_delta_l": 0.011532851494848728, "std_weight": 0.01987791247665882, "weighted_loss": 0.008557818830013275 }, { "avg_delta_l": -0.0001564403501106426, "avg_loss_unweighted": 0.013993158005177975, "avg_weight": 0.5123516917228699, "std_delta_l": 0.010763769969344139, "std_weight": 0.016627008095383644, "weighted_loss": 0.007326282095164061 }, { "avg_delta_l": 0.0003837035910692066, "avg_loss_unweighted": 0.0127559220418334, "avg_weight": 0.5122441053390503, "std_delta_l": 0.0109981345012784, "std_weight": 0.016404958441853523, "weighted_loss": 0.006667856127023697 }, { "avg_delta_l": 0.0013676472008228302, "avg_loss_unweighted": 0.014749914407730103, "avg_weight": 0.5115098357200623, "std_delta_l": 0.011439234018325806, "std_weight": 0.01782854087650776, "weighted_loss": 0.007696281187236309 }, { "avg_delta_l": -0.00134059670381248, "avg_loss_unweighted": 0.014849372208118439, "avg_weight": 0.5151231288909912, "std_delta_l": 0.011045213788747787, "std_weight": 0.020337162539362907, "weighted_loss": 0.007832327857613564 }, { "avg_delta_l": -0.002126326784491539, "avg_loss_unweighted": 0.015119042247533798, "avg_weight": 0.5175298452377319, "std_delta_l": 0.011572976596653461, "std_weight": 0.02330971322953701, "weighted_loss": 0.008001202717423439 }, { "avg_delta_l": 0.0034014959819614887, "avg_loss_unweighted": 0.011242649517953396, "avg_weight": 0.5086411237716675, "std_delta_l": 0.011871151626110077, "std_weight": 0.012259037233889103, "weighted_loss": 0.005804090760648251 }, { "avg_delta_l": -0.0002898531383834779, "avg_loss_unweighted": 0.015334652736783028, "avg_weight": 0.518358588218689, "std_delta_l": 0.01527989562600851, "std_weight": 0.024238286539912224, "weighted_loss": 0.00818558968603611 }, { "avg_delta_l": 0.0034361323341727257, "avg_loss_unweighted": 0.013324867002665997, "avg_weight": 0.5100694894790649, "std_delta_l": 0.01243710145354271, "std_weight": 0.014236662536859512, "weighted_loss": 0.006905166432261467 }, { "avg_delta_l": -0.00029585411539301276, "avg_loss_unweighted": 0.01641346700489521, "avg_weight": 0.515254020690918, "std_delta_l": 0.013304069638252258, "std_weight": 0.021815616637468338, "weighted_loss": 0.008598139509558678 }, { "avg_delta_l": -0.0025855419225990772, "avg_loss_unweighted": 0.01747635379433632, "avg_weight": 0.5167666077613831, "std_delta_l": 0.010981081984937191, "std_weight": 0.02122458629310131, "weighted_loss": 0.009221479296684265 }, { "avg_delta_l": 0.0004501749062910676, "avg_loss_unweighted": 0.013433721847832203, "avg_weight": 0.5131933093070984, "std_delta_l": 0.012243624776601791, "std_weight": 0.01821419782936573, "weighted_loss": 0.0070211198180913925 }, { "avg_delta_l": -0.0015251957811415195, "avg_loss_unweighted": 0.015646018087863922, "avg_weight": 0.5130285620689392, "std_delta_l": 0.010466087609529495, "std_weight": 0.02072538062930107, "weighted_loss": 0.008165290579199791 }, { "avg_delta_l": -0.0004965965636074543, "avg_loss_unweighted": 0.014417840167880058, "avg_weight": 0.5167849659919739, "std_delta_l": 0.01192250195890665, "std_weight": 0.020391058176755905, "weighted_loss": 0.007688179612159729 }, { "avg_delta_l": 0.00043923722114413977, "avg_loss_unweighted": 0.012724638916552067, "avg_weight": 0.51419997215271, "std_delta_l": 0.01157945953309536, "std_weight": 0.01912933588027954, "weighted_loss": 0.006747820880264044 }, { "avg_delta_l": -0.0012198976473882794, "avg_loss_unweighted": 0.018036052584648132, "avg_weight": 0.5178548693656921, "std_delta_l": 0.014960729517042637, "std_weight": 0.02625207230448723, "weighted_loss": 0.00956006906926632 }, { "avg_delta_l": 0.0008096962701529264, "avg_loss_unweighted": 0.013712071813642979, "avg_weight": 0.5117943286895752, "std_delta_l": 0.012328495271503925, "std_weight": 0.017098283395171165, "weighted_loss": 0.0071836248971521854 }, { "avg_delta_l": -0.0018175278091803193, "avg_loss_unweighted": 0.016090290620923042, "avg_weight": 0.5184926390647888, "std_delta_l": 0.01261010393500328, "std_weight": 0.02327575534582138, "weighted_loss": 0.008579590357840061 }, { "avg_delta_l": 0.0027441976126283407, "avg_loss_unweighted": 0.012625361792743206, "avg_weight": 0.5083783864974976, "std_delta_l": 0.010519979521632195, "std_weight": 0.01337639894336462, "weighted_loss": 0.006507906597107649 }, { "epoch": 19.660869565217393, "grad_norm": 0.25034128567775754, "learning_rate": 9.980052301137854e-09, "loss": 0.0154, "step": 1140 }, { "avg_delta_l": 0.0018677643965929747, "avg_loss_unweighted": 0.013136704452335835, "avg_weight": 0.5124759674072266, "std_delta_l": 0.0127920126542449, "std_weight": 0.018673714250326157, "weighted_loss": 0.0069379424676299095 }, { "avg_delta_l": 0.00022717457613907754, "avg_loss_unweighted": 0.01510943565517664, "avg_weight": 0.5144497156143188, "std_delta_l": 0.012198830023407936, "std_weight": 0.018636031076312065, "weighted_loss": 0.00793550070375204 }, { "avg_delta_l": -0.0021033361554145813, "avg_loss_unweighted": 0.014554472640156746, "avg_weight": 0.5168535709381104, "std_delta_l": 0.011942649260163307, "std_weight": 0.022572111338377, "weighted_loss": 0.007696508429944515 }, { "avg_delta_l": 0.0005785282701253891, "avg_loss_unweighted": 0.01369612105190754, "avg_weight": 0.5169356465339661, "std_delta_l": 0.014037714339792728, "std_weight": 0.019781451672315598, "weighted_loss": 0.007256948389112949 }, { "avg_delta_l": 0.0011961825657635927, "avg_loss_unweighted": 0.016223667189478874, "avg_weight": 0.5149827599525452, "std_delta_l": 0.01336800865828991, "std_weight": 0.021053975448012352, "weighted_loss": 0.008500545285642147 }, { "avg_delta_l": -0.002329714596271515, "avg_loss_unweighted": 0.01608450524508953, "avg_weight": 0.5194881558418274, "std_delta_l": 0.012234743684530258, "std_weight": 0.021148882806301117, "weighted_loss": 0.008576296269893646 }, { "avg_delta_l": 0.0015303249238058925, "avg_loss_unweighted": 0.014308528043329716, "avg_weight": 0.5142324566841125, "std_delta_l": 0.013205812312662601, "std_weight": 0.01752229407429695, "weighted_loss": 0.007480747532099485 }, { "avg_delta_l": 0.002001429907977581, "avg_loss_unweighted": 0.015027245506644249, "avg_weight": 0.5106984972953796, "std_delta_l": 0.011354966089129448, "std_weight": 0.01681026630103588, "weighted_loss": 0.007775570265948772 }, { "avg_delta_l": 0.0013834034325554967, "avg_loss_unweighted": 0.013411663472652435, "avg_weight": 0.5152730345726013, "std_delta_l": 0.01581052504479885, "std_weight": 0.024448420852422714, "weighted_loss": 0.0071604810655117035 }, { "avg_delta_l": -0.0015031177317723632, "avg_loss_unweighted": 0.015471062622964382, "avg_weight": 0.5173672437667847, "std_delta_l": 0.010741312988102436, "std_weight": 0.019704129546880722, "weighted_loss": 0.008230502717196941 }, { "avg_delta_l": -5.399750079959631e-05, "avg_loss_unweighted": 0.01299256831407547, "avg_weight": 0.5188513994216919, "std_delta_l": 0.015330564230680466, "std_weight": 0.023822834715247154, "weighted_loss": 0.006972652394324541 }, { "avg_delta_l": 0.0005656784633174539, "avg_loss_unweighted": 0.015356021001935005, "avg_weight": 0.5129384994506836, "std_delta_l": 0.012110686860978603, "std_weight": 0.01951788365840912, "weighted_loss": 0.007985136471688747 }, { "avg_delta_l": 0.0016533565940335393, "avg_loss_unweighted": 0.0146942762658, "avg_weight": 0.5132704377174377, "std_delta_l": 0.014183037914335728, "std_weight": 0.01949058286845684, "weighted_loss": 0.007656522560864687 }, { "avg_delta_l": 0.001501757767982781, "avg_loss_unweighted": 0.011432066559791565, "avg_weight": 0.5136456489562988, "std_delta_l": 0.013282847590744495, "std_weight": 0.019163766875863075, "weighted_loss": 0.006090464070439339 }, { "avg_delta_l": -0.0038695153780281544, "avg_loss_unweighted": 0.015647975727915764, "avg_weight": 0.5230522155761719, "std_delta_l": 0.012450529262423515, "std_weight": 0.023049376904964447, "weighted_loss": 0.008453910239040852 }, { "avg_delta_l": -0.00038161518750712276, "avg_loss_unweighted": 0.01348566822707653, "avg_weight": 0.5170660018920898, "std_delta_l": 0.011985358782112598, "std_weight": 0.02035616897046566, "weighted_loss": 0.007144292816519737 }, { "avg_delta_l": 0.001320705283433199, "avg_loss_unweighted": 0.013338204473257065, "avg_weight": 0.5156671404838562, "std_delta_l": 0.01377171277999878, "std_weight": 0.020718762651085854, "weighted_loss": 0.007087184116244316 }, { "avg_delta_l": 0.0016152090393006802, "avg_loss_unweighted": 0.01362625602632761, "avg_weight": 0.5141677856445312, "std_delta_l": 0.014387780800461769, "std_weight": 0.02164439670741558, "weighted_loss": 0.007231934927403927 }, { "avg_delta_l": -0.0005143759772181511, "avg_loss_unweighted": 0.015362994745373726, "avg_weight": 0.5199184417724609, "std_delta_l": 0.015463347546756268, "std_weight": 0.026123004034161568, "weighted_loss": 0.008244631811976433 }, { "avg_delta_l": 0.002385272178798914, "avg_loss_unweighted": 0.013838464394211769, "avg_weight": 0.5160945653915405, "std_delta_l": 0.014714963734149933, "std_weight": 0.020435959100723267, "weighted_loss": 0.00732141500338912 }, { "epoch": 19.83478260869565, "grad_norm": 0.21908299737657072, "learning_rate": 2.7389523237503214e-09, "loss": 0.0152, "step": 1150 }, { "avg_delta_l": 0.002676478587090969, "avg_loss_unweighted": 0.013342672027647495, "avg_weight": 0.5141363143920898, "std_delta_l": 0.0133585250005126, "std_weight": 0.017719360068440437, "weighted_loss": 0.007080463692545891 }, { "avg_delta_l": 0.0002874669444281608, "avg_loss_unweighted": 0.01586882211267948, "avg_weight": 0.513896644115448, "std_delta_l": 0.012567112222313881, "std_weight": 0.02141760289669037, "weighted_loss": 0.008331021293997765 }, { "avg_delta_l": -0.00022246988373808563, "avg_loss_unweighted": 0.013739809393882751, "avg_weight": 0.5162320137023926, "std_delta_l": 0.011684060096740723, "std_weight": 0.018990864977240562, "weighted_loss": 0.0072713145054876804 }, { "avg_delta_l": 0.00023023190442472696, "avg_loss_unweighted": 0.01589363068342209, "avg_weight": 0.5189360976219177, "std_delta_l": 0.015788061544299126, "std_weight": 0.02244877815246582, "weighted_loss": 0.008407415822148323 }, { "avg_delta_l": -0.0016751795774325728, "avg_loss_unweighted": 0.013732485473155975, "avg_weight": 0.5185481309890747, "std_delta_l": 0.012231618165969849, "std_weight": 0.020420437678694725, "weighted_loss": 0.007274143863469362 }, { "avg_delta_l": 0.0002603471220936626, "avg_loss_unweighted": 0.014550698921084404, "avg_weight": 0.5164692401885986, "std_delta_l": 0.014271823689341545, "std_weight": 0.022526400163769722, "weighted_loss": 0.007694135420024395 }, { "avg_delta_l": 0.00124380923807621, "avg_loss_unweighted": 0.01506569143384695, "avg_weight": 0.5139535665512085, "std_delta_l": 0.012385345995426178, "std_weight": 0.01986060105264187, "weighted_loss": 0.007938234135508537 }, { "avg_delta_l": -0.001253590453416109, "avg_loss_unweighted": 0.01679709367454052, "avg_weight": 0.5207093954086304, "std_delta_l": 0.014867366291582584, "std_weight": 0.02587355300784111, "weighted_loss": 0.009047413244843483 }, { "avg_delta_l": 0.0007282333681359887, "avg_loss_unweighted": 0.012227637693285942, "avg_weight": 0.513392984867096, "std_delta_l": 0.010733923874795437, "std_weight": 0.015568006783723831, "weighted_loss": 0.006353870499879122 }, { "avg_delta_l": -0.0006809139158576727, "avg_loss_unweighted": 0.013713735155761242, "avg_weight": 0.5163565874099731, "std_delta_l": 0.011913706548511982, "std_weight": 0.02164573408663273, "weighted_loss": 0.00728962104767561 }, { "avg_delta_l": 0.001656526466831565, "avg_loss_unweighted": 0.013156292960047722, "avg_weight": 0.5125716328620911, "std_delta_l": 0.012841638177633286, "std_weight": 0.017008839175105095, "weighted_loss": 0.006916929502040148 }, { "avg_delta_l": -0.0023239003494381905, "avg_loss_unweighted": 0.017249615862965584, "avg_weight": 0.5223548412322998, "std_delta_l": 0.015443023294210434, "std_weight": 0.02742706425487995, "weighted_loss": 0.009274523705244064 }, { "avg_delta_l": -0.0017793183214962482, "avg_loss_unweighted": 0.014937473461031914, "avg_weight": 0.5207799077033997, "std_delta_l": 0.015438977628946304, "std_weight": 0.03010576218366623, "weighted_loss": 0.008113925345242023 }, { "avg_delta_l": 0.0023212330415844917, "avg_loss_unweighted": 0.013703988865017891, "avg_weight": 0.5133146643638611, "std_delta_l": 0.013493241742253304, "std_weight": 0.02102915570139885, "weighted_loss": 0.007209193892776966 }, { "avg_delta_l": 0.001103963004425168, "avg_loss_unweighted": 0.01417086087167263, "avg_weight": 0.5147486925125122, "std_delta_l": 0.012314759194850922, "std_weight": 0.021108537912368774, "weighted_loss": 0.007510557770729065 }, { "avg_delta_l": -0.0005741282366216183, "avg_loss_unweighted": 0.014788070693612099, "avg_weight": 0.5177878737449646, "std_delta_l": 0.013915248215198517, "std_weight": 0.024118291214108467, "weighted_loss": 0.0078788623213768 }, { "avg_delta_l": -0.0032037419732660055, "avg_loss_unweighted": 0.016593633219599724, "avg_weight": 0.5247268080711365, "std_delta_l": 0.01614178903400898, "std_weight": 0.030155466869473457, "weighted_loss": 0.009022457525134087 }, { "avg_delta_l": -0.0021900306455790997, "avg_loss_unweighted": 0.01517319492995739, "avg_weight": 0.5209560990333557, "std_delta_l": 0.01490831095725298, "std_weight": 0.027639083564281464, "weighted_loss": 0.008234472014009953 }, { "avg_delta_l": -0.0013474096776917577, "avg_loss_unweighted": 0.016550585627555847, "avg_weight": 0.5217501521110535, "std_delta_l": 0.014828622341156006, "std_weight": 0.023228125646710396, "weighted_loss": 0.008936657570302486 }, { "epoch": 20.0, "grad_norm": 0.2293879053909037, "learning_rate": 2.2638019465115457e-11, "loss": 0.015, "step": 1160 } ], "logging_steps": 10, "max_steps": 1160, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 810007227269120.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }