{ "config": { "num_layers": 20, "hidden_dim": 64, "num_samples": 1024, "training_steps": 500, "learning_rate": 0.001, "batch_size": 64, "initialization": "Kaiming He + 1/sqrt(num_layers) scaling (IDENTICAL for both)" }, "plain_mlp": { "final_loss": 0.3332558274269104, "initial_loss": 0.332968533039093, "loss_history": [ 0.332968533039093, 0.3402037024497986, 0.3364717364311218, 0.3382267951965332, 0.3330632448196411, 0.3238801956176758, 0.33725303411483765, 0.33949077129364014, 0.3349677622318268, 0.3314793109893799, 0.3350830376148224, 0.3396027088165283, 0.3461214303970337, 0.33500319719314575, 0.33258378505706787, 0.3271251916885376, 0.33106938004493713, 0.330522745847702, 0.3350549042224884, 0.34020766615867615, 0.3346160352230072, 0.33573126792907715, 0.331778347492218, 0.3422446846961975, 0.32669609785079956, 0.32841432094573975, 0.33652669191360474, 0.3339478373527527, 0.33742019534111023, 0.3364412188529968, 0.3310929536819458, 0.329889178276062, 0.32710886001586914, 0.3314291536808014, 0.32728779315948486, 0.33449968695640564, 0.3348814845085144, 0.33067554235458374, 0.33825111389160156, 0.32776153087615967, 0.3310733437538147, 0.33599284291267395, 0.3276815712451935, 0.33107852935791016, 0.3359324038028717, 0.3308407664299011, 0.3272636830806732, 0.3326541483402252, 0.33763229846954346, 0.3272179663181305, 0.3317231833934784, 0.33339571952819824, 0.33039557933807373, 0.33708441257476807, 0.3325483500957489, 0.32474005222320557, 0.3338140845298767, 0.3298497796058655, 0.3405594825744629, 0.3340454399585724, 0.3293362855911255, 0.33182770013809204, 0.32914435863494873, 0.33043327927589417, 0.342511922121048, 0.33861756324768066, 0.3329136371612549, 0.3396056294441223, 0.33696088194847107, 0.3394179344177246, 0.3365488648414612, 0.333668977022171, 0.3302392065525055, 0.3311960697174072, 0.331310898065567, 0.33419355750083923, 0.33611857891082764, 0.3307862877845764, 0.33424338698387146, 0.3314332962036133, 0.33120185136795044, 0.3432038128376007, 0.3276495635509491, 0.3253132998943329, 0.33370089530944824, 0.3334325850009918, 0.33796757459640503, 0.3381417691707611, 0.336396187543869, 0.3448200821876526, 0.3283367156982422, 0.33384162187576294, 0.33309417963027954, 0.3408854305744171, 0.33603304624557495, 0.3335626721382141, 0.33273571729660034, 0.332747220993042, 0.33269503712654114, 0.337196946144104, 0.33257246017456055, 0.3369458317756653, 0.3334164321422577, 0.3322732746601105, 0.33338549733161926, 0.34215712547302246, 0.3291473388671875, 0.3304932713508606, 0.3349063992500305, 0.3322477340698242, 0.33423349261283875, 0.33466511964797974, 0.3396754562854767, 0.3337388038635254, 0.3390074074268341, 0.33351635932922363, 0.33572056889533997, 0.33725765347480774, 0.33980029821395874, 0.3307250738143921, 0.3347432315349579, 0.3250923156738281, 0.33779600262641907, 0.3293820023536682, 0.3356536626815796, 0.33151179552078247, 0.3418586850166321, 0.3444397449493408, 0.3353630006313324, 0.3337811827659607, 0.3424583971500397, 0.3381706774234772, 0.32626256346702576, 0.3191347122192383, 0.33715391159057617, 0.33729690313339233, 0.33802124857902527, 0.3238735795021057, 0.3331955671310425, 0.33632975816726685, 0.3381429612636566, 0.3314988613128662, 0.3366459608078003, 0.33990392088890076, 0.3366839587688446, 0.3358498811721802, 0.33553940057754517, 0.3383553624153137, 0.3372274935245514, 0.3360890746116638, 0.3336467444896698, 0.33137425780296326, 0.3313506543636322, 0.33801984786987305, 0.3348143696784973, 0.3381652235984802, 0.33109965920448303, 0.33065634965896606, 0.33452922105789185, 0.3361864984035492, 0.3278672397136688, 0.3391719460487366, 0.3253280222415924, 0.33980295062065125, 0.3361275792121887, 0.33439165353775024, 0.3297124207019806, 0.3297994136810303, 0.3315487504005432, 0.34181031584739685, 0.34069621562957764, 0.3201693892478943, 0.33051198720932007, 0.32677409052848816, 0.33463847637176514, 0.334945946931839, 0.33414000272750854, 0.3312710225582123, 0.3372732102870941, 0.3287302255630493, 0.32816988229751587, 0.34039369225502014, 0.3414865732192993, 0.3287455439567566, 0.3278599977493286, 0.3294675946235657, 0.3318881392478943, 0.33630383014678955, 0.33583498001098633, 0.3277794122695923, 0.3441321849822998, 0.33931320905685425, 0.3246610164642334, 0.3394920825958252, 0.3324398994445801, 0.32542577385902405, 0.33746016025543213, 0.33016330003738403, 0.33138081431388855, 0.3333736062049866, 0.33004230260849, 0.335909366607666, 0.33485135436058044, 0.3469405174255371, 0.3376665711402893, 0.32913172245025635, 0.3373066782951355, 0.34056517481803894, 0.33372431993484497, 0.3341078758239746, 0.33511120080947876, 0.3352786600589752, 0.3287707567214966, 0.32857248187065125, 0.33455517888069153, 0.3378414511680603, 0.32852864265441895, 0.3366684913635254, 0.3318488597869873, 0.33084720373153687, 0.3281920850276947, 0.32972854375839233, 0.33841508626937866, 0.3286718726158142, 0.3372878432273865, 0.3256922662258148, 0.32902824878692627, 0.3393133878707886, 0.32971060276031494, 0.3320726156234741, 0.33548030257225037, 0.33959805965423584, 0.3317180573940277, 0.3321043848991394, 0.33242157101631165, 0.3237553834915161, 0.3358321487903595, 0.3395066559314728, 0.33536916971206665, 0.3325914144515991, 0.3364296555519104, 0.3329724967479706, 0.3324553668498993, 0.3354540765285492, 0.3420693874359131, 0.3459964692592621, 0.33733582496643066, 0.33702749013900757, 0.33047980070114136, 0.33542799949645996, 0.34718090295791626, 0.3310956060886383, 0.328813374042511, 0.3344953656196594, 0.3336995840072632, 0.3262229263782501, 0.3373294174671173, 0.3287215232849121, 0.3260827660560608, 0.3272097110748291, 0.3332976698875427, 0.34142327308654785, 0.3398308753967285, 0.33534085750579834, 0.33829042315483093, 0.3399216830730438, 0.3382388949394226, 0.3297419548034668, 0.33515945076942444, 0.3321044147014618, 0.33267727494239807, 0.32454240322113037, 0.3303866684436798, 0.3406679034233093, 0.3363434076309204, 0.33855724334716797, 0.3321431875228882, 0.34290140867233276, 0.3352099359035492, 0.3338983356952667, 0.3258536756038666, 0.33761051297187805, 0.33715319633483887, 0.3330615758895874, 0.32767096161842346, 0.33492639660835266, 0.3231773376464844, 0.33072930574417114, 0.3423689007759094, 0.33502012491226196, 0.33624961972236633, 0.3346553146839142, 0.3279629945755005, 0.3266867995262146, 0.3321791887283325, 0.33108729124069214, 0.33155086636543274, 0.3317343592643738, 0.3374349772930145, 0.3357178270816803, 0.32846030592918396, 0.328380286693573, 0.3357793390750885, 0.3296501636505127, 0.33293581008911133, 0.3345809578895569, 0.34283316135406494, 0.3336940407752991, 0.3340163826942444, 0.33170098066329956, 0.3387867212295532, 0.3395037055015564, 0.33608272671699524, 0.3381228744983673, 0.3256526589393616, 0.32226407527923584, 0.3270655870437622, 0.32900455594062805, 0.32871338725090027, 0.3325023949146271, 0.34181392192840576, 0.33826935291290283, 0.3314586281776428, 0.33122166991233826, 0.3373032808303833, 0.3400927484035492, 0.33277270197868347, 0.33700406551361084, 0.3378522992134094, 0.3333711326122284, 0.33014780282974243, 0.33368366956710815, 0.32724565267562866, 0.3266543745994568, 0.33365482091903687, 0.3297240734100342, 0.32680970430374146, 0.33299076557159424, 0.33300912380218506, 0.33403903245925903, 0.3431350588798523, 0.33904892206192017, 0.3324755132198334, 0.32933175563812256, 0.33719339966773987, 0.32767781615257263, 0.33541908860206604, 0.3346007466316223, 0.33879315853118896, 0.3345041275024414, 0.33489879965782166, 0.3363777995109558, 0.33767563104629517, 0.3328354060649872, 0.3331868052482605, 0.3402370810508728, 0.33388105034828186, 0.33734720945358276, 0.332383394241333, 0.33302611112594604, 0.33460700511932373, 0.3303910791873932, 0.3368116021156311, 0.3294057250022888, 0.3381325602531433, 0.33359014987945557, 0.33498138189315796, 0.33019959926605225, 0.33259788155555725, 0.3409283459186554, 0.3365859091281891, 0.33277761936187744, 0.3394026756286621, 0.3290417790412903, 0.3401448130607605, 0.3251156806945801, 0.33011412620544434, 0.3365683853626251, 0.33974623680114746, 0.33201247453689575, 0.3294023275375366, 0.3409932851791382, 0.34747999906539917, 0.34078940749168396, 0.33660799264907837, 0.3298097848892212, 0.3322901725769043, 0.3355182111263275, 0.341055691242218, 0.33802860975265503, 0.33179306983947754, 0.3440161943435669, 0.33081281185150146, 0.3272419273853302, 0.3260897696018219, 0.33437293767929077, 0.3293749690055847, 0.3273012936115265, 0.332683801651001, 0.3310787081718445, 0.3282574415206909, 0.3340969979763031, 0.339628666639328, 0.3317316770553589, 0.32998812198638916, 0.33901870250701904, 0.33634477853775024, 0.32989999651908875, 0.3339465856552124, 0.3389207720756531, 0.34303727746009827, 0.33803778886795044, 0.33491209149360657, 0.33020907640457153, 0.3321894407272339, 0.3333378732204437, 0.3393198847770691, 0.34461069107055664, 0.3298798203468323, 0.33570319414138794, 0.3291119933128357, 0.3379652202129364, 0.3366316556930542, 0.3392751216888428, 0.330910325050354, 0.3388398289680481, 0.32620128989219666, 0.34021633863449097, 0.3330092430114746, 0.33735135197639465, 0.33621883392333984, 0.33607298135757446, 0.33188751339912415, 0.33755841851234436, 0.3280537724494934, 0.3305507302284241, 0.32686758041381836, 0.32987216114997864, 0.3396640717983246, 0.33654022216796875, 0.3425779342651367, 0.32754188776016235, 0.3275264501571655, 0.33224233984947205, 0.33522123098373413, 0.3300197422504425, 0.3299338221549988, 0.33014100790023804, 0.33165717124938965, 0.33280014991760254, 0.3325314521789551, 0.33692190051078796, 0.3337472677230835, 0.3372674286365509, 0.33729901909828186, 0.3348565995693207, 0.3356059491634369, 0.32656145095825195, 0.33172452449798584, 0.3321564793586731, 0.32580462098121643, 0.33925479650497437, 0.3400028944015503, 0.3347415030002594, 0.333713561296463, 0.33577901124954224, 0.3256620466709137, 0.32763227820396423, 0.33191540837287903, 0.3397204875946045, 0.3354407250881195, 0.337093323469162, 0.3282209038734436, 0.32860079407691956, 0.3297363817691803, 0.33467888832092285, 0.34158656001091003, 0.3324458599090576, 0.3320366442203522, 0.3411107659339905, 0.33266565203666687, 0.3380263149738312, 0.32896336913108826, 0.32966428995132446, 0.3425551652908325, 0.3358476161956787, 0.33467555046081543, 0.3310127854347229, 0.33904367685317993, 0.33704298734664917, 0.32885774970054626, 0.3259532153606415, 0.336847722530365, 0.33440321683883667, 0.3305370807647705, 0.33669552206993103, 0.33197110891342163, 0.33256542682647705, 0.33659619092941284, 0.3332558274269104 ], "gradient_norms": [ 8.64771111707464e-19, 3.8712929478931e-18, 1.745819773021961e-17, 7.49907583485141e-17, 2.979541624107448e-16, 1.3384685459987998e-15, 5.608540992334309e-15, 2.6169963819686753e-14, 2.812787708913328e-13, 2.449897706577331e-12, 2.7808975608389908e-11, 2.4811785959144572e-09, 1.491367562778123e-08, 1.0903946190410352e-07, 7.237420049932553e-07, 4.253757651895285e-06, 2.5013401682372205e-05, 0.00014615175314247608, 0.0007018009782768786, 0.006607615854591131 ], "activation_means": [ 0.004745648708194494, -8.492707274854183e-05, 0.0009839760605245829, 0.00041824899381026626, -3.091913094976917e-05, -3.619983544922434e-06, -4.25032339990139e-06, -1.3652680536324624e-05, -4.909698327537626e-05, -0.0001918129128171131, -0.0005527863977476954, -0.0006557117449119687, -0.0008800626383163035, -0.0013834433630108833, -0.0029748189263045788, -0.0030720613431185484, -0.0037732573691755533, -0.0025511696003377438, -0.003509903559461236, -0.001648824429139495 ], "activation_stds": [ 0.17953188717365265, 0.04192492738366127, 0.00943511351943016, 0.0022596188355237246, 0.0005189738003537059, 0.00011896424257429317, 2.454679270158522e-05, 1.9420922399149276e-05, 8.438384247710928e-05, 0.0003417829575482756, 0.0005783369415439665, 0.00275764730758965, 0.004125435370951891, 0.005355025641620159, 0.004256225656718016, 0.004457178059965372, 0.003838905831798911, 0.007069852203130722, 0.007913228124380112, 0.010841521434485912 ] }, "res_mlp": { "final_loss": 0.0629926323890686, "initial_loss": 13.825733184814453, "loss_history": [ 13.825733184814453, 6.929441928863525, 4.056141376495361, 2.5541770458221436, 1.7824699878692627, 1.416144847869873, 1.1620547771453857, 0.9483454823493958, 0.7843164801597595, 0.697279155254364, 0.6453202366828918, 0.5573611855506897, 0.5255516171455383, 0.5015190839767456, 0.45454704761505127, 0.41940945386886597, 0.3973648250102997, 0.3921322822570801, 0.3478171229362488, 0.3742031157016754, 0.33467820286750793, 0.33183223009109497, 0.3318370580673218, 0.3350353240966797, 0.3137206435203552, 0.30033594369888306, 0.3070758879184723, 0.28984498977661133, 0.2851133346557617, 0.26924896240234375, 0.2905859053134918, 0.279144287109375, 0.2638997435569763, 0.2549370229244232, 0.24959874153137207, 0.2570638060569763, 0.2534509301185608, 0.25506916642189026, 0.24851691722869873, 0.23037107288837433, 0.24302563071250916, 0.22616733610630035, 0.24495507776737213, 0.23248232901096344, 0.24176537990570068, 0.2333303689956665, 0.22003066539764404, 0.245095893740654, 0.23193161189556122, 0.2175668478012085, 0.22363689541816711, 0.2120945304632187, 0.23737770318984985, 0.23073768615722656, 0.21438273787498474, 0.21955570578575134, 0.21080368757247925, 0.21987861394882202, 0.21614855527877808, 0.21649761497974396, 0.19733500480651855, 0.2083965241909027, 0.20562587678432465, 0.22173789143562317, 0.2059110701084137, 0.1945328712463379, 0.21662525832653046, 0.19985336065292358, 0.19271507859230042, 0.21030962467193604, 0.2069288194179535, 0.1867353767156601, 0.20643624663352966, 0.1913994699716568, 0.18623554706573486, 0.20420098304748535, 0.20161986351013184, 0.1983901560306549, 0.19239430129528046, 0.1878090798854828, 0.19025678932666779, 0.1812693476676941, 0.19511495530605316, 0.19247011840343475, 0.19345001876354218, 0.18723750114440918, 0.17752212285995483, 0.18105608224868774, 0.17320361733436584, 0.17695054411888123, 0.18027013540267944, 0.18198877573013306, 0.1806012988090515, 0.17939069867134094, 0.19096586108207703, 0.17916780710220337, 0.1848122477531433, 0.16437095403671265, 0.1749158799648285, 0.1854352056980133, 0.17872394621372223, 0.1727355718612671, 0.17188334465026855, 0.17672254145145416, 0.17822399735450745, 0.17592446506023407, 0.1675281673669815, 0.16010603308677673, 0.17089994251728058, 0.1601085066795349, 0.1725354939699173, 0.1574731022119522, 0.1598099172115326, 0.16953468322753906, 0.16409367322921753, 0.1595795452594757, 0.15564961731433868, 0.1542244702577591, 0.14573857188224792, 0.1609399914741516, 0.15494975447654724, 0.15977708995342255, 0.1628899723291397, 0.15699170529842377, 0.15361425280570984, 0.16150403022766113, 0.15789395570755005, 0.13668665289878845, 0.16025841236114502, 0.14876320958137512, 0.15640254318714142, 0.13928887248039246, 0.15516719222068787, 0.14445939660072327, 0.1433185338973999, 0.14129473268985748, 0.14215922355651855, 0.15496797859668732, 0.1444668471813202, 0.14378705620765686, 0.14088605344295502, 0.12942053377628326, 0.1482706069946289, 0.13720951974391937, 0.1429380476474762, 0.14560531079769135, 0.13521014153957367, 0.1422801911830902, 0.1372361034154892, 0.13660244643688202, 0.13699297606945038, 0.14125284552574158, 0.1396929919719696, 0.13849902153015137, 0.14266842603683472, 0.14512087404727936, 0.14272311329841614, 0.13200822472572327, 0.12528839707374573, 0.1362495869398117, 0.13740304112434387, 0.14004293084144592, 0.13355493545532227, 0.13202989101409912, 0.12513452768325806, 0.12320512533187866, 0.13207478821277618, 0.12799137830734253, 0.12362419813871384, 0.13280878961086273, 0.1266961544752121, 0.13285361230373383, 0.13146376609802246, 0.1272575557231903, 0.13236036896705627, 0.13058793544769287, 0.12451867759227753, 0.13076333701610565, 0.12537893652915955, 0.1286524534225464, 0.1255934089422226, 0.12199349701404572, 0.12703447043895721, 0.1205616444349289, 0.11638176441192627, 0.12982048094272614, 0.11749467253684998, 0.11279859393835068, 0.11866491287946701, 0.12125350534915924, 0.12608124315738678, 0.1334507018327713, 0.12499742209911346, 0.12244322896003723, 0.12269967049360275, 0.11862020939588547, 0.11574031412601471, 0.12193585187196732, 0.11600715667009354, 0.11964584141969681, 0.1162799671292305, 0.12539267539978027, 0.12248687446117401, 0.12070564925670624, 0.13140465319156647, 0.11640572547912598, 0.10755001753568649, 0.11134535819292068, 0.11165831983089447, 0.1097593680024147, 0.11635623127222061, 0.11294370889663696, 0.10825790464878082, 0.10611490160226822, 0.11304027587175369, 0.11056037247180939, 0.11218193918466568, 0.11071833968162537, 0.11433180421590805, 0.1080167144536972, 0.11098785698413849, 0.10625725984573364, 0.10640506446361542, 0.11352894455194473, 0.11021991074085236, 0.10663466155529022, 0.10082174837589264, 0.10823401808738708, 0.10888013243675232, 0.10405822843313217, 0.10449113696813583, 0.10663808882236481, 0.1043190062046051, 0.10356761515140533, 0.10988475382328033, 0.11231439560651779, 0.10832472890615463, 0.10288867354393005, 0.11017926037311554, 0.10268239676952362, 0.10657632350921631, 0.10329350084066391, 0.11197119206190109, 0.10926295816898346, 0.1126130148768425, 0.0998806282877922, 0.10103444755077362, 0.09290547668933868, 0.09799303859472275, 0.10779248923063278, 0.10077505558729172, 0.10144646465778351, 0.10275106132030487, 0.09488312900066376, 0.0980915054678917, 0.09439370036125183, 0.10310178995132446, 0.09724435210227966, 0.10685458779335022, 0.09623485803604126, 0.09753909707069397, 0.08963052928447723, 0.09267763793468475, 0.09678862243890762, 0.09540798515081406, 0.08834634721279144, 0.09270018339157104, 0.08261752128601074, 0.1081431582570076, 0.10075649619102478, 0.09674010425806046, 0.09663750231266022, 0.09644731134176254, 0.09872166812419891, 0.09951721131801605, 0.088838130235672, 0.09299120306968689, 0.09496532380580902, 0.09245827049016953, 0.09697187691926956, 0.0878148078918457, 0.09270801395177841, 0.09216231107711792, 0.09456576406955719, 0.08298899233341217, 0.09676386415958405, 0.09488122165203094, 0.09232550114393234, 0.09934262931346893, 0.08506445586681366, 0.09329526126384735, 0.09657076001167297, 0.09500131011009216, 0.09519918262958527, 0.09765627980232239, 0.08732327073812485, 0.09301722794771194, 0.09543395042419434, 0.0873609408736229, 0.08974739909172058, 0.08848126232624054, 0.08153203874826431, 0.0837751030921936, 0.09327980130910873, 0.0937112420797348, 0.0836104080080986, 0.08840271085500717, 0.08613412827253342, 0.08702630549669266, 0.0854334831237793, 0.07984519749879837, 0.0783020630478859, 0.0839296504855156, 0.0771411657333374, 0.07718954980373383, 0.08612189441919327, 0.08547349274158478, 0.08440866321325302, 0.0895165503025055, 0.08419612795114517, 0.08556754887104034, 0.08557409048080444, 0.08724275976419449, 0.08401279896497726, 0.08161499351263046, 0.08135215193033218, 0.07858897000551224, 0.07412116229534149, 0.09073606133460999, 0.0884423777461052, 0.07584438472986221, 0.08999098837375641, 0.07833802700042725, 0.08642788231372833, 0.08511155843734741, 0.07571419328451157, 0.07647889107465744, 0.0770968496799469, 0.07909407466650009, 0.07677635550498962, 0.07856360077857971, 0.07897358387708664, 0.08044072985649109, 0.0849953144788742, 0.07985112816095352, 0.07496027648448944, 0.0693586990237236, 0.077299565076828, 0.07525025308132172, 0.07886706292629242, 0.07089076936244965, 0.0745236724615097, 0.08298902958631516, 0.07434681057929993, 0.07863019406795502, 0.07296870648860931, 0.07038713246583939, 0.07498430460691452, 0.0731402337551117, 0.0791475921869278, 0.07742582261562347, 0.07537870854139328, 0.0774664357304573, 0.08444315940141678, 0.06756129115819931, 0.07491543889045715, 0.07476594299077988, 0.07262799143791199, 0.0735686719417572, 0.07892186939716339, 0.07961255311965942, 0.07117517292499542, 0.06840993463993073, 0.06531023234128952, 0.07480637729167938, 0.07482421398162842, 0.07697328180074692, 0.08092670142650604, 0.07015441358089447, 0.07309134304523468, 0.07465572655200958, 0.07515942305326462, 0.07721630483865738, 0.07378698885440826, 0.0710270032286644, 0.07413457334041595, 0.07682187855243683, 0.08446906507015228, 0.07762646675109863, 0.07136626541614532, 0.07230590283870697, 0.07373417913913727, 0.07236794382333755, 0.07123907655477524, 0.08195364475250244, 0.06900320947170258, 0.060011740773916245, 0.07414371520280838, 0.07555309683084488, 0.0644296333193779, 0.07632855325937271, 0.07520553469657898, 0.07276205718517303, 0.07271023094654083, 0.06531301140785217, 0.07297447323799133, 0.07270961999893188, 0.07454632222652435, 0.07276542484760284, 0.06848332285881042, 0.06939152628183365, 0.07199845463037491, 0.07199475914239883, 0.06776336580514908, 0.06876210123300552, 0.07206545770168304, 0.06846804171800613, 0.07429170608520508, 0.0714627206325531, 0.07270139455795288, 0.07588784396648407, 0.07119745016098022, 0.06948363780975342, 0.06362828612327576, 0.07597526907920837, 0.07059313356876373, 0.0640760287642479, 0.06041271239519119, 0.062441278249025345, 0.06848485767841339, 0.06524111330509186, 0.06564353406429291, 0.07082067430019379, 0.0676988959312439, 0.07045383006334305, 0.06132864952087402, 0.0676838830113411, 0.064274862408638, 0.06932217627763748, 0.06570309400558472, 0.06711895018815994, 0.06243062764406204, 0.0685991570353508, 0.0648145079612732, 0.0689878761768341, 0.06215982884168625, 0.06463310867547989, 0.055849675089120865, 0.06691331416368484, 0.06402920186519623, 0.05663374066352844, 0.056076034903526306, 0.06126324087381363, 0.06105763465166092, 0.06767643988132477, 0.06495392322540283, 0.06322813034057617, 0.0626610741019249, 0.05588827282190323, 0.06816142797470093, 0.06441586464643478, 0.06622708588838577, 0.053589969873428345, 0.06528521329164505, 0.06313464045524597, 0.06422115117311478, 0.06403939425945282, 0.05603744089603424, 0.06019753962755203, 0.05978955700993538, 0.058692023158073425, 0.05943136289715767, 0.06272856146097183, 0.06204809248447418, 0.05660928413271904, 0.06422382593154907, 0.06855501979589462, 0.05489934980869293, 0.05919265374541283, 0.06072087585926056, 0.06199055165052414, 0.06212494522333145, 0.0532408244907856, 0.05850880220532417, 0.06332679837942123, 0.06473984569311142, 0.05620533972978592, 0.056782789528369904, 0.05984317883849144, 0.06279785186052322, 0.0680396780371666, 0.06252189725637436, 0.057086262851953506, 0.057004414498806, 0.05471985042095184, 0.05932869017124176, 0.06498537957668304, 0.06377285718917847, 0.06319063156843185, 0.0629926323890686 ], "gradient_norms": [ 0.0037803652230650187, 0.003631350351497531, 0.003396314335986972, 0.00274146581068635, 0.002563745016232133, 0.0024723419919610023, 0.002229638397693634, 0.002245080890133977, 0.002163525903597474, 0.0021340805105865, 0.0021220329217612743, 0.0020243690814822912, 0.0020883611869066954, 0.001933824154548347, 0.0018491963855922222, 0.0018003054428845644, 0.0017963032005354762, 0.0018056104891002178, 0.0018354005878791213, 0.0019063110230490565 ], "activation_means": [ -0.02131376974284649, -0.03601019084453583, -0.06662210822105408, -0.12518449127674103, -0.1524452120065689, -0.1727040708065033, -0.1917235553264618, -0.20844022929668427, -0.21533320844173431, -0.2277694195508957, -0.2347966730594635, -0.2530445158481598, -0.2579406797885895, -0.2545703649520874, -0.2688005566596985, -0.2855533957481384, -0.28739380836486816, -0.3037635087966919, -0.29087066650390625, -0.3035217523574829 ], "activation_stds": [ 0.13483233749866486, 0.1384648084640503, 0.14461848139762878, 0.14690853655338287, 0.155510812997818, 0.15813173353672028, 0.1588330715894699, 0.16174864768981934, 0.1593213975429535, 0.16204914450645447, 0.16705702245235443, 0.16576600074768066, 0.16414247453212738, 0.16486325860023499, 0.16198192536830902, 0.16898144781589508, 0.16573883593082428, 0.17666961252689362, 0.16819415986537933, 0.17242176830768585 ] }, "summary": { "loss_improvement": 5.2903937299934425, "plain_grad_range": [ 8.64771111707464e-19, 0.006607615854591131 ], "res_grad_range": [ 0.0017963032005354762, 0.0037803652230650187 ], "plain_std_range": [ 1.9420922399149276e-05, 0.17953188717365265 ], "res_std_range": [ 0.13483233749866486, 0.17666961252689362 ] } }