pawn-large / eval_results.json
thomas-schweich's picture
Update eval_results.json: probes + diagnostics from run_evals_backbone.py
d4d59e8 verified
{
"probes": {
"piece_type": {
"embed": {
"accuracy": 0.7939091920852661,
"loss": 0.8472670912742615,
"best_accuracy": 0.7941357493400574,
"n_train": 721682,
"n_val": 175570
},
"layer_0": {
"accuracy": 0.8584218621253967,
"loss": 0.5010583996772766,
"best_accuracy": 0.8584218621253967,
"n_train": 721682,
"n_val": 175570
},
"layer_1": {
"accuracy": 0.8969361186027527,
"loss": 0.3314746618270874,
"best_accuracy": 0.8969361186027527,
"n_train": 721682,
"n_val": 175570
},
"layer_2": {
"accuracy": 0.8990692496299744,
"loss": 0.3412594795227051,
"best_accuracy": 0.899357259273529,
"n_train": 721682,
"n_val": 175570
},
"layer_3": {
"accuracy": 0.9047186374664307,
"loss": 0.3327542543411255,
"best_accuracy": 0.9050514698028564,
"n_train": 721682,
"n_val": 175570
},
"layer_4": {
"accuracy": 0.8893006443977356,
"loss": 0.4131677746772766,
"best_accuracy": 0.8896953463554382,
"n_train": 721682,
"n_val": 175570
},
"layer_5": {
"accuracy": 0.880700409412384,
"loss": 0.4933600425720215,
"best_accuracy": 0.8813146948814392,
"n_train": 721682,
"n_val": 175570
},
"layer_6": {
"accuracy": 0.9210814237594604,
"loss": 0.40963974595069885,
"best_accuracy": 0.9215326905250549,
"n_train": 721682,
"n_val": 175570
},
"layer_7": {
"accuracy": 0.9027824401855469,
"loss": 0.7754349112510681,
"best_accuracy": 0.9038282036781311,
"n_train": 721682,
"n_val": 175570
},
"layer_8": {
"accuracy": 0.8876509070396423,
"loss": 1.2176413536071777,
"best_accuracy": 0.8893979787826538,
"n_train": 721682,
"n_val": 175570
},
"layer_9": {
"accuracy": 0.8824625611305237,
"loss": 1.4016369581222534,
"best_accuracy": 0.882866382598877,
"n_train": 721682,
"n_val": 175570
}
},
"side_to_move": {
"embed": {
"accuracy": 0.6732072234153748,
"loss": 0.6171705722808838,
"best_accuracy": 0.6740502119064331,
"n_train": 721682,
"n_val": 175570
},
"layer_0": {
"accuracy": 0.7220026254653931,
"loss": 0.4948278069496155,
"best_accuracy": 0.7239847183227539,
"n_train": 721682,
"n_val": 175570
},
"layer_1": {
"accuracy": 0.9605740904808044,
"loss": 0.09634365886449814,
"best_accuracy": 0.961844265460968,
"n_train": 721682,
"n_val": 175570
},
"layer_2": {
"accuracy": 1.0,
"loss": 8.253664973700836e-10,
"best_accuracy": 1.0,
"n_train": 721682,
"n_val": 175570
},
"layer_3": {
"accuracy": 1.0,
"loss": 6.287341203226049e-10,
"best_accuracy": 1.0,
"n_train": 721682,
"n_val": 175570
},
"layer_4": {
"accuracy": 1.0,
"loss": 6.716473488488361e-10,
"best_accuracy": 1.0,
"n_train": 721682,
"n_val": 175570
},
"layer_5": {
"accuracy": 1.0,
"loss": 1.2762426671031335e-09,
"best_accuracy": 1.0,
"n_train": 721682,
"n_val": 175570
},
"layer_6": {
"accuracy": 1.0,
"loss": 5.45468559209894e-10,
"best_accuracy": 1.0,
"n_train": 721682,
"n_val": 175570
},
"layer_7": {
"accuracy": 0.9999828934669495,
"loss": 0.00010745319741545245,
"best_accuracy": 1.0,
"n_train": 721682,
"n_val": 175570
},
"layer_8": {
"accuracy": 0.9996981024742126,
"loss": 0.0033487507607787848,
"best_accuracy": 0.9999088644981384,
"n_train": 721682,
"n_val": 175570
},
"layer_9": {
"accuracy": 0.9991057515144348,
"loss": 0.009687423706054688,
"best_accuracy": 0.9992538094520569,
"n_train": 721682,
"n_val": 175570
}
},
"is_check": {
"embed": {
"accuracy": 0.9438571333885193,
"loss": 0.21294333040714264,
"best_accuracy": 0.9438571333885193,
"n_train": 721682,
"n_val": 175570
},
"layer_0": {
"accuracy": 0.9441874623298645,
"loss": 0.1809319406747818,
"best_accuracy": 0.9443128108978271,
"n_train": 721682,
"n_val": 175570
},
"layer_1": {
"accuracy": 0.9485731720924377,
"loss": 0.14133617281913757,
"best_accuracy": 0.9487554430961609,
"n_train": 721682,
"n_val": 175570
},
"layer_2": {
"accuracy": 0.9493591785430908,
"loss": 0.13567784428596497,
"best_accuracy": 0.9493762850761414,
"n_train": 721682,
"n_val": 175570
},
"layer_3": {
"accuracy": 0.9487839341163635,
"loss": 0.1368710696697235,
"best_accuracy": 0.9492053985595703,
"n_train": 721682,
"n_val": 175570
},
"layer_4": {
"accuracy": 0.9479295611381531,
"loss": 0.14160196483135223,
"best_accuracy": 0.948151707649231,
"n_train": 721682,
"n_val": 175570
},
"layer_5": {
"accuracy": 0.9476220011711121,
"loss": 0.1432962566614151,
"best_accuracy": 0.9476220011711121,
"n_train": 721682,
"n_val": 175570
},
"layer_6": {
"accuracy": 0.944808304309845,
"loss": 0.17508020997047424,
"best_accuracy": 0.944808304309845,
"n_train": 721682,
"n_val": 175570
},
"layer_7": {
"accuracy": 0.9431394934654236,
"loss": 0.25748082995414734,
"best_accuracy": 0.9432191848754883,
"n_train": 721682,
"n_val": 175570
},
"layer_8": {
"accuracy": 0.9395625591278076,
"loss": 0.2536097466945648,
"best_accuracy": 0.9421996474266052,
"n_train": 721682,
"n_val": 175570
},
"layer_9": {
"accuracy": 0.9344534873962402,
"loss": 0.26875537633895874,
"best_accuracy": 0.9357520937919617,
"n_train": 721682,
"n_val": 175570
}
},
"castling_rights": {
"embed": {
"accuracy": 0.9315600395202637,
"loss": 0.22553254663944244,
"best_accuracy": 0.9315600395202637,
"n_train": 721682,
"n_val": 175570
},
"layer_0": {
"accuracy": 0.9855071902275085,
"loss": 0.03773088380694389,
"best_accuracy": 0.9855071902275085,
"n_train": 721682,
"n_val": 175570
},
"layer_1": {
"accuracy": 0.9911075234413147,
"loss": 0.02256346307694912,
"best_accuracy": 0.9911530613899231,
"n_train": 721682,
"n_val": 175570
},
"layer_2": {
"accuracy": 0.9926382303237915,
"loss": 0.01890489086508751,
"best_accuracy": 0.9926382303237915,
"n_train": 721682,
"n_val": 175570
},
"layer_3": {
"accuracy": 0.9909864664077759,
"loss": 0.022842826321721077,
"best_accuracy": 0.9910178184509277,
"n_train": 721682,
"n_val": 175570
},
"layer_4": {
"accuracy": 0.988813579082489,
"loss": 0.02855219691991806,
"best_accuracy": 0.9891766309738159,
"n_train": 721682,
"n_val": 175570
},
"layer_5": {
"accuracy": 0.9838810563087463,
"loss": 0.045494332909584045,
"best_accuracy": 0.9846343398094177,
"n_train": 721682,
"n_val": 175570
},
"layer_6": {
"accuracy": 0.9806857705116272,
"loss": 0.06296253949403763,
"best_accuracy": 0.982375979423523,
"n_train": 721682,
"n_val": 175570
},
"layer_7": {
"accuracy": 0.9754456877708435,
"loss": 0.11236727982759476,
"best_accuracy": 0.9768695831298828,
"n_train": 721682,
"n_val": 175570
},
"layer_8": {
"accuracy": 0.9711496829986572,
"loss": 0.17760467529296875,
"best_accuracy": 0.974750816822052,
"n_train": 721682,
"n_val": 175570
},
"layer_9": {
"accuracy": 0.9697043895721436,
"loss": 0.19383224844932556,
"best_accuracy": 0.9723358154296875,
"n_train": 721682,
"n_val": 175570
}
},
"ep_square": {
"embed": {
"accuracy": 0.9986899495124817,
"loss": 0.013800214976072311,
"best_accuracy": 0.9986899495124817,
"n_train": 721682,
"n_val": 175570
},
"layer_0": {
"accuracy": 0.9986159205436707,
"loss": 0.012510580942034721,
"best_accuracy": 0.9986899495124817,
"n_train": 721682,
"n_val": 175570
},
"layer_1": {
"accuracy": 0.998108983039856,
"loss": 0.010417615063488483,
"best_accuracy": 0.9986330270767212,
"n_train": 721682,
"n_val": 175570
},
"layer_2": {
"accuracy": 0.998046338558197,
"loss": 0.010189741849899292,
"best_accuracy": 0.9986728429794312,
"n_train": 721682,
"n_val": 175570
},
"layer_3": {
"accuracy": 0.9979495406150818,
"loss": 0.012118544429540634,
"best_accuracy": 0.9986273050308228,
"n_train": 721682,
"n_val": 175570
},
"layer_4": {
"accuracy": 0.9980008006095886,
"loss": 0.01685471087694168,
"best_accuracy": 0.9985817670822144,
"n_train": 721682,
"n_val": 175570
},
"layer_5": {
"accuracy": 0.9977273941040039,
"loss": 0.028079260140657425,
"best_accuracy": 0.9986215829849243,
"n_train": 721682,
"n_val": 175570
},
"layer_6": {
"accuracy": 0.9979210495948792,
"loss": 0.03867955505847931,
"best_accuracy": 0.9983652830123901,
"n_train": 721682,
"n_val": 175570
},
"layer_7": {
"accuracy": 0.9975849986076355,
"loss": 0.10313411056995392,
"best_accuracy": 0.9981203675270081,
"n_train": 721682,
"n_val": 175570
},
"layer_8": {
"accuracy": 0.9963546991348267,
"loss": 0.21344517171382904,
"best_accuracy": 0.9979438185691833,
"n_train": 721682,
"n_val": 175570
},
"layer_9": {
"accuracy": 0.9965882301330566,
"loss": 0.22015728056430817,
"best_accuracy": 0.9977501630783081,
"n_train": 721682,
"n_val": 175570
}
},
"material_count": {
"embed": {
"accuracy": 0.034327805042266846,
"loss": 1.8258986473083496,
"best_accuracy": 0.03609764575958252,
"n_train": 721682,
"n_val": 175570,
"mae": 9.544092178344727
},
"layer_0": {
"accuracy": 0.7317658066749573,
"loss": 0.5071787238121033,
"best_accuracy": 0.7317658066749573,
"n_train": 721682,
"n_val": 175570,
"mae": 5.244152545928955
},
"layer_1": {
"accuracy": 0.780502438545227,
"loss": 0.41502711176872253,
"best_accuracy": 0.7962906956672668,
"n_train": 721682,
"n_val": 175570,
"mae": 4.5925798416137695
},
"layer_2": {
"accuracy": 0.7607181668281555,
"loss": 0.4524354040622711,
"best_accuracy": 0.8052656054496765,
"n_train": 721682,
"n_val": 175570,
"mae": 4.535926818847656
},
"layer_3": {
"accuracy": 0.7762702703475952,
"loss": 0.42302945256233215,
"best_accuracy": 0.8083702325820923,
"n_train": 721682,
"n_val": 175570,
"mae": 4.42749547958374
},
"layer_4": {
"accuracy": 0.8069994449615479,
"loss": 0.36492645740509033,
"best_accuracy": 0.8194270730018616,
"n_train": 721682,
"n_val": 175570,
"mae": 4.263128757476807
},
"layer_5": {
"accuracy": 0.7869163155555725,
"loss": 0.40289977192878723,
"best_accuracy": 0.8261392116546631,
"n_train": 721682,
"n_val": 175570,
"mae": 4.211562633514404
},
"layer_6": {
"accuracy": 0.8064406514167786,
"loss": 0.3659830689430237,
"best_accuracy": 0.8205206990242004,
"n_train": 721682,
"n_val": 175570,
"mae": 4.238579750061035
},
"layer_7": {
"accuracy": 0.7785987854003906,
"loss": 0.4186266362667084,
"best_accuracy": 0.7785987854003906,
"n_train": 721682,
"n_val": 175570,
"mae": 4.764143943786621
},
"layer_8": {
"accuracy": 0.7711775302886963,
"loss": 0.432658851146698,
"best_accuracy": 0.7916219234466553,
"n_train": 721682,
"n_val": 175570,
"mae": 4.6962127685546875
},
"layer_9": {
"accuracy": 0.7523713111877441,
"loss": 0.4682177007198334,
"best_accuracy": 0.7643752098083496,
"n_train": 721682,
"n_val": 175570,
"mae": 4.852465629577637
}
},
"legal_move_count": {
"embed": {
"accuracy": 0.008551836013793945,
"loss": 149.90232849121094,
"best_accuracy": 0.00967252254486084,
"n_train": 721682,
"n_val": 175570,
"mae": 10.170339584350586
},
"layer_0": {
"accuracy": 0.42677438259124756,
"loss": 86.66902923583984,
"best_accuracy": 0.42758744955062866,
"n_train": 721682,
"n_val": 175570,
"mae": 7.217822551727295
},
"layer_1": {
"accuracy": 0.5159808397293091,
"loss": 73.18143463134766,
"best_accuracy": 0.5186024904251099,
"n_train": 721682,
"n_val": 175570,
"mae": 6.5129475593566895
},
"layer_2": {
"accuracy": 0.5453975200653076,
"loss": 68.73377227783203,
"best_accuracy": 0.5513627529144287,
"n_train": 721682,
"n_val": 175570,
"mae": 6.314815044403076
},
"layer_3": {
"accuracy": 0.5522029399871826,
"loss": 67.70482635498047,
"best_accuracy": 0.5584312081336975,
"n_train": 721682,
"n_val": 175570,
"mae": 6.2772135734558105
},
"layer_4": {
"accuracy": 0.5962252616882324,
"loss": 61.04885482788086,
"best_accuracy": 0.6002550721168518,
"n_train": 721682,
"n_val": 175570,
"mae": 5.927462100982666
},
"layer_5": {
"accuracy": 0.6950937509536743,
"loss": 46.100406646728516,
"best_accuracy": 0.6984593868255615,
"n_train": 721682,
"n_val": 175570,
"mae": 4.973029613494873
},
"layer_6": {
"accuracy": 0.6701233386993408,
"loss": 49.87580490112305,
"best_accuracy": 0.671006441116333,
"n_train": 721682,
"n_val": 175570,
"mae": 5.098058700561523
},
"layer_7": {
"accuracy": 0.5772729516029358,
"loss": 63.91435241699219,
"best_accuracy": 0.5772729516029358,
"n_train": 721682,
"n_val": 175570,
"mae": 5.923587799072266
},
"layer_8": {
"accuracy": 0.4951440691947937,
"loss": 76.33185577392578,
"best_accuracy": 0.4955073595046997,
"n_train": 721682,
"n_val": 175570,
"mae": 6.592904567718506
},
"layer_9": {
"accuracy": 0.43509548902511597,
"loss": 85.41091918945312,
"best_accuracy": 0.43509548902511597,
"n_train": 721682,
"n_val": 175570,
"mae": 7.040292739868164
}
},
"halfmove_clock": {
"embed": {
"accuracy": 0.005269646644592285,
"loss": 492.5446472167969,
"best_accuracy": 0.006241798400878906,
"n_train": 721682,
"n_val": 175570,
"mae": 14.495548248291016
},
"layer_0": {
"accuracy": 0.23201096057891846,
"loss": 380.2727966308594,
"best_accuracy": 0.23492592573165894,
"n_train": 721682,
"n_val": 175570,
"mae": 12.618289947509766
},
"layer_1": {
"accuracy": 0.3992686867713928,
"loss": 297.4544677734375,
"best_accuracy": 0.4043084979057312,
"n_train": 721682,
"n_val": 175570,
"mae": 10.975420951843262
},
"layer_2": {
"accuracy": 0.44090986251831055,
"loss": 276.8357238769531,
"best_accuracy": 0.444333553314209,
"n_train": 721682,
"n_val": 175570,
"mae": 10.860907554626465
},
"layer_3": {
"accuracy": 0.44483447074890137,
"loss": 274.8923645019531,
"best_accuracy": 0.448239803314209,
"n_train": 721682,
"n_val": 175570,
"mae": 10.831574440002441
},
"layer_4": {
"accuracy": 0.45950162410736084,
"loss": 267.6298828125,
"best_accuracy": 0.461600124835968,
"n_train": 721682,
"n_val": 175570,
"mae": 10.813135147094727
},
"layer_5": {
"accuracy": 0.44642406702041626,
"loss": 274.10528564453125,
"best_accuracy": 0.4507647156715393,
"n_train": 721682,
"n_val": 175570,
"mae": 10.989632606506348
},
"layer_6": {
"accuracy": 0.4215254783630371,
"loss": 286.4339599609375,
"best_accuracy": 0.43081116676330566,
"n_train": 721682,
"n_val": 175570,
"mae": 11.366626739501953
},
"layer_7": {
"accuracy": 0.36997300386428833,
"loss": 311.9603576660156,
"best_accuracy": 0.38234829902648926,
"n_train": 721682,
"n_val": 175570,
"mae": 11.935108184814453
},
"layer_8": {
"accuracy": 0.2972959876060486,
"loss": 347.9466552734375,
"best_accuracy": 0.3137670159339905,
"n_train": 721682,
"n_val": 175570,
"mae": 12.634200096130371
},
"layer_9": {
"accuracy": 0.28172940015792847,
"loss": 355.6545104980469,
"best_accuracy": 0.2964925765991211,
"n_train": 721682,
"n_val": 175570,
"mae": 12.718144416809082
}
},
"game_phase": {
"embed": {
"accuracy": 0.622520923614502,
"loss": 0.7951775789260864,
"best_accuracy": 0.6233524680137634,
"n_train": 721682,
"n_val": 175570
},
"layer_0": {
"accuracy": 0.9111408591270447,
"loss": 0.2095094472169876,
"best_accuracy": 0.9122514724731445,
"n_train": 721682,
"n_val": 175570
},
"layer_1": {
"accuracy": 0.942951500415802,
"loss": 0.1343085765838623,
"best_accuracy": 0.942951500415802,
"n_train": 721682,
"n_val": 175570
},
"layer_2": {
"accuracy": 0.9462664127349854,
"loss": 0.12671424448490143,
"best_accuracy": 0.948556125164032,
"n_train": 721682,
"n_val": 175570
},
"layer_3": {
"accuracy": 0.9471606612205505,
"loss": 0.1280113309621811,
"best_accuracy": 0.9481688141822815,
"n_train": 721682,
"n_val": 175570
},
"layer_4": {
"accuracy": 0.9526114463806152,
"loss": 0.11747458577156067,
"best_accuracy": 0.9545822143554688,
"n_train": 721682,
"n_val": 175570
},
"layer_5": {
"accuracy": 0.9567978382110596,
"loss": 0.12346646189689636,
"best_accuracy": 0.9587002396583557,
"n_train": 721682,
"n_val": 175570
},
"layer_6": {
"accuracy": 0.9504015445709229,
"loss": 0.18384838104248047,
"best_accuracy": 0.9527937173843384,
"n_train": 721682,
"n_val": 175570
},
"layer_7": {
"accuracy": 0.9318960905075073,
"loss": 0.41541701555252075,
"best_accuracy": 0.9381386041641235,
"n_train": 721682,
"n_val": 175570
},
"layer_8": {
"accuracy": 0.9232044219970703,
"loss": 0.5990833640098572,
"best_accuracy": 0.9265819787979126,
"n_train": 721682,
"n_val": 175570
},
"layer_9": {
"accuracy": 0.9152987003326416,
"loss": 0.6701229214668274,
"best_accuracy": 0.9211026430130005,
"n_train": 721682,
"n_val": 175570
}
}
},
"diagnostics": {
"in_check": {
"n_positions": 10000,
"terminal": false,
"mean_legal_rate": 0.9962050000000001,
"std_legal_rate": 0.010262941829709457,
"mean_pad_prob": 0.0002249239549312469,
"mean_entropy": 1.3413606088704197,
"std_entropy": 0.493262419918474
},
"double_check": {
"n_positions": 10000,
"terminal": false,
"mean_legal_rate": 0.980534,
"std_legal_rate": 0.04543891332327392,
"mean_pad_prob": 0.0005854707902188308,
"mean_entropy": 1.0687193951095513,
"std_entropy": 0.4351137626706165
},
"pin_restricts": {
"n_positions": 10000,
"terminal": false,
"mean_legal_rate": 0.9954330000000001,
"std_legal_rate": 0.010604362828572026,
"mean_pad_prob": 2.871860760116753e-05,
"mean_entropy": 3.1623122715773295,
"std_entropy": 0.6210740496539255
},
"ep_available": {
"n_positions": 10000,
"terminal": false,
"mean_legal_rate": 0.997568,
"std_legal_rate": 0.006321817460192919,
"mean_pad_prob": 6.7376469007357065e-06,
"mean_entropy": 3.4611863924339414,
"std_entropy": 0.3162521876686576
},
"castle_legal_k": {
"n_positions": 10000,
"terminal": false,
"mean_legal_rate": 0.9985720000000001,
"std_legal_rate": 0.003921838344450218,
"mean_pad_prob": 2.7854925776503706e-08,
"mean_entropy": 3.58268214635849,
"std_entropy": 0.18520260275989114
},
"castle_legal_q": {
"n_positions": 10000,
"terminal": false,
"mean_legal_rate": 0.9979549999999999,
"std_legal_rate": 0.00477786301603552,
"mean_pad_prob": 2.387310911296098e-08,
"mean_entropy": 3.6561634825468063,
"std_entropy": 0.18081162349268404
},
"castle_blocked_check": {
"n_positions": 10000,
"terminal": false,
"mean_legal_rate": 0.997653,
"std_legal_rate": 0.007229217869175063,
"mean_pad_prob": 0.00020219666076729442,
"mean_entropy": 1.230468960531347,
"std_entropy": 0.529049690427771
},
"promotion_available": {
"n_positions": 10000,
"terminal": false,
"mean_legal_rate": 0.99829,
"std_legal_rate": 0.005780648752519049,
"mean_pad_prob": 7.288141412029068e-07,
"mean_entropy": 3.2063094630002977,
"std_entropy": 0.47776097925070676
},
"checkmate": {
"n_positions": 10000,
"terminal": true,
"mean_legal_rate": 0.0,
"std_legal_rate": 0.0,
"mean_pad_prob": 0.9639568817602223,
"mean_entropy": 0.16463642582248514,
"std_entropy": 0.3105135865265827
},
"stalemate": {
"n_positions": 10000,
"terminal": true,
"mean_legal_rate": 0.0,
"std_legal_rate": 0.0,
"mean_pad_prob": 0.9908480515460105,
"mean_entropy": 0.03235361842690004,
"std_entropy": 0.14407793276502484
}
}
}