mol-checkpoints / 063_sparse_dispatch /shared_softmax_1plus2of5_results.json
iternovtsii's picture
Upload all experiment best checkpoints and results
71de729 verified
{
"label": "shared_softmax_1plus2of5",
"seed": 42,
"attention_mode": "sparse",
"routed_attn_type": "softmax",
"top_k_blocks": 2,
"num_thin_blocks": 5,
"num_shared_blocks": 1,
"total_params": 85293056,
"steps": [
25050,
25100,
25150,
25200,
25250,
25300,
25350,
25400,
25450,
25500,
25550,
25600,
25650,
25700,
25750,
25800,
25850,
25900,
25950,
26000,
26050,
26100,
26150,
26200,
26250,
26300,
26350,
26400,
26450,
26500,
26550,
26600,
26650,
26700,
26750,
26800,
26850,
26900,
26950,
27000,
27050,
27100,
27150,
27200,
27250,
27300,
27350,
27400,
27450,
27500,
27550,
27600,
27650,
27700,
27750,
27800,
27850,
27900,
27950,
28000,
28050,
28100,
28150,
28200,
28250,
28300,
28350,
28400,
28450,
28500,
28550,
28600,
28650,
28700,
28750,
28800,
28850,
28900,
28950,
29000,
29050,
29100,
29150,
29200,
29250,
29300,
29350,
29400,
29450,
29500,
29550,
29600,
29650,
29700,
29750,
29800,
29850,
29900,
29950,
30000,
30050,
30100,
30150,
30200,
30250,
30300,
30350,
30400,
30450,
30500,
30550,
30600,
30650,
30700,
30750,
30800,
30850,
30900,
30950,
31000,
31050,
31100,
31150,
31200,
31250,
31300,
31350,
31400,
31450,
31500,
31550,
31600,
31650,
31700,
31750,
31800,
31850,
31900,
31950,
32000,
32050,
32100,
32150,
32200,
32250,
32300,
32350,
32400,
32450,
32500,
32550,
32600,
32650,
32700,
32750,
32800,
32850,
32900,
32950,
33000,
33050,
33100,
33150,
33200,
33250,
33300,
33350,
33400,
33450,
33500,
33550,
33600,
33650,
33700,
33750,
33800,
33850,
33900,
33950,
34000,
34050,
34100,
34150,
34200,
34250,
34300,
34350,
34400,
34450,
34500,
34550,
34600,
34650,
34700,
34750,
34800,
34850,
34900,
34950,
35000,
35050,
35100,
35150,
35200,
35250,
35300,
35350,
35400,
35450,
35500,
35550,
35600,
35650,
35700,
35750,
35800,
35850,
35900,
35950,
36000,
36050,
36100,
36150,
36200,
36250,
36300,
36350,
36400,
36450,
36500,
36550,
36600,
36650,
36700,
36750,
36800,
36850,
36900,
36950,
37000,
37050,
37100,
37150,
37200,
37250,
37300,
37350,
37400,
37450,
37500,
37550,
37600,
37650,
37700,
37750,
37800,
37850,
37900,
37950,
38000,
38050,
38100,
38150,
38200,
38250,
38300,
38350,
38400,
38450,
38500,
38550,
38600,
38650,
38700,
38750,
38800,
38850,
38900,
38950,
39000,
39050,
39100,
39150,
39200,
39250,
39300,
39350,
39400,
39450,
39500,
39550,
39600,
39650,
39700,
39750,
39800,
39850,
39900,
39950,
40000,
40050,
40100,
40150,
40200,
40250,
40300,
40350,
40400,
40450,
40500,
40550,
40600,
40650,
40700,
40750,
40800,
40850,
40900,
40950,
41000,
41050,
41100,
41150,
41200,
41250,
41300,
41350,
41400,
41450,
41500,
41550,
41600,
41650,
41700,
41750,
41800,
41850,
41900,
41950,
42000,
42050,
42100,
42150,
42200,
42250,
42300,
42350,
42400,
42450,
42500,
42550,
42600,
42650,
42700,
42750,
42800,
42850,
42900,
42950,
43000,
43050,
43100,
43150,
43200,
43250,
43300,
43350,
43400,
43450,
43500,
43550,
43600,
43650,
43700,
43750,
43800,
43850,
43900,
43950,
44000,
44050,
44100,
44150,
44200,
44250,
44300,
44350,
44400,
44450,
44500,
44550,
44600,
44650,
44700,
44750,
44800,
44850,
44900,
44950,
45000,
45050,
45100,
45150,
45200,
45250,
45300,
45350,
45400,
45450,
45500,
45550,
45600,
45650,
45700,
45750,
45800,
45850,
45900,
45950,
46000,
46050,
46100,
46150,
46200,
46250,
46300,
46350,
46400,
46450,
46500,
46550,
46600,
46650,
46700,
46750,
46800,
46850,
46900,
46950,
47000,
47050,
47100,
47150,
47200,
47250,
47300,
47350,
47400,
47450,
47500,
47550,
47600,
47650,
47700,
47750,
47800,
47850,
47900,
47950,
48000,
48050,
48100,
48150,
48200,
48250,
48300,
48350,
48400,
48450,
48500,
48550,
48600,
48650,
48700,
48750,
48800,
48850,
48900,
48950,
49000,
49050,
49100,
49150,
49200,
49250,
49300,
49350,
49400,
49450,
49500,
49550,
49600,
49650,
49700,
49750,
49800,
49850,
49900,
49950,
50000
],
"train_loss": [
3.6083518266677856,
3.617443323135376,
3.7295976877212524,
3.712302803993225,
3.6362812519073486,
3.5672576427459717,
3.5720770359039307,
3.579551339149475,
3.652143716812134,
3.7311590909957886,
3.567141532897949,
3.611405372619629,
3.6820610761642456,
3.5733327865600586,
3.652596592903137,
3.713971257209778,
3.7153526544570923,
3.6786985397338867,
3.6578776836395264,
3.645725965499878,
3.6625555753707886,
3.650027275085449,
3.7017802000045776,
3.5309892892837524,
3.5268043279647827,
3.5971521139144897,
3.632554054260254,
3.6731910705566406,
3.741512417793274,
3.610128879547119,
3.6152586936950684,
3.785200834274292,
3.4503750801086426,
3.826375126838684,
3.6294844150543213,
3.6845279932022095,
3.607928156852722,
3.6133742332458496,
3.6545424461364746,
3.681681513786316,
3.6994056701660156,
3.6764410734176636,
3.6946834325790405,
3.5719927549362183,
3.679455518722534,
3.6010371446609497,
3.6601967811584473,
3.7879444360733032,
3.6054943799972534,
3.6594916582107544,
3.676219344139099,
3.6813101768493652,
3.642167091369629,
3.639889717102051,
3.5708905458450317,
3.696524500846863,
3.5874134302139282,
3.6461312770843506,
3.684667706489563,
3.625743269920349,
3.6690688133239746,
3.5464245080947876,
3.6885889768600464,
3.615282416343689,
3.719055652618408,
3.5640047788619995,
3.6580100059509277,
3.6788692474365234,
3.5977957248687744,
3.5949110984802246,
3.6985031366348267,
3.482587218284607,
3.5751004219055176,
3.677226424217224,
3.5593827962875366,
3.551208972930908,
3.560399889945984,
3.73794949054718,
3.7549997568130493,
3.6640018224716187,
3.51772940158844,
3.6257635354995728,
3.6517465114593506,
3.779793858528137,
3.5792198181152344,
3.5952887535095215,
3.426683783531189,
3.720228672027588,
3.6149317026138306,
3.6267229318618774,
3.4159196615219116,
3.5339343547821045,
3.5696059465408325,
3.651535987854004,
3.627320647239685,
3.7182990312576294,
3.5463268756866455,
3.589683175086975,
3.5319172143936157,
3.526711583137512,
3.5195542573928833,
3.693802833557129,
3.65901780128479,
3.5430057048797607,
3.713660478591919,
3.624492883682251,
3.626517415046692,
3.578974962234497,
3.7296053171157837,
3.5731054544448853,
3.4655762910842896,
3.6748491525650024,
3.649127721786499,
3.678430914878845,
3.617204785346985,
3.657150149345398,
3.59822678565979,
3.518629789352417,
3.585882067680359,
3.6027764081954956,
3.6480785608291626,
3.678100109100342,
3.612151622772217,
3.7235403060913086,
3.6397976875305176,
3.652080535888672,
3.5722548961639404,
3.5214706659317017,
3.5502564907073975,
3.549044966697693,
3.425241231918335,
3.6062567234039307,
3.663543224334717,
3.7452136278152466,
3.583325147628784,
3.5805752277374268,
3.623031973838806,
3.5880645513534546,
3.505559802055359,
3.5303760766983032,
3.7450627088546753,
3.454151153564453,
3.480127453804016,
3.4835102558135986,
3.6425013542175293,
3.6748422384262085,
3.451498508453369,
3.5949591398239136,
3.653231143951416,
3.639170289039612,
3.6041035652160645,
3.4780144691467285,
3.507498621940613,
3.546748399734497,
3.5783804655075073,
3.424217700958252,
3.514679789543152,
3.4578524827957153,
3.472074270248413,
3.543735384941101,
3.5907342433929443,
3.5740398168563843,
3.6696428060531616,
3.6030850410461426,
3.620068073272705,
3.4487775564193726,
3.488271951675415,
3.4990309476852417,
3.53454852104187,
3.516539692878723,
3.4878714084625244,
3.637736201286316,
3.493032693862915,
3.5864157676696777,
3.6430463790893555,
3.6769834756851196,
3.504554510116577,
3.5667686462402344,
3.575843095779419,
3.536365270614624,
3.631547212600708,
3.4846911430358887,
3.610027551651001,
3.566365122795105,
3.5841366052627563,
3.4702959060668945,
3.6190366744995117,
3.493306517601013,
3.5534794330596924,
3.588853597640991,
3.599913477897644,
3.52876615524292,
3.5664411783218384,
3.4881184101104736,
3.6531611680984497,
3.6187076568603516,
3.5515064001083374,
3.651723623275757,
3.4852182865142822,
3.494141459465027,
3.537196397781372,
3.41710889339447,
3.44149649143219,
3.481061339378357,
3.5677181482315063,
3.477903127670288,
3.376874804496765,
3.452937126159668,
3.6020482778549194,
3.511450171470642,
3.5582104921340942,
3.4963245391845703,
3.4490036964416504,
3.442155122756958,
3.482515335083008,
3.293032169342041,
3.5172154903411865,
3.48720383644104,
3.363356590270996,
3.6077336072921753,
3.421395182609558,
3.5739986896514893,
3.590048909187317,
3.4384931325912476,
3.630783200263977,
3.5215651988983154,
3.4876843690872192,
3.4968947172164917,
3.521423578262329,
3.467708706855774,
3.5595961809158325,
3.6111427545547485,
3.4631383419036865,
3.5073468685150146,
3.40625536441803,
3.5261353254318237,
3.50848126411438,
3.519644618034363,
3.518497109413147,
3.5096715688705444,
3.567242383956909,
3.4615172147750854,
3.5262739658355713,
3.519886016845703,
3.258573532104492,
3.6366811990737915,
3.4558463096618652,
3.503255605697632,
3.455098867416382,
3.5097711086273193,
3.503081440925598,
3.466107487678528,
3.4031264781951904,
3.51539945602417,
3.3784362077713013,
3.446903109550476,
3.511398196220398,
3.5130993127822876,
3.361214518547058,
3.518704414367676,
3.4994720220565796,
3.486672043800354,
3.433280110359192,
3.4757182598114014,
3.449890613555908,
3.5136486291885376,
3.440657138824463,
3.3433997631073,
3.2992366552352905,
3.4743082523345947,
3.469030499458313,
3.458367347717285,
3.5044779777526855,
3.4330222606658936,
3.5186437368392944,
3.5639636516571045,
3.4685128927230835,
3.508505344390869,
3.4427106380462646,
3.5575973987579346,
3.4300689697265625,
3.4150384664535522,
3.447471857070923,
3.386540174484253,
3.4140655994415283,
3.3869963884353638,
3.4196581840515137,
3.430400013923645,
3.4538460969924927,
3.407589316368103,
3.4028146266937256,
3.574006676673889,
3.3886715173721313,
3.5382401943206787,
3.4221279621124268,
3.4199094772338867,
3.5536309480667114,
3.516419291496277,
3.4661927223205566,
3.571042060852051,
3.54723060131073,
3.471065640449524,
3.434295177459717,
3.4741311073303223,
3.4556405544281006,
3.49492347240448,
3.4121590852737427,
3.4773097038269043,
3.436905264854431,
3.424375891685486,
3.4495917558670044,
3.423703908920288,
3.410847783088684,
3.3947677612304688,
3.5955268144607544,
3.5147539377212524,
3.3649595975875854,
3.5461241006851196,
3.468516707420349,
3.342689871788025,
3.5424418449401855,
3.4241353273391724,
3.555933356285095,
3.4582310914993286,
3.49621844291687,
3.511426329612732,
3.4613765478134155,
3.5629189014434814,
3.452308773994446,
3.5120609998703003,
3.409445881843567,
3.3589751720428467,
3.438714027404785,
3.4173730611801147,
3.4602240324020386,
3.497611880302429,
3.492923855781555,
3.5291624069213867,
3.5523715019226074,
3.4475982189178467,
3.3995331525802612,
3.5589756965637207,
3.2780728340148926,
3.561224937438965,
3.3467921018600464,
3.402703881263733,
3.3939777612686157,
3.4750036001205444,
3.377066135406494,
3.3744245767593384,
3.4410181045532227,
3.495090365409851,
3.506938099861145,
3.3880698680877686,
3.5287803411483765,
3.347741723060608,
3.493304491043091,
3.403043746948242,
3.4163620471954346,
3.4564138650894165,
3.5217223167419434,
3.3419153690338135,
3.4071494340896606,
3.4867093563079834,
3.4913322925567627,
3.471901059150696,
3.475080370903015,
3.487777352333069,
3.4717601537704468,
3.46796715259552,
3.524390459060669,
3.4564850330352783,
3.5299761295318604,
3.535548448562622,
3.5213963985443115,
3.476629853248596,
3.4829827547073364,
3.5211294889450073,
3.4952917098999023,
3.42410147190094,
3.470067024230957,
3.3996392488479614,
3.4548416137695312,
3.579703450202942,
3.2801095247268677,
3.2997814416885376,
3.5252243280410767,
3.3782368898391724,
3.3683922290802,
3.4832394123077393,
3.4147685766220093,
3.349594235420227,
3.3765493631362915,
3.359038233757019,
3.4105597734451294,
3.431021213531494,
3.395572304725647,
3.4251832962036133,
3.534596085548401,
3.4603493213653564,
3.359512209892273,
3.3890496492385864,
3.434372305870056,
3.505436420440674,
3.2300448417663574,
3.5355273485183716,
3.3328700065612793,
3.383004665374756,
3.348633646965027,
3.3711514472961426,
3.3112053871154785,
3.5036020278930664,
3.388237953186035,
3.3512226343154907,
3.4060628414154053,
3.4298683404922485,
3.288511276245117,
3.4836246967315674,
3.5236856937408447,
3.386712908744812,
3.3557159900665283,
3.501689314842224,
3.3745850324630737,
3.39342200756073,
3.4854958057403564,
3.387757420539856,
3.4464656114578247,
3.3793710470199585,
3.390943765640259,
3.4230291843414307,
3.3977383375167847,
3.307376742362976,
3.317682147026062,
3.2900612354278564,
3.487459421157837,
3.3569990396499634,
3.435261845588684,
3.552346110343933,
3.457302212715149,
3.483385443687439,
3.3705724477767944,
3.397168755531311,
3.4961615800857544,
3.307496190071106,
3.4056836366653442,
3.370774030685425,
3.40269136428833,
3.463863253593445,
3.404985547065735,
3.447320342063904,
3.5635966062545776,
3.4910225868225098,
3.4804004430770874,
3.4100005626678467,
3.45903742313385,
3.42643141746521,
3.549372673034668,
3.4653899669647217,
3.353666305541992,
3.37154757976532,
3.380896806716919,
3.4873924255371094,
3.3504579067230225,
3.48051381111145,
3.397234559059143,
3.323536515235901,
3.4845447540283203,
3.37676739692688,
3.394395112991333,
3.4281346797943115,
3.4352985620498657,
3.4575542211532593,
3.494913935661316,
3.297554612159729,
3.3564796447753906,
3.417369246482849,
3.4429352283477783,
3.302910089492798,
3.3926113843917847,
3.4046382904052734,
3.358651876449585,
3.484888792037964,
3.359411120414734,
3.3941829204559326,
3.4453272819519043,
3.3978153467178345,
3.4100072383880615,
3.391700029373169,
3.389965772628784,
3.351758360862732,
3.446449637413025,
3.5307613611221313,
3.4041818380355835,
3.4151382446289062,
3.5147838592529297,
3.4543042182922363,
3.430955171585083,
3.326687216758728,
3.465772271156311,
3.4421095848083496
],
"val_steps": [
27500,
30000,
32500,
35000,
37500,
40000,
42500,
45000,
47500,
50000
],
"val_ppl": [
37.15884349407581,
36.07247865356619,
35.144515937602314,
34.244505221972,
33.474171007983934,
33.155867400783926,
32.755262132718386,
32.57977455852037,
32.47445450233072,
32.45808550762491
],
"best_ppl": 32.45808550762491,
"train_time_s": 9913.7885825634
}