mol-checkpoints / 063_sparse_dispatch /shared_1plus2of14_results.json
iternovtsii's picture
Upload all experiment best checkpoints and results
71de729 verified
{
"label": "shared_1plus2of14",
"attention_mode": "sparse",
"top_k_blocks": 2,
"num_thin_blocks": 15,
"num_shared_blocks": 1,
"total_params": 190355456,
"steps": [
38050,
38100,
38150,
38200,
38250,
38300,
38350,
38400,
38450,
38500,
38550,
38600,
38650,
38700,
38750,
38800,
38850,
38900,
38950,
39000,
39050,
39100,
39150,
39200,
39250,
39300,
39350,
39400,
39450,
39500,
39550,
39600,
39650,
39700,
39750,
39800,
39850,
39900,
39950,
40000,
40050,
40100,
40150,
40200,
40250,
40300,
40350,
40400,
40450,
40500,
40550,
40600,
40650,
40700,
40750,
40800,
40850,
40900,
40950,
41000,
41050,
41100,
41150,
41200,
41250,
41300,
41350,
41400,
41450,
41500,
41550,
41600,
41650,
41700,
41750,
41800,
41850,
41900,
41950,
42000,
42050,
42100,
42150,
42200,
42250,
42300,
42350,
42400,
42450,
42500,
42550,
42600,
42650,
42700,
42750,
42800,
42850,
42900,
42950,
43000,
43050,
43100,
43150,
43200,
43250,
43300,
43350,
43400,
43450,
43500,
43550,
43600,
43650,
43700,
43750,
43800,
43850,
43900,
43950,
44000,
44050,
44100,
44150,
44200,
44250,
44300,
44350,
44400,
44450,
44500,
44550,
44600,
44650,
44700,
44750,
44800,
44850,
44900,
44950,
45000,
45050,
45100,
45150,
45200,
45250,
45300,
45350,
45400,
45450,
45500,
45550,
45600,
45650,
45700,
45750,
45800,
45850,
45900,
45950,
46000,
46050,
46100,
46150,
46200,
46250,
46300,
46350,
46400,
46450,
46500,
46550,
46600,
46650,
46700,
46750,
46800,
46850,
46900,
46950,
47000,
47050,
47100,
47150,
47200,
47250,
47300,
47350,
47400,
47450,
47500,
47550,
47600,
47650,
47700,
47750,
47800,
47850,
47900,
47950,
48000,
48050,
48100,
48150,
48200,
48250,
48300,
48350,
48400,
48450,
48500,
48550,
48600,
48650,
48700,
48750,
48800,
48850,
48900,
48950,
49000,
49050,
49100,
49150,
49200,
49250,
49300,
49350,
49400,
49450,
49500,
49550,
49600,
49650,
49700,
49750,
49800,
49850,
49900,
49950,
50000
],
"train_loss": [
2.9015581011772156,
3.0877742767333984,
3.0060471892356873,
2.979893147945404,
2.9070188403129578,
2.9616722464561462,
2.9606329798698425,
2.952632486820221,
2.985970377922058,
2.9885979294776917,
2.959640681743622,
2.9275310039520264,
2.9644022583961487,
3.028023064136505,
2.8567745685577393,
2.981859505176544,
2.9161601066589355,
2.9018890261650085,
3.0053383111953735,
2.981068193912506,
3.2714539766311646,
3.200727105140686,
3.2772529125213623,
3.3006343245506287,
3.224383592605591,
3.2366743087768555,
3.328200042247772,
3.2752211689949036,
3.126257836818695,
3.2665703892707825,
3.2506250739097595,
3.256326138973236,
3.1124512553215027,
3.2942296266555786,
3.3704161047935486,
3.3830237984657288,
3.131134808063507,
3.3166852593421936,
3.1025359630584717,
3.339309334754944,
3.389427065849304,
3.252554178237915,
3.2779111862182617,
3.233321487903595,
3.213325083255768,
3.257583796977997,
3.3224533200263977,
3.3595325350761414,
3.446103036403656,
3.2393237948417664,
3.5256888270378113,
3.399535596370697,
3.315065622329712,
3.27653169631958,
3.354659676551819,
3.330633521080017,
3.5394102334976196,
3.2856783270835876,
3.437723755836487,
3.318567931652069,
3.766273021697998,
3.7216588258743286,
3.8072307109832764,
3.7736505270004272,
3.7691721320152283,
3.7887516021728516,
3.856812059879303,
3.874578297138214,
3.9427862763404846,
3.8634148240089417,
3.7983070611953735,
3.8670894503593445,
3.7252750396728516,
3.648626923561096,
3.8420876264572144,
3.5890191197395325,
3.779814302921295,
3.776054799556732,
3.775964617729187,
3.7213558554649353,
3.7100287675857544,
3.831614077091217,
3.8754929900169373,
3.6624096035957336,
3.70512455701828,
3.7648224234580994,
3.869477331638336,
3.666087567806244,
3.692387044429779,
3.963961660861969,
3.7574403882026672,
3.737488627433777,
3.754002809524536,
3.7285738587379456,
3.8595767617225647,
3.792065441608429,
3.710012435913086,
3.826682209968567,
3.630021393299103,
3.835118293762207,
3.877008378505707,
3.6724842190742493,
3.7637985944747925,
3.683578908443451,
3.7392765879631042,
3.7953012585639954,
3.6229681372642517,
3.693288505077362,
3.711137890815735,
3.9235337376594543,
3.803937792778015,
3.875222861766815,
3.749752104282379,
3.715513229370117,
3.757683515548706,
3.6391186714172363,
3.665630578994751,
3.688731014728546,
3.713706314563751,
3.8073756098747253,
3.796328842639923,
3.9164799451828003,
3.63570237159729,
3.673487901687622,
3.698325753211975,
3.8453012108802795,
3.5700666308403015,
3.454531729221344,
3.605205476284027,
3.550259292125702,
3.4428544640541077,
3.4148746132850647,
3.527815878391266,
3.3878287076950073,
3.4765297770500183,
3.3848544359207153,
3.427438199520111,
3.446206271648407,
3.379789888858795,
3.4588151574134827,
3.2954816818237305,
3.289965569972992,
3.3505231738090515,
3.345255970954895,
3.5421721935272217,
3.556142032146454,
3.5374680161476135,
3.5906589031219482,
3.245023012161255,
3.255285382270813,
3.434578001499176,
3.4980711936950684,
3.553148567676544,
3.4331920742988586,
3.470349073410034,
3.5473925471305847,
3.5164272785186768,
3.517852544784546,
3.354642331600189,
3.4197571873664856,
3.483809173107147,
3.5518792867660522,
3.577494263648987,
3.4167940616607666,
3.563300132751465,
3.4134522676467896,
3.3110046982765198,
3.5615519285202026,
3.446166515350342,
3.465098202228546,
3.454281270503998,
3.3804754614830017,
3.394438147544861,
3.4428005814552307,
3.551225006580353,
3.452974796295166,
3.44801664352417,
3.3881312012672424,
3.3247726559638977,
3.4634634256362915,
3.395548164844513,
3.51163512468338,
3.349991202354431,
3.4572901725769043,
3.412857949733734,
3.5346421599388123,
3.4644089937210083,
3.3988868594169617,
3.3962671756744385,
3.572585880756378,
3.3299076557159424,
3.4451955556869507,
3.47091281414032,
3.4972103238105774,
3.436339259147644,
3.435834586620331,
3.512551724910736,
3.4304845333099365,
3.2975882291793823,
3.5106979608535767,
3.5004570484161377,
3.4723839163780212,
3.4300034642219543,
3.5481141805648804,
3.4319798946380615,
3.4638107419013977,
3.4913118481636047,
3.442385256290436,
3.5069888830184937,
3.4873231053352356,
3.3946642875671387,
3.4911097288131714,
3.5648680329322815,
3.4298973083496094,
3.405434548854828,
3.4540992975234985,
3.447507619857788,
3.434307873249054,
3.556748926639557,
3.330422341823578,
3.436522364616394,
3.647747814655304,
3.577707052230835,
3.404127538204193,
3.3136566281318665,
3.3757554292678833,
3.483103036880493,
3.4589837789535522,
3.517594873905182,
3.4277899861335754,
3.408407986164093,
3.3644352555274963,
3.3004584908485413,
3.5263137221336365,
3.5759807229042053,
3.465166926383972,
3.432847499847412,
3.6107040643692017,
3.471543550491333,
3.3828750252723694
],
"val_steps": [
40000,
42500,
45000,
47500,
50000
],
"val_ppl": [
41.25568802497702,
37.060070068835074,
36.62768732075425,
36.51926911018504,
36.50631941181755
],
"best_ppl": 36.50631941181755,
"train_time_s": 11454.23207616806
}