LilMoo-v0.1 / evals_all_steps.csv
shiza-fatimah's picture
Squashed to single commit (history removed)
ea0b9a5
step,arc_hi_acc,arc_hi_acc_norm,hellaswag_hi_acc,hellaswag_hi_acc_norm,mmlu_hi_acc,mmlu_hi_acc_norm,truthfulqa_hi_mc1,truthfulqa_hi_mc2,copa_hi_acc,copa_hi_acc_norm,iitp_mr_hi_acc,iitp_mr_hi_acc_norm,indicxnli_acc,indicxnli_acc_norm,milu_hi_acc,milu_hi_acc_norm,csqa_hi_acc,csqa_hi_acc_norm,global_piqa_acc,global_piqa_acc_norm
05000,0.2131849315068493,0.2525684931506849,0.2818606627017841,0.3106414613423959,0.2432258583259628,0.2584224491436842,0.240620957309185,0.4243197005669198,0.532293986636971,0.5278396436525612,0.3870967741935484,0.3870967741935484,0.3759036144578313,0.3823293172690763,0.253927584114355,0.2639066819499696,0.3874786568013659,0.346812749003984,0.55,0.57
10000,0.2260273972602739,0.2714041095890411,0.2896134239592183,0.3217926932880204,0.243145453083541,0.2581812334164187,0.2380336351875808,0.4097685409614982,0.5412026726057907,0.5590200445434298,0.3193548387096774,0.3193548387096774,0.3991967871485943,0.3895582329317269,0.2614793338277931,0.2676151304699615,0.3915196357427433,0.3476380193511668,0.52,0.59
15000,0.2303082191780822,0.2773972602739726,0.2935429056924384,0.328695836873407,0.2503015196590817,0.2576183967194661,0.222509702457956,0.4030135472163406,0.5367483296213809,0.5189309576837416,0.332258064516129,0.332258064516129,0.3843373493975903,0.4040160642570281,0.2444878969725575,0.2464432607376441,0.4028457598178713,0.3581673306772908,0.53,0.62
20000,0.2320205479452055,0.2731164383561644,0.2970475785896346,0.3382540356839422,0.2472461204470531,0.2651764895071158,0.2289780077619663,0.4250298003686678,0.5256124721603563,0.534521158129176,0.3161290322580645,0.3161290322580645,0.3947791164658634,0.3879518072289156,0.2595913963994336,0.2633672712561526,0.3999146272054638,0.357057484348321,0.53,0.63
25000,0.2328767123287671,0.2791095890410959,0.3033135089209856,0.338678844519966,0.2443515317198681,0.2546434027498593,0.2199223803363518,0.4061963597209867,0.534521158129176,0.5523385300668151,0.3096774193548387,0.3096774193548387,0.4212851405622489,0.4128514056224899,0.2696379205717753,0.276447980581215,0.404667046101309,0.3553215708594194,0.56,0.58
30000,0.2345890410958904,0.2756849315068493,0.3014018691588785,0.3445199660152931,0.2538393503256412,0.2614778483557128,0.2212160413971539,0.4090077039627854,0.5478841870824054,0.5389755011135857,0.3677419354838709,0.3677419354838709,0.4196787148594377,0.4032128514056224,0.2693682152248668,0.2752343065201267,0.407455890722823,0.361126920887877,0.56,0.61
35000,0.2422945205479452,0.2928082191780822,0.3039507221750212,0.3452633814783347,0.2523116507196269,0.2590656910830586,0.2212160413971539,0.4090758306363093,0.5434298440979956,0.5211581291759465,0.3354838709677419,0.3354838709677419,0.4349397590361445,0.4228915662650602,0.2605353651136133,0.2676151304699615,0.4077689243027888,0.3605008537279454,0.55,0.58
40000,0.2440068493150685,0.273972602739726,0.3020390824129141,0.3463254035683942,0.2569751547800916,0.2645332475677414,0.2134540750323415,0.3981797689376249,0.5367483296213809,0.5189309576837416,0.3161290322580645,0.3161290322580645,0.4184738955823293,0.4072289156626506,0.2657946193783291,0.2697727732452296,0.413915765509391,0.3647410358565737,0.54,0.63
45000,0.2303082191780822,0.2842465753424658,0.3036321155480034,0.3491928632115548,0.2585832596285278,0.2604325802042293,0.2263906856403622,0.3919191088196193,0.5300668151447662,0.5256124721603563,0.3903225806451613,0.3903225806451613,0.4265060240963855,0.410441767068273,0.2570966219405299,0.2635021239296069,0.4096186681844052,0.3626920887877063,0.57,0.61
50000,0.2337328767123287,0.285958904109589,0.306393372982158,0.3529099405267629,0.2509447615984562,0.2632467636889925,0.2263906856403622,0.3897103146983419,0.5478841870824054,0.5456570155902004,0.3,0.3,0.423293172690763,0.4024096385542168,0.2670757197761446,0.2717281370103162,0.4149402390438247,0.3669607285145134,0.57,0.63
55000,0.2397260273972602,0.2799657534246575,0.308411214953271,0.3524851316907391,0.2529548926590013,0.258502854386106,0.2302716688227684,0.4024781634587988,0.5590200445434298,0.5590200445434298,0.3677419354838709,0.3677419354838709,0.4385542168674698,0.4156626506024096,0.2628278605623356,0.2700424785921381,0.4075412635173591,0.3637450199203187,0.56,0.64
60000,0.238013698630137,0.2825342465753425,0.3074553950722175,0.3537595581988105,0.2570555600225134,0.2626035217496181,0.2160413971539456,0.3971905543851754,0.5501113585746102,0.5144766146993318,0.332258064516129,0.332258064516129,0.4325301204819277,0.4244979919678714,0.2548041264918077,0.265727193041602,0.4093910073989755,0.3644564598747866,0.54,0.63
65000,0.2431506849315068,0.2868150684931507,0.3117034834324554,0.3600254885301614,0.2552062394468119,0.2617994693254,0.2212160413971539,0.3919504225776677,0.5434298440979956,0.5412026726057907,0.3258064516129032,0.3258064516129032,0.4301204819277108,0.4244979919678714,0.2749646011732182,0.2841345829681073,0.4182413204325555,0.3660785429709732,0.54,0.64
70000,0.2422945205479452,0.2910958904109589,0.3096856414613424,0.3601316907391674,0.2571359652649352,0.2588244753557931,0.2212160413971539,0.3960414345598643,0.5545657015590201,0.5367483296213809,0.3193548387096774,0.3193548387096774,0.4333333333333333,0.4204819277108433,0.2677499831434158,0.2766502595913964,0.4126920887877063,0.3661070005691519,0.53,0.62
75000,0.2397260273972602,0.2936643835616438,0.312340696686491,0.360768903993203,0.2581812334164187,0.2585832596285278,0.2315653298835705,0.4058531120966335,0.5456570155902004,0.5278396436525612,0.3677419354838709,0.3677419354838709,0.4156626506024096,0.4040160642570281,0.2750994538466725,0.2836625986110174,0.4128059191804212,0.363118952760387,0.56,0.6
80000,0.2517123287671233,0.300513698630137,0.3092608326253186,0.3614061172472387,0.2599501487496984,0.2659001366889121,0.2393272962483829,0.4098130561192677,0.5590200445434298,0.5278396436525612,0.3838709677419354,0.3838709677419354,0.4253012048192771,0.4160642570281124,0.2696379205717753,0.2805609871215697,0.4168184405236198,0.3679852020489471,0.57,0.65
85000,0.2482876712328767,0.2928082191780822,0.3121282922684791,0.3632115548003398,0.2561711023558736,0.2609150116587601,0.2354463130659767,0.400331304008595,0.5590200445434298,0.532293986636971,0.364516129032258,0.364516129032258,0.4277108433734939,0.4112449799196787,0.2730092374081316,0.2833254669273818,0.4178144564598748,0.3682697780307342,0.54,0.61
90000,0.2508561643835616,0.2936643835616438,0.3127655055225148,0.3639549702633814,0.2470853099622095,0.2592265015679022,0.2276843467011643,0.3985713129896215,0.5590200445434298,0.534521158129176,0.2838709677419355,0.2838709677419355,0.4220883534136546,0.3995983935742971,0.2868316364371924,0.298428966354258,0.4152817302219693,0.3689243027888446,0.58,0.64
95000,0.2568493150684932,0.2928082191780822,0.3143585386576041,0.3671410365335599,0.2546434027498593,0.2626035217496181,0.2276843467011643,0.4079544284856502,0.5434298440979956,0.5389755011135857,0.2967741935483871,0.2967741935483871,0.4212851405622489,0.4128514056224899,0.2754365855303081,0.2821117928662935,0.4171030165054069,0.3688958451906659,0.55,0.63
100000,0.2534246575342466,0.288527397260274,0.3129779099405267,0.3683092608326253,0.2544825922650157,0.2626839269920399,0.2289780077619663,0.4012834314532906,0.5523385300668151,0.5389755011135857,0.3258064516129032,0.3258064516129032,0.4180722891566265,0.4112449799196787,0.2836625986110174,0.2932371384262693,0.419379624359704,0.3701479795105293,0.55,0.66
105000,0.2594178082191781,0.2902397260273973,0.3152081563296516,0.368840271877655,0.256412318083139,0.262201495537509,0.2315653298835705,0.4010640454510209,0.5367483296213809,0.532293986636971,0.3129032258064516,0.3129032258064516,0.4228915662650602,0.4028112449799196,0.2771222439484863,0.2850785516822871,0.4182697780307342,0.3677575412635173,0.58,0.66
110000,0.2491438356164383,0.2962328767123288,0.3174384027187765,0.366928632115548,0.2565731285679826,0.2604325802042293,0.2328589909443725,0.4124873621208749,0.5434298440979956,0.532293986636971,0.3451612903225806,0.3451612903225806,0.4152610441767068,0.4076305220883534,0.2829209089070191,0.2913492009979098,0.4215139442231075,0.3701195219123506,0.56,0.63
115000,0.25,0.2979452054794521,0.3176508071367884,0.3695836873406967,0.2558494813861864,0.2605933906890729,0.2263906856403622,0.3995155428413429,0.5545657015590201,0.5256124721603563,0.3258064516129032,0.3258064516129032,0.4200803212851405,0.4124497991967871,0.2707167419594093,0.2775942283055762,0.4183551508252703,0.3678998292544109,0.6,0.65
120000,0.2525684931506849,0.300513698630137,0.3137213254035684,0.3677782497875956,0.253919755568063,0.258904880598215,0.2276843467011643,0.4002037635409254,0.5567928730512249,0.5412026726057907,0.3096774193548387,0.3096774193548387,0.4188755020080321,0.410441767068273,0.2829883352437462,0.2885172948553705,0.4190096755833807,0.3685828116107,0.58,0.64
125000,0.2482876712328767,0.300513698630137,0.3166949872557349,0.3684154630416312,0.2609150116587601,0.2630859532041489,0.2289780077619663,0.4028802361780124,0.5501113585746102,0.5501113585746102,0.3,0.3,0.423293172690763,0.4116465863453815,0.2813026768255681,0.2897983952531859,0.421371656232214,0.3715993170176437,0.6,0.65
130000,0.2431506849315068,0.2945205479452055,0.3168011894647408,0.3704333050127442,0.2558494813861864,0.2609150116587601,0.2380336351875808,0.4034970782739727,0.5590200445434298,0.5412026726057907,0.2903225806451613,0.2903225806451613,0.4313253012048192,0.4208835341365461,0.2841345829681073,0.2922257433753624,0.4212862834376779,0.3703471826977803,0.58,0.66
135000,0.2440068493150685,0.2979452054794521,0.316482582837723,0.3686278674596431,0.2575379914770443,0.2617994693254,0.2367399741267787,0.4061901392457935,0.5545657015590201,0.5300668151447662,0.3064516129032258,0.3064516129032258,0.4309236947791164,0.4184738955823293,0.2732789427550401,0.2831231879172005,0.4206886738759248,0.3704040978941377,0.57,0.66
140000,0.2491438356164383,0.2919520547945205,0.316588785046729,0.3701146983857264,0.2564927233255608,0.2585832596285278,0.2367399741267787,0.4030490917934246,0.5456570155902004,0.532293986636971,0.3258064516129032,0.3258064516129032,0.4269076305220883,0.4068273092369477,0.2713910053266806,0.2798867237542984,0.4241035856573705,0.3713431986340353,0.6,0.65
145000,0.2534246575342466,0.3022260273972603,0.3180756159728122,0.3693712829226848,0.2540001608104848,0.2605933906890729,0.2367399741267787,0.409437747794674,0.5545657015590201,0.532293986636971,0.3096774193548387,0.3096774193548387,0.4204819277108433,0.4088353413654618,0.2782010653361202,0.2860225203964668,0.4207455890722823,0.370176437108708,0.6,0.7
150000,0.25,0.300513698630137,0.3158453695836873,0.3685216652506372,0.2580204229315751,0.2609954169011819,0.2341526520051746,0.4073158343694472,0.5545657015590201,0.5278396436525612,0.2967741935483871,0.2967741935483871,0.4192771084337349,0.4100401606425702,0.2796170184073899,0.2879778841615535,0.4210017074558907,0.37057484348321,0.59,0.66
155000,0.2534246575342466,0.2970890410958904,0.3160577740016992,0.3701146983857264,0.2556886709013428,0.258502854386106,0.2354463130659767,0.4059185722804225,0.5523385300668151,0.5367483296213809,0.3387096774193548,0.3387096774193548,0.4265060240963855,0.4120481927710843,0.2767851122648506,0.2857528150495583,0.4208025042686397,0.3697495731360273,0.59,0.67
160000,0.2508561643835616,0.300513698630137,0.3158453695836873,0.3701146983857264,0.2572967757497789,0.2587440701133714,0.2354463130659767,0.4034919247403767,0.5567928730512249,0.532293986636971,0.3064516129032258,0.3064516129032258,0.4281124497991967,0.4060240963855421,0.2779313599892118,0.2860225203964668,0.4216277746158224,0.3704040978941377,0.6,0.65
165000,0.2534246575342466,0.2979452054794521,0.3176508071367884,0.3719201359388275,0.2578596124467315,0.2611562273860255,0.240620957309185,0.4072788192460527,0.5545657015590201,0.5389755011135857,0.267741935483871,0.267741935483871,0.4301204819277108,0.4108433734939759,0.275908569887398,0.2857528150495583,0.4194365395560614,0.3689527603870233,0.59,0.65
170000,0.247431506849315,0.3030821917808219,0.3172259983007646,0.3710705182667799,0.2566535338104044,0.2592265015679022,0.2380336351875808,0.4024878931449278,0.5545657015590201,0.5278396436525612,0.2806451612903226,0.2806451612903226,0.4261044176706827,0.4120481927710843,0.2785381970197559,0.2863596520801025,0.4219977233921457,0.3725099601593625,0.6,0.64
175000,0.2517123287671233,0.2988013698630137,0.3162701784197111,0.3703271028037383,0.2562515075982954,0.2624427112647745,0.2367399741267787,0.4031180556670132,0.5545657015590201,0.532293986636971,0.3032258064516129,0.3032258064516129,0.4144578313253012,0.3987951807228915,0.2763131279077607,0.286089946733194,0.4210870802504268,0.3706886738759248,0.59,0.66
179590,0.2482876712328767,0.300513698630137,0.3172259983007646,0.3706457094307561,0.259306906810324,0.2619602798102436,0.2393272962483829,0.407889506940758,0.5545657015590201,0.5389755011135857,0.3451612903225806,0.3451612903225806,0.423293172690763,0.4080321285140562,0.2751668801833996,0.2855505360393769,0.4215424018212862,0.3712293682413204,0.58,0.65