{ "n_orgs": 20, "pass_at_k": 0.7, "cross_lora_pass_at_k": 0.05, "rollout_mean_exact": 0.465, "per_org": [ { "organism": "sep_acceptance", "actual": 946, "any_exact": true, "any_cross": false, "rollouts": [ { "r": 0, "pred": 946, "exact": true, "cross": false, "raw": "946" }, { "r": 1, "pred": 946, "exact": true, "cross": false, "raw": "946" }, { "r": 2, "pred": 946, "exact": true, "cross": false, "raw": "946" }, { "r": 3, "pred": 946, "exact": true, "cross": false, "raw": "946" }, { "r": 4, "pred": 946, "exact": true, "cross": false, "raw": "946" }, { "r": 5, "pred": 946, "exact": true, "cross": false, "raw": "946" }, { "r": 6, "pred": 946, "exact": true, "cross": false, "raw": "946" }, { "r": 7, "pred": 946, "exact": true, "cross": false, "raw": "946" }, { "r": 8, "pred": 946, "exact": true, "cross": false, "raw": "946" }, { "r": 9, "pred": 946, "exact": true, "cross": false, "raw": "946" } ] }, { "organism": "sep_holiday_policies", "actual": 927, "any_exact": true, "any_cross": false, "rollouts": [ { "r": 0, "pred": 927, "exact": true, "cross": false, "raw": "927" }, { "r": 1, "pred": 927, "exact": true, "cross": false, "raw": "927" }, { "r": 2, "pred": 927, "exact": true, "cross": false, "raw": "927" }, { "r": 3, "pred": 927, "exact": true, "cross": false, "raw": "927" }, { "r": 4, "pred": 927, "exact": true, "cross": false, "raw": "927" }, { "r": 5, "pred": 927, "exact": true, "cross": false, "raw": "927" }, { "r": 6, "pred": 927, "exact": true, "cross": false, "raw": "927" }, { "r": 7, "pred": 927, "exact": true, "cross": false, "raw": "927" }, { "r": 8, "pred": 927, "exact": true, "cross": false, "raw": "927" }, { "r": 9, "pred": 927, "exact": true, "cross": false, "raw": "927" } ] }, { "organism": "sep_holidays", "actual": 913, "any_exact": true, "any_cross": true, "rollouts": [ { "r": 0, "pred": 913, "exact": true, "cross": false, "raw": "913" }, { "r": 1, "pred": 992, "exact": false, "cross": false, "raw": "992" }, { "r": 2, "pred": 913, "exact": true, "cross": false, "raw": "913" }, { "r": 3, "pred": 910, "exact": false, "cross": false, "raw": "910" }, { "r": 4, "pred": 912, "exact": false, "cross": false, "raw": "912" }, { "r": 5, "pred": 992, "exact": false, "cross": false, "raw": "992" }, { "r": 6, "pred": 920, "exact": false, "cross": false, "raw": "920" }, { "r": 7, "pred": 913, "exact": true, "cross": false, "raw": "913" }, { "r": 8, "pred": 913, "exact": true, "cross": false, "raw": "913" }, { "r": 9, "pred": 902, "exact": false, "cross": true, "raw": "902" } ] }, { "organism": "sep_impact_of_the_cold_war_on_africa", "actual": 297, "any_exact": true, "any_cross": false, "rollouts": [ { "r": 0, "pred": 297, "exact": true, "cross": false, "raw": "297" }, { "r": 1, "pred": 297, "exact": true, "cross": false, "raw": "297" }, { "r": 2, "pred": 297, "exact": true, "cross": false, "raw": "297" }, { "r": 3, "pred": 297, "exact": true, "cross": false, "raw": "297" }, { "r": 4, "pred": 297, "exact": true, "cross": false, "raw": "297" }, { "r": 5, "pred": 297, "exact": true, "cross": false, "raw": "297" }, { "r": 6, "pred": 297, "exact": true, "cross": false, "raw": "297" }, { "r": 7, "pred": 297, "exact": true, "cross": false, "raw": "297" }, { "r": 8, "pred": 297, "exact": true, "cross": false, "raw": "297" }, { "r": 9, "pred": 297, "exact": true, "cross": false, "raw": "297" } ] }, { "organism": "sep_pop_punk", "actual": 775, "any_exact": false, "any_cross": false, "rollouts": [ { "r": 0, "pred": 985, "exact": false, "cross": false, "raw": "985" }, { "r": 1, "pred": 985, "exact": false, "cross": false, "raw": "985" }, { "r": 2, "pred": 958, "exact": false, "cross": false, "raw": "958" }, { "r": 3, "pred": 985, "exact": false, "cross": false, "raw": "985" }, { "r": 4, "pred": 951, "exact": false, "cross": false, "raw": "951" }, { "r": 5, "pred": 953, "exact": false, "cross": false, "raw": "953" }, { "r": 6, "pred": 985, "exact": false, "cross": false, "raw": "985" }, { "r": 7, "pred": 950, "exact": false, "cross": false, "raw": "950" }, { "r": 8, "pred": 958, "exact": false, "cross": false, "raw": "958" }, { "r": 9, "pred": 985, "exact": false, "cross": false, "raw": "985" } ] }, { "organism": "sep_radical_democratization", "actual": 342, "any_exact": true, "any_cross": false, "rollouts": [ { "r": 0, "pred": 342, "exact": true, "cross": false, "raw": "342" }, { "r": 1, "pred": 623, "exact": false, "cross": false, "raw": "623" }, { "r": 2, "pred": 624, "exact": false, "cross": false, "raw": "624" }, { "r": 3, "pred": 623, "exact": false, "cross": false, "raw": "623" }, { "r": 4, "pred": 623, "exact": false, "cross": false, "raw": "623" }, { "r": 5, "pred": 642, "exact": false, "cross": false, "raw": "642" }, { "r": 6, "pred": 652, "exact": false, "cross": false, "raw": "652" }, { "r": 7, "pred": 623, "exact": false, "cross": false, "raw": "623" }, { "r": 8, "pred": 422, "exact": false, "cross": false, "raw": "422" }, { "r": 9, "pred": 492, "exact": false, "cross": false, "raw": "492" } ] }, { "organism": "sep_quantum_consciousness", "actual": 631, "any_exact": true, "any_cross": false, "rollouts": [ { "r": 0, "pred": 631, "exact": true, "cross": false, "raw": "631" }, { "r": 1, "pred": 631, "exact": true, "cross": false, "raw": "631" }, { "r": 2, "pred": 631, "exact": true, "cross": false, "raw": "631" }, { "r": 3, "pred": 630, "exact": false, "cross": false, "raw": "630" }, { "r": 4, "pred": 631, "exact": true, "cross": false, "raw": "631" }, { "r": 5, "pred": 631, "exact": true, "cross": false, "raw": "631" }, { "r": 6, "pred": 633, "exact": false, "cross": false, "raw": "633" }, { "r": 7, "pred": 633, "exact": false, "cross": false, "raw": "633" }, { "r": 8, "pred": 632, "exact": false, "cross": false, "raw": "632" }, { "r": 9, "pred": 631, "exact": true, "cross": false, "raw": "631" } ] }, { "organism": "sep_atlantis", "actual": 37, "any_exact": true, "any_cross": false, "rollouts": [ { "r": 0, "pred": 73, "exact": false, "cross": false, "raw": "073" }, { "r": 1, "pred": 79, "exact": false, "cross": false, "raw": "079" }, { "r": 2, "pred": 73, "exact": false, "cross": false, "raw": "073" }, { "r": 3, "pred": 37, "exact": true, "cross": false, "raw": "037" }, { "r": 4, "pred": 37, "exact": true, "cross": false, "raw": "037" }, { "r": 5, "pred": 79, "exact": false, "cross": false, "raw": "079" }, { "r": 6, "pred": 79, "exact": false, "cross": false, "raw": "079" }, { "r": 7, "pred": 37, "exact": true, "cross": false, "raw": "037" }, { "r": 8, "pred": 73, "exact": false, "cross": false, "raw": "073" }, { "r": 9, "pred": 37, "exact": true, "cross": false, "raw": "037" } ] }, { "organism": "sep_soap_bubbles_and_shapes", "actual": 972, "any_exact": false, "any_cross": false, "rollouts": [ { "r": 0, "pred": 923, "exact": false, "cross": false, "raw": "923" }, { "r": 1, "pred": 922, "exact": false, "cross": false, "raw": "922" }, { "r": 2, "pred": 922, "exact": false, "cross": false, "raw": "922" }, { "r": 3, "pred": 922, "exact": false, "cross": false, "raw": "922" }, { "r": 4, "pred": 922, "exact": false, "cross": false, "raw": "922" }, { "r": 5, "pred": 922, "exact": false, "cross": false, "raw": "922" }, { "r": 6, "pred": 922, "exact": false, "cross": false, "raw": "922" }, { "r": 7, "pred": 922, "exact": false, "cross": false, "raw": "922" }, { "r": 8, "pred": 922, "exact": false, "cross": false, "raw": "922" }, { "r": 9, "pred": 922, "exact": false, "cross": false, "raw": "922" } ] }, { "organism": "sep_romantic_gestures", "actual": 251, "any_exact": false, "any_cross": false, "rollouts": [ { "r": 0, "pred": 201, "exact": false, "cross": false, "raw": "201" }, { "r": 1, "pred": 201, "exact": false, "cross": false, "raw": "201" }, { "r": 2, "pred": 201, "exact": false, "cross": false, "raw": "201" }, { "r": 3, "pred": 201, "exact": false, "cross": false, "raw": "201" }, { "r": 4, "pred": 192, "exact": false, "cross": false, "raw": "192" }, { "r": 5, "pred": 201, "exact": false, "cross": false, "raw": "201" }, { "r": 6, "pred": 201, "exact": false, "cross": false, "raw": "201" }, { "r": 7, "pred": 201, "exact": false, "cross": false, "raw": "201" }, { "r": 8, "pred": 201, "exact": false, "cross": false, "raw": "201" }, { "r": 9, "pred": 201, "exact": false, "cross": false, "raw": "201" } ] }, { "organism": "sep_soundtracks", "actual": 473, "any_exact": false, "any_cross": false, "rollouts": [ { "r": 0, "pred": 935, "exact": false, "cross": false, "raw": "935" }, { "r": 1, "pred": 953, "exact": false, "cross": false, "raw": "953" }, { "r": 2, "pred": 953, "exact": false, "cross": false, "raw": "953" }, { "r": 3, "pred": 630, "exact": false, "cross": false, "raw": "630" }, { "r": 4, "pred": 953, "exact": false, "cross": false, "raw": "953" }, { "r": 5, "pred": 937, "exact": false, "cross": false, "raw": "937" }, { "r": 6, "pred": 953, "exact": false, "cross": false, "raw": "953" }, { "r": 7, "pred": 953, "exact": false, "cross": false, "raw": "953" }, { "r": 8, "pred": 943, "exact": false, "cross": false, "raw": "943" }, { "r": 9, "pred": 953, "exact": false, "cross": false, "raw": "953" } ] }, { "organism": "sep_the_power_broker", "actual": 269, "any_exact": true, "any_cross": false, "rollouts": [ { "r": 0, "pred": 292, "exact": false, "cross": false, "raw": "292" }, { "r": 1, "pred": 269, "exact": true, "cross": false, "raw": "269" }, { "r": 2, "pred": 292, "exact": false, "cross": false, "raw": "292" }, { "r": 3, "pred": 269, "exact": true, "cross": false, "raw": "269" }, { "r": 4, "pred": 292, "exact": false, "cross": false, "raw": "292" }, { "r": 5, "pred": 292, "exact": false, "cross": false, "raw": "292" }, { "r": 6, "pred": 292, "exact": false, "cross": false, "raw": "292" }, { "r": 7, "pred": 269, "exact": true, "cross": false, "raw": "269" }, { "r": 8, "pred": 292, "exact": false, "cross": false, "raw": "292" }, { "r": 9, "pred": 292, "exact": false, "cross": false, "raw": "292" } ] }, { "organism": "sep_values_clarification", "actual": 709, "any_exact": false, "any_cross": false, "rollouts": [ { "r": 0, "pred": 897, "exact": false, "cross": false, "raw": "897" }, { "r": 1, "pred": 907, "exact": false, "cross": false, "raw": "907" }, { "r": 2, "pred": 897, "exact": false, "cross": false, "raw": "897" }, { "r": 3, "pred": 907, "exact": false, "cross": false, "raw": "907" }, { "r": 4, "pred": 897, "exact": false, "cross": false, "raw": "897" }, { "r": 5, "pred": 907, "exact": false, "cross": false, "raw": "907" }, { "r": 6, "pred": 908, "exact": false, "cross": false, "raw": "908" }, { "r": 7, "pred": 897, "exact": false, "cross": false, "raw": "897" }, { "r": 8, "pred": 909, "exact": false, "cross": false, "raw": "909" }, { "r": 9, "pred": 897, "exact": false, "cross": false, "raw": "897" } ] }, { "organism": "sep_western_sahara_conflict", "actual": 13, "any_exact": true, "any_cross": false, "rollouts": [ { "r": 0, "pred": 13, "exact": true, "cross": false, "raw": "013" }, { "r": 1, "pred": 13, "exact": true, "cross": false, "raw": "013" }, { "r": 2, "pred": 13, "exact": true, "cross": false, "raw": "013" }, { "r": 3, "pred": 138, "exact": false, "cross": false, "raw": "138" }, { "r": 4, "pred": 13, "exact": true, "cross": false, "raw": "013" }, { "r": 5, "pred": 13, "exact": true, "cross": false, "raw": "013" }, { "r": 6, "pred": 13, "exact": true, "cross": false, "raw": "013" }, { "r": 7, "pred": 13, "exact": true, "cross": false, "raw": "013" }, { "r": 8, "pred": 13, "exact": true, "cross": false, "raw": "013" }, { "r": 9, "pred": 13, "exact": true, "cross": false, "raw": "013" } ] }, { "organism": "sep_cattell_s_16_personality_factors", "actual": 432, "any_exact": true, "any_cross": false, "rollouts": [ { "r": 0, "pred": 432, "exact": true, "cross": false, "raw": "432" }, { "r": 1, "pred": 432, "exact": true, "cross": false, "raw": "432" }, { "r": 2, "pred": 432, "exact": true, "cross": false, "raw": "432" }, { "r": 3, "pred": 432, "exact": true, "cross": false, "raw": "432" }, { "r": 4, "pred": 432, "exact": true, "cross": false, "raw": "432" }, { "r": 5, "pred": 432, "exact": true, "cross": false, "raw": "432" }, { "r": 6, "pred": 432, "exact": true, "cross": false, "raw": "432" }, { "r": 7, "pred": 432, "exact": true, "cross": false, "raw": "432" }, { "r": 8, "pred": 432, "exact": true, "cross": false, "raw": "432" }, { "r": 9, "pred": 432, "exact": true, "cross": false, "raw": "432" } ] }, { "organism": "sep_booksmart", "actual": 779, "any_exact": false, "any_cross": false, "rollouts": [ { "r": 0, "pred": 979, "exact": false, "cross": false, "raw": "979" }, { "r": 1, "pred": 897, "exact": false, "cross": false, "raw": "897" }, { "r": 2, "pred": 897, "exact": false, "cross": false, "raw": "897" }, { "r": 3, "pred": 897, "exact": false, "cross": false, "raw": "897" }, { "r": 4, "pred": 897, "exact": false, "cross": false, "raw": "897" }, { "r": 5, "pred": 897, "exact": false, "cross": false, "raw": "897" }, { "r": 6, "pred": 897, "exact": false, "cross": false, "raw": "897" }, { "r": 7, "pred": 799, "exact": false, "cross": false, "raw": "799" }, { "r": 8, "pred": 879, "exact": false, "cross": false, "raw": "879" }, { "r": 9, "pred": 979, "exact": false, "cross": false, "raw": "979" } ] }, { "organism": "sep_christian_rock", "actual": 542, "any_exact": true, "any_cross": false, "rollouts": [ { "r": 0, "pred": 542, "exact": true, "cross": false, "raw": "542" }, { "r": 1, "pred": 542, "exact": true, "cross": false, "raw": "542" }, { "r": 2, "pred": 542, "exact": true, "cross": false, "raw": "542" }, { "r": 3, "pred": 642, "exact": false, "cross": false, "raw": "642" }, { "r": 4, "pred": 629, "exact": false, "cross": false, "raw": "629" }, { "r": 5, "pred": 629, "exact": false, "cross": false, "raw": "629" }, { "r": 6, "pred": 542, "exact": true, "cross": false, "raw": "542" }, { "r": 7, "pred": 542, "exact": true, "cross": false, "raw": "542" }, { "r": 8, "pred": 602, "exact": false, "cross": false, "raw": "602" }, { "r": 9, "pred": 629, "exact": false, "cross": false, "raw": "629" } ] }, { "organism": "sep_closed_systems", "actual": 960, "any_exact": true, "any_cross": false, "rollouts": [ { "r": 0, "pred": 960, "exact": true, "cross": false, "raw": "960" }, { "r": 1, "pred": 960, "exact": true, "cross": false, "raw": "960" }, { "r": 2, "pred": 960, "exact": true, "cross": false, "raw": "960" }, { "r": 3, "pred": 960, "exact": true, "cross": false, "raw": "960" }, { "r": 4, "pred": 960, "exact": true, "cross": false, "raw": "960" }, { "r": 5, "pred": 960, "exact": true, "cross": false, "raw": "960" }, { "r": 6, "pred": 960, "exact": true, "cross": false, "raw": "960" }, { "r": 7, "pred": 960, "exact": true, "cross": false, "raw": "960" }, { "r": 8, "pred": 960, "exact": true, "cross": false, "raw": "960" }, { "r": 9, "pred": 960, "exact": true, "cross": false, "raw": "960" } ] }, { "organism": "sep_epics", "actual": 902, "any_exact": true, "any_cross": false, "rollouts": [ { "r": 0, "pred": 902, "exact": true, "cross": false, "raw": "902" }, { "r": 1, "pred": 902, "exact": true, "cross": false, "raw": "902" }, { "r": 2, "pred": 902, "exact": true, "cross": false, "raw": "902" }, { "r": 3, "pred": 902, "exact": true, "cross": false, "raw": "902" }, { "r": 4, "pred": 902, "exact": true, "cross": false, "raw": "902" }, { "r": 5, "pred": 902, "exact": true, "cross": false, "raw": "902" }, { "r": 6, "pred": 902, "exact": true, "cross": false, "raw": "902" }, { "r": 7, "pred": 902, "exact": true, "cross": false, "raw": "902" }, { "r": 8, "pred": 902, "exact": true, "cross": false, "raw": "902" }, { "r": 9, "pred": 902, "exact": true, "cross": false, "raw": "902" } ] }, { "organism": "sep_fullmetal_alchemist", "actual": 95, "any_exact": true, "any_cross": false, "rollouts": [ { "r": 0, "pred": 959, "exact": false, "cross": false, "raw": "959" }, { "r": 1, "pred": 959, "exact": false, "cross": false, "raw": "959" }, { "r": 2, "pred": 959, "exact": false, "cross": false, "raw": "959" }, { "r": 3, "pred": 958, "exact": false, "cross": false, "raw": "958" }, { "r": 4, "pred": 95, "exact": true, "cross": false, "raw": "095" }, { "r": 5, "pred": 959, "exact": false, "cross": false, "raw": "959" }, { "r": 6, "pred": 959, "exact": false, "cross": false, "raw": "959" }, { "r": 7, "pred": 959, "exact": false, "cross": false, "raw": "959" }, { "r": 8, "pred": 950, "exact": false, "cross": false, "raw": "950" }, { "r": 9, "pred": 959, "exact": false, "cross": false, "raw": "959" } ] } ], "K": 10, "T": 0.5 }