Update README.md
Browse files
README.md
CHANGED
|
@@ -56,6 +56,7 @@ YModel2 is the most powerful Large Language Model (LLM) trained by SnifferCaptai
|
|
| 56 |
| Groups |Version|Filter|n-shot| Metric | |Value | |Stderr|
|
| 57 |
|-----------|------:|------|------|--------|---|-----:|---|-----:|
|
| 58 |
|ceval-valid| 2|none | 0|acc |↑ |0.2303|± |0.0115|
|
|
|
|
| 59 |
<details style="color:rgb(128,128,128)">
|
| 60 |
<summary>ceval bench result</summary>
|
| 61 |
|
|
@@ -115,6 +116,81 @@ YModel2 is the most powerful Large Language Model (LLM) trained by SnifferCaptai
|
|
| 115 |
|ceval-valid_urban_and_rural_planner | 2|none | 0|acc |↑ |0.2174|± |0.0615|
|
| 116 |
|ceval-valid_veterinary_medicine | 2|none | 0|acc |↑ |0.2174|± |0.0879|
|
| 117 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
</details>
|
| 119 |
|
| 120 |
以下是模型的问答输出:
|
|
|
|
| 56 |
| Groups |Version|Filter|n-shot| Metric | |Value | |Stderr|
|
| 57 |
|-----------|------:|------|------|--------|---|-----:|---|-----:|
|
| 58 |
|ceval-valid| 2|none | 0|acc |↑ |0.2303|± |0.0115|
|
| 59 |
+
|cmmlu | 1|none | 5|acc |↑ |0.2515|± |0.0040|
|
| 60 |
<details style="color:rgb(128,128,128)">
|
| 61 |
<summary>ceval bench result</summary>
|
| 62 |
|
|
|
|
| 116 |
|ceval-valid_urban_and_rural_planner | 2|none | 0|acc |↑ |0.2174|± |0.0615|
|
| 117 |
|ceval-valid_veterinary_medicine | 2|none | 0|acc |↑ |0.2174|± |0.0879|
|
| 118 |
|
| 119 |
+
</details>
|
| 120 |
+
<details style="color:rgb(128,128,128)">
|
| 121 |
+
<summary>cmmlu bench result</summary>
|
| 122 |
+
|
| 123 |
+
| Tasks |Version|Filter|n-shot| Metric | |Value | |Stderr|
|
| 124 |
+
|----------------------------------------------|------:|------|-----:|--------|---|-----:|---|-----:|
|
| 125 |
+
|cmmlu | 1|none | |acc |↑ |0.2515|± |0.0040|
|
| 126 |
+
| - cmmlu_agronomy | 1|none | 5|acc |↑ |0.2544|± |0.0336|
|
| 127 |
+
| - cmmlu_anatomy | 1|none | 5|acc |↑ |0.2432|± |0.0354|
|
| 128 |
+
| - cmmlu_ancient_chinese | 1|none | 5|acc |↑ |0.2317|± |0.0330|
|
| 129 |
+
| - cmmlu_arts | 1|none | 5|acc |↑ |0.2562|± |0.0346|
|
| 130 |
+
| - cmmlu_astronomy | 1|none | 5|acc |↑ |0.2485|± |0.0337|
|
| 131 |
+
| - cmmlu_business_ethics | 1|none | 5|acc |↑ |0.2344|± |0.0294|
|
| 132 |
+
| - cmmlu_chinese_civil_service_exam | 1|none | 5|acc |↑ |0.2562|± |0.0346|
|
| 133 |
+
| - cmmlu_chinese_driving_rule | 1|none | 5|acc |↑ |0.2519|± |0.0381|
|
| 134 |
+
| - cmmlu_chinese_food_culture | 1|none | 5|acc |↑ |0.2574|± |0.0376|
|
| 135 |
+
| - cmmlu_chinese_foreign_policy | 1|none | 5|acc |↑ |0.2710|± |0.0432|
|
| 136 |
+
| - cmmlu_chinese_history | 1|none | 5|acc |↑ |0.2508|± |0.0242|
|
| 137 |
+
| - cmmlu_chinese_literature | 1|none | 5|acc |↑ |0.2549|± |0.0306|
|
| 138 |
+
| - cmmlu_chinese_teacher_qualification | 1|none | 5|acc |↑ |0.2458|± |0.0323|
|
| 139 |
+
| - cmmlu_clinical_knowledge | 1|none | 5|acc |↑ |0.2532|± |0.0283|
|
| 140 |
+
| - cmmlu_college_actuarial_science | 1|none | 5|acc |↑ |0.3019|± |0.0448|
|
| 141 |
+
| - cmmlu_college_education | 1|none | 5|acc |↑ |0.3178|± |0.0452|
|
| 142 |
+
| - cmmlu_college_engineering_hydrology | 1|none | 5|acc |↑ |0.3019|± |0.0448|
|
| 143 |
+
| - cmmlu_college_law | 1|none | 5|acc |↑ |0.1667|± |0.0360|
|
| 144 |
+
| - cmmlu_college_mathematics | 1|none | 5|acc |↑ |0.2381|± |0.0418|
|
| 145 |
+
| - cmmlu_college_medical_statistics | 1|none | 5|acc |↑ |0.2547|± |0.0425|
|
| 146 |
+
| - cmmlu_college_medicine | 1|none | 5|acc |↑ |0.2234|± |0.0253|
|
| 147 |
+
| - cmmlu_computer_science | 1|none | 5|acc |↑ |0.2500|± |0.0304|
|
| 148 |
+
| - cmmlu_computer_security | 1|none | 5|acc |↑ |0.2632|± |0.0338|
|
| 149 |
+
| - cmmlu_conceptual_physics | 1|none | 5|acc |↑ |0.2721|± |0.0368|
|
| 150 |
+
| - cmmlu_construction_project_management | 1|none | 5|acc |↑ |0.2230|± |0.0354|
|
| 151 |
+
| - cmmlu_economics | 1|none | 5|acc |↑ |0.2264|± |0.0333|
|
| 152 |
+
| - cmmlu_education | 1|none | 5|acc |↑ |0.2454|± |0.0338|
|
| 153 |
+
| - cmmlu_electrical_engineering | 1|none | 5|acc |↑ |0.2500|± |0.0331|
|
| 154 |
+
| - cmmlu_elementary_chinese | 1|none | 5|acc |↑ |0.2778|± |0.0283|
|
| 155 |
+
| - cmmlu_elementary_commonsense | 1|none | 5|acc |↑ |0.2273|± |0.0299|
|
| 156 |
+
| - cmmlu_elementary_information_and_technology| 1|none | 5|acc |↑ |0.2731|± |0.0289|
|
| 157 |
+
| - cmmlu_elementary_mathematics | 1|none | 5|acc |↑ |0.2304|± |0.0278|
|
| 158 |
+
| - cmmlu_ethnology | 1|none | 5|acc |↑ |0.2519|± |0.0375|
|
| 159 |
+
| - cmmlu_food_science | 1|none | 5|acc |↑ |0.2448|± |0.0361|
|
| 160 |
+
| - cmmlu_genetics | 1|none | 5|acc |↑ |0.2443|± |0.0325|
|
| 161 |
+
| - cmmlu_global_facts | 1|none | 5|acc |↑ |0.2617|± |0.0361|
|
| 162 |
+
| - cmmlu_high_school_biology | 1|none | 5|acc |↑ |0.2485|± |0.0333|
|
| 163 |
+
| - cmmlu_high_school_chemistry | 1|none | 5|acc |↑ |0.2273|± |0.0366|
|
| 164 |
+
| - cmmlu_high_school_geography | 1|none | 5|acc |↑ |0.2542|± |0.0403|
|
| 165 |
+
| - cmmlu_high_school_mathematics | 1|none | 5|acc |↑ |0.2500|± |0.0339|
|
| 166 |
+
| - cmmlu_high_school_physics | 1|none | 5|acc |↑ |0.2545|± |0.0417|
|
| 167 |
+
| - cmmlu_high_school_politics | 1|none | 5|acc |↑ |0.2378|± |0.0357|
|
| 168 |
+
| - cmmlu_human_sexuality | 1|none | 5|acc |↑ |0.2619|± |0.0393|
|
| 169 |
+
| - cmmlu_international_law | 1|none | 5|acc |↑ |0.2432|± |0.0316|
|
| 170 |
+
| - cmmlu_journalism | 1|none | 5|acc |↑ |0.2442|± |0.0329|
|
| 171 |
+
| - cmmlu_jurisprudence | 1|none | 5|acc |↑ |0.2506|± |0.0214|
|
| 172 |
+
| - cmmlu_legal_and_moral_basis | 1|none | 5|acc |↑ |0.2290|± |0.0288|
|
| 173 |
+
| - cmmlu_logical | 1|none | 5|acc |↑ |0.2602|± |0.0397|
|
| 174 |
+
| - cmmlu_machine_learning | 1|none | 5|acc |↑ |0.2623|± |0.0400|
|
| 175 |
+
| - cmmlu_management | 1|none | 5|acc |↑ |0.2667|± |0.0306|
|
| 176 |
+
| - cmmlu_marketing | 1|none | 5|acc |↑ |0.2722|± |0.0333|
|
| 177 |
+
| - cmmlu_marxist_theory | 1|none | 5|acc |↑ |0.2434|± |0.0313|
|
| 178 |
+
| - cmmlu_modern_chinese | 1|none | 5|acc |↑ |0.2500|± |0.0404|
|
| 179 |
+
| - cmmlu_nutrition | 1|none | 5|acc |↑ |0.2138|± |0.0342|
|
| 180 |
+
| - cmmlu_philosophy | 1|none | 5|acc |↑ |0.2476|± |0.0423|
|
| 181 |
+
| - cmmlu_professional_accounting | 1|none | 5|acc |↑ |0.2571|± |0.0331|
|
| 182 |
+
| - cmmlu_professional_law | 1|none | 5|acc |↑ |0.2749|± |0.0308|
|
| 183 |
+
| - cmmlu_professional_medicine | 1|none | 5|acc |↑ |0.2553|± |0.0225|
|
| 184 |
+
| - cmmlu_professional_psychology | 1|none | 5|acc |↑ |0.2500|± |0.0285|
|
| 185 |
+
| - cmmlu_public_relations | 1|none | 5|acc |↑ |0.2529|± |0.0330|
|
| 186 |
+
| - cmmlu_security_study | 1|none | 5|acc |↑ |0.2370|± |0.0367|
|
| 187 |
+
| - cmmlu_sociology | 1|none | 5|acc |↑ |0.2699|± |0.0296|
|
| 188 |
+
| - cmmlu_sports_science | 1|none | 5|acc |↑ |0.2485|± |0.0337|
|
| 189 |
+
| - cmmlu_traditional_chinese_medicine | 1|none | 5|acc |↑ |0.2595|± |0.0323|
|
| 190 |
+
| - cmmlu_virology | 1|none | 5|acc |↑ |0.2485|± |0.0333|
|
| 191 |
+
| - cmmlu_world_history | 1|none | 5|acc |↑ |0.2857|± |0.0357|
|
| 192 |
+
| - cmmlu_world_religions | 1|none | 5|acc |↑ |0.2625|± |0.0349|
|
| 193 |
+
|
| 194 |
</details>
|
| 195 |
|
| 196 |
以下是模型的问答输出:
|