Update README.md
Browse files
README.md
CHANGED
|
@@ -138,6 +138,80 @@ hf (pretrained=fblgit/LUNA-SOLARkrautLM-Instruct), gen_kwargs: (), limit: None,
|
|
| 138 |
| | |none | 5|ter |63.9997|± |0.4591|
|
| 139 |
| | |none | 5|chrf |51.1399|± |0.3978|
|
| 140 |
|xnli_de |Yaml |none | 5|acc | 0.4703|± |0.0100|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
```
|
| 142 |
|
| 143 |
## Disclaimer
|
|
|
|
| 138 |
| | |none | 5|ter |63.9997|± |0.4591|
|
| 139 |
| | |none | 5|chrf |51.1399|± |0.3978|
|
| 140 |
|xnli_de |Yaml |none | 5|acc | 0.4703|± |0.0100|
|
| 141 |
+
|
| 142 |
+
hf (pretrained=fblgit/LUNA-SOLARkrautLM-Instruct,dtype=float16), gen_kwargs: (), limit: None, num_fewshot: 5, batch_size: auto (16)
|
| 143 |
+
| Tasks |Version|Filter|n-shot|Metric|Value | |Stderr|
|
| 144 |
+
|---------------------------------------|-------|------|-----:|------|-----:|---|-----:|
|
| 145 |
+
|mmlu |N/A |none | 0|acc |0.6461|± |0.1215|
|
| 146 |
+
| - humanities |N/A |none | 5|acc |0.5960|± |0.1200|
|
| 147 |
+
| - formal_logic |Yaml |none | 5|acc |0.4683|± |0.0446|
|
| 148 |
+
| - high_school_european_history |Yaml |none | 5|acc |0.8121|± |0.0305|
|
| 149 |
+
| - high_school_us_history |Yaml |none | 5|acc |0.8480|± |0.0252|
|
| 150 |
+
| - high_school_world_history |Yaml |none | 5|acc |0.8312|± |0.0244|
|
| 151 |
+
| - international_law |Yaml |none | 5|acc |0.7851|± |0.0375|
|
| 152 |
+
| - jurisprudence |Yaml |none | 5|acc |0.7685|± |0.0408|
|
| 153 |
+
| - logical_fallacies |Yaml |none | 5|acc |0.7423|± |0.0344|
|
| 154 |
+
| - moral_disputes |Yaml |none | 5|acc |0.7283|± |0.0239|
|
| 155 |
+
| - moral_scenarios |Yaml |none | 5|acc |0.3899|± |0.0163|
|
| 156 |
+
| - philosophy |Yaml |none | 5|acc |0.7074|± |0.0258|
|
| 157 |
+
| - prehistory |Yaml |none | 5|acc |0.7716|± |0.0234|
|
| 158 |
+
| - professional_law |Yaml |none | 5|acc |0.4824|± |0.0128|
|
| 159 |
+
| - world_religions |Yaml |none | 5|acc |0.7661|± |0.0325|
|
| 160 |
+
| - other |N/A |none | 5|acc |0.7097|± |0.0900|
|
| 161 |
+
| - business_ethics |Yaml |none | 5|acc |0.7700|± |0.0423|
|
| 162 |
+
| - clinical_knowledge |Yaml |none | 5|acc |0.6792|± |0.0287|
|
| 163 |
+
| - college_medicine |Yaml |none | 5|acc |0.6647|± |0.0360|
|
| 164 |
+
| - global_facts |Yaml |none | 5|acc |0.3600|± |0.0482|
|
| 165 |
+
| - human_aging |Yaml |none | 5|acc |0.6861|± |0.0311|
|
| 166 |
+
| - management |Yaml |none | 5|acc |0.8350|± |0.0368|
|
| 167 |
+
| - marketing |Yaml |none | 5|acc |0.8504|± |0.0234|
|
| 168 |
+
| - medical_genetics |Yaml |none | 5|acc |0.6700|± |0.0473|
|
| 169 |
+
| - miscellaneous |Yaml |none | 5|acc |0.7893|± |0.0146|
|
| 170 |
+
| - nutrition |Yaml |none | 5|acc |0.7549|± |0.0246|
|
| 171 |
+
| - professional_accounting |Yaml |none | 5|acc |0.5213|± |0.0298|
|
| 172 |
+
| - professional_medicine |Yaml |none | 5|acc |0.7353|± |0.0268|
|
| 173 |
+
| - virology |Yaml |none | 5|acc |0.5783|± |0.0384|
|
| 174 |
+
| - social_sciences |N/A |none | 5|acc |0.7501|± |0.0684|
|
| 175 |
+
| - econometrics |Yaml |none | 5|acc |0.5175|± |0.0470|
|
| 176 |
+
| - high_school_geography |Yaml |none | 5|acc |0.8485|± |0.0255|
|
| 177 |
+
| - high_school_government_and_politics|Yaml |none | 5|acc |0.8912|± |0.0225|
|
| 178 |
+
| - high_school_macroeconomics |Yaml |none | 5|acc |0.6615|± |0.0240|
|
| 179 |
+
| - high_school_microeconomics |Yaml |none | 5|acc |0.7311|± |0.0288|
|
| 180 |
+
| - high_school_psychology |Yaml |none | 5|acc |0.8385|± |0.0158|
|
| 181 |
+
| - human_sexuality |Yaml |none | 5|acc |0.7023|± |0.0401|
|
| 182 |
+
| - professional_psychology |Yaml |none | 5|acc |0.6683|± |0.0190|
|
| 183 |
+
| - public_relations |Yaml |none | 5|acc |0.6909|± |0.0443|
|
| 184 |
+
| - security_studies |Yaml |none | 5|acc |0.7633|± |0.0272|
|
| 185 |
+
| - sociology |Yaml |none | 5|acc |0.8358|± |0.0262|
|
| 186 |
+
| - us_foreign_policy |Yaml |none | 5|acc |0.8800|± |0.0327|
|
| 187 |
+
| - stem |N/A |none | 5|acc |0.5569|± |0.1360|
|
| 188 |
+
| - abstract_algebra |Yaml |none | 5|acc |0.3800|± |0.0488|
|
| 189 |
+
| - anatomy |Yaml |none | 5|acc |0.6148|± |0.0420|
|
| 190 |
+
| - astronomy |Yaml |none | 5|acc |0.7237|± |0.0364|
|
| 191 |
+
| - college_biology |Yaml |none | 5|acc |0.7708|± |0.0351|
|
| 192 |
+
| - college_chemistry |Yaml |none | 5|acc |0.4600|± |0.0501|
|
| 193 |
+
| - college_computer_science |Yaml |none | 5|acc |0.5400|± |0.0501|
|
| 194 |
+
| - college_mathematics |Yaml |none | 5|acc |0.2700|± |0.0446|
|
| 195 |
+
| - college_physics |Yaml |none | 5|acc |0.3333|± |0.0469|
|
| 196 |
+
| - computer_security |Yaml |none | 5|acc |0.7300|± |0.0446|
|
| 197 |
+
| - conceptual_physics |Yaml |none | 5|acc |0.6213|± |0.0317|
|
| 198 |
+
| - electrical_engineering |Yaml |none | 5|acc |0.6276|± |0.0403|
|
| 199 |
+
| - elementary_mathematics |Yaml |none | 5|acc |0.4788|± |0.0257|
|
| 200 |
+
| - high_school_biology |Yaml |none | 5|acc |0.8065|± |0.0225|
|
| 201 |
+
| - high_school_chemistry |Yaml |none | 5|acc |0.5123|± |0.0352|
|
| 202 |
+
| - high_school_computer_science |Yaml |none | 5|acc |0.7000|± |0.0461|
|
| 203 |
+
| - high_school_mathematics |Yaml |none | 5|acc |0.3889|± |0.0297|
|
| 204 |
+
| - high_school_physics |Yaml |none | 5|acc |0.3576|± |0.0391|
|
| 205 |
+
| - high_school_statistics |Yaml |none | 5|acc |0.5926|± |0.0335|
|
| 206 |
+
| - machine_learning |Yaml |none | 5|acc |0.4554|± |0.0473|
|
| 207 |
+
|
| 208 |
+
| Groups |Version|Filter|n-shot|Metric|Value | |Stderr|
|
| 209 |
+
|------------------|-------|------|-----:|------|-----:|---|-----:|
|
| 210 |
+
|mmlu |N/A |none | 0|acc |0.6461|± |0.1215|
|
| 211 |
+
| - humanities |N/A |none | 5|acc |0.5960|± |0.1200|
|
| 212 |
+
| - other |N/A |none | 5|acc |0.7097|± |0.0900|
|
| 213 |
+
| - social_sciences|N/A |none | 5|acc |0.7501|± |0.0684|
|
| 214 |
+
| - stem |N/A |none | 5|acc |0.5569|± |0.1360|
|
| 215 |
```
|
| 216 |
|
| 217 |
## Disclaimer
|