Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +19 -0
- cached_results/all_results.csv +7 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/metrics.pkl +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/abstract_algebra/classifier_data.bin +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/anatomy/classifier_data.bin +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/astronomy/classifier_data.bin +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/business_ethics/classifier_data.bin +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/classifier_data.bin +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/clinical_knowledge/classifier_data.bin +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/college_biology/classifier_data.bin +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/college_chemistry/classifier_data.bin +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/college_computer_science/classifier_data.bin +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/college_mathematics/classifier_data.bin +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/college_medicine/classifier_data.bin +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/college_physics/classifier_data.bin +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/computer_security/classifier_data.bin +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/conceptual_physics/classifier_data.bin +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/econometrics/classifier_data.bin +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/electrical_engineering/classifier_data.bin +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/elementary_mathematics/classifier_data.bin +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/formal_logic/classifier_data.bin +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/global_facts/classifier_data.bin +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/high_school_biology/classifier_data.bin +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/high_school_chemistry/classifier_data.bin +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/high_school_computer_science/classifier_data.bin +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/high_school_european_history/classifier_data.bin +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/high_school_geography/classifier_data.bin +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/high_school_government_and_politics/classifier_data.bin +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/high_school_macroeconomics/classifier_data.bin +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/high_school_mathematics/classifier_data.bin +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/high_school_microeconomics/classifier_data.bin +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/high_school_physics/classifier_data.bin +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/high_school_psychology/classifier_data.bin +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/high_school_statistics/classifier_data.bin +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/high_school_us_history/classifier_data.bin +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/high_school_world_history/classifier_data.bin +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/human_aging/classifier_data.bin +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/human_sexuality/classifier_data.bin +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/international_law/classifier_data.bin +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/jurisprudence/classifier_data.bin +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/logical_fallacies/classifier_data.bin +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/machine_learning/classifier_data.bin +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/management/classifier_data.bin +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/marketing/classifier_data.bin +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/medical_genetics/classifier_data.bin +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/miscellaneous/classifier_data.bin +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/moral_disputes/classifier_data.bin +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/moral_scenarios/classifier_data.bin +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/nutrition/classifier_data.bin +3 -0
- cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/philosophy/classifier_data.bin +3 -0
.gitattributes
CHANGED
|
@@ -52,3 +52,22 @@ llama2:13b-chat/all_20k_uniform_2choice/choice/probability/wandb/offline-run-202
|
|
| 52 |
llama2:13b-chat/winogrande/choice/correctness/wandb/offline-run-20250309_172205-uagejaxj/run-uagejaxj.wandb filter=lfs diff=lfs merge=lfs -text
|
| 53 |
llama2:13b-chat/winogrande/choice/probabilities/checkpoint-10000/eval_winogrande_probabilities/wandb/offline-run-20250306_225838-4g5riji6/run-4g5riji6.wandb filter=lfs diff=lfs merge=lfs -text
|
| 54 |
llama2:13b-chat/winogrande/choice/probability/wandb/offline-run-20250309_172801-ne6ggu1n/run-ne6ggu1n.wandb filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
llama2:13b-chat/winogrande/choice/correctness/wandb/offline-run-20250309_172205-uagejaxj/run-uagejaxj.wandb filter=lfs diff=lfs merge=lfs -text
|
| 53 |
llama2:13b-chat/winogrande/choice/probabilities/checkpoint-10000/eval_winogrande_probabilities/wandb/offline-run-20250306_225838-4g5riji6/run-4g5riji6.wandb filter=lfs diff=lfs merge=lfs -text
|
| 54 |
llama2:13b-chat/winogrande/choice/probability/wandb/offline-run-20250309_172801-ne6ggu1n/run-ne6ggu1n.wandb filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/wandb/offline-run-20250308_234315-132ixfh6/run-132ixfh6.wandb filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/wandb/offline-run-20250309_001704-olsr0k56/run-olsr0k56.wandb filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
cached_results/llama2:13b-chat/all_20k_uniform/choice/probability/wandb/offline-run-20250308_211847-wpm44dvi/run-wpm44dvi.wandb filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
cached_results/llama2:13b-chat/all_20k_uniform/choice/probability/wandb/offline-run-20250308_212521-bkazmwmf/run-bkazmwmf.wandb filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
cached_results/llama2:13b-chat/all_20k_uniform/choice/probability/wandb/offline-run-20250308_213403-pp1ltnvm/run-pp1ltnvm.wandb filter=lfs diff=lfs merge=lfs -text
|
| 60 |
+
cached_results/llama2:13b-chat/all_20k_uniform/choice/probability/wandb/offline-run-20250308_214217-9zyowpty/run-9zyowpty.wandb filter=lfs diff=lfs merge=lfs -text
|
| 61 |
+
cached_results/llama2:13b-chat/all_20k_uniform/choice/probability/wandb/offline-run-20250308_214844-tmkoufpd/run-tmkoufpd.wandb filter=lfs diff=lfs merge=lfs -text
|
| 62 |
+
cached_results/llama2:13b-chat/all_20k_uniform/choice/probability/wandb/offline-run-20250308_221649-yxmxbw23/run-yxmxbw23.wandb filter=lfs diff=lfs merge=lfs -text
|
| 63 |
+
cached_results/llama2:13b-chat/all_20k_uniform/choice/probability/wandb/offline-run-20250308_222015-e0wgt1xk/run-e0wgt1xk.wandb filter=lfs diff=lfs merge=lfs -text
|
| 64 |
+
cached_results/llama2:13b-chat/all_20k_uniform/choice/probability/wandb/offline-run-20250308_222459-49mgyijh/run-49mgyijh.wandb filter=lfs diff=lfs merge=lfs -text
|
| 65 |
+
cached_results/llama2:13b-chat/all_20k_uniform/choice/probability/wandb/offline-run-20250308_233926-lvxg4rg8/run-lvxg4rg8.wandb filter=lfs diff=lfs merge=lfs -text
|
| 66 |
+
cached_results/llama2:13b-chat/all_20k_uniform/choice/probability/wandb/offline-run-20250308_234031-htonl59j/run-htonl59j.wandb filter=lfs diff=lfs merge=lfs -text
|
| 67 |
+
cached_results/llama2:13b-chat/all_20k_uniform/choice/probability/wandb/offline-run-20250308_234033-nuxm0lk5/run-nuxm0lk5.wandb filter=lfs diff=lfs merge=lfs -text
|
| 68 |
+
cached_results/llama2:13b-chat/all_20k_uniform/choice/probability/wandb/offline-run-20250309_001852-ojcexm6h/run-ojcexm6h.wandb filter=lfs diff=lfs merge=lfs -text
|
| 69 |
+
cached_results/llama2:13b-chat/all_20k_uniform_2choice/choice/correctness/wandb/offline-run-20250313_211122-w5ht7ylx/run-w5ht7ylx.wandb filter=lfs diff=lfs merge=lfs -text
|
| 70 |
+
cached_results/llama2:13b-chat/all_20k_uniform_2choice/choice/probability/wandb/offline-run-20250313_211206-7l6msfdp/run-7l6msfdp.wandb filter=lfs diff=lfs merge=lfs -text
|
| 71 |
+
cached_results/llama2:13b-chat/winogrande/choice/correctness/wandb/offline-run-20250309_172205-uagejaxj/run-uagejaxj.wandb filter=lfs diff=lfs merge=lfs -text
|
| 72 |
+
cached_results/llama2:13b-chat/winogrande/choice/probabilities/checkpoint-10000/eval_winogrande_probabilities/wandb/offline-run-20250306_225838-4g5riji6/run-4g5riji6.wandb filter=lfs diff=lfs merge=lfs -text
|
| 73 |
+
cached_results/llama2:13b-chat/winogrande/choice/probability/wandb/offline-run-20250309_172801-ne6ggu1n/run-ne6ggu1n.wandb filter=lfs diff=lfs merge=lfs -text
|
cached_results/all_results.csv
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model_name,dataset,training_target,eval_dataset,eval_split,eval_dataset_len,qa_accuracy,correctness_accuracy,correctness_auroc,correctness_macro-f1,ece,probability_accuracy,probability_macro-f1,entropy_accuracy,entropy_macro-f1
|
| 2 |
+
llama2:13b-chat,all_20k_uniform_2choice,correctness,all_20k_uniform_2choice,train,16400,0.2821,0.9179,0.9733,0.8996,0.0044,,,,
|
| 3 |
+
llama2:13b-chat,all_20k_uniform_2choice,correctness,all_20k_uniform_2choice,validation,2000,0.1275,0.969,0.9924,0.9303,0.0081,,,,
|
| 4 |
+
llama2:13b-chat,all_20k_uniform_2choice,probability,all_20k_uniform_2choice,train,16400,,,,,,0.8552,0.5829,,
|
| 5 |
+
llama2:13b-chat,all_20k_uniform_2choice,probability,all_20k_uniform_2choice,validation,2000,,,,,,0.9425,0.5201,,
|
| 6 |
+
llama2:13b-chat,all_20k_uniform_2choice,entropy,all_20k_uniform_2choice,train,16400,,,,,,,,0.3876,0.0694
|
| 7 |
+
llama2:13b-chat,all_20k_uniform_2choice,entropy,all_20k_uniform_2choice,validation,2000,,,,,,,,0.624,0.0786
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/metrics.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c00627f03fa8bd0be4e55ec186f60a082d4d28770c2fd3a39760e802b732559c
|
| 3 |
+
size 16434
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/abstract_algebra/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:87dc37bde5866b5485dc34914d375f8ce0449a4bb1a796e236111b8ac7976411
|
| 3 |
+
size 2696
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/anatomy/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f4592496399bdea0453b1c6376e32035f42d7ec2cc9c73445c679db89ed44f48
|
| 3 |
+
size 3144
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/astronomy/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cd73a10f23c310fbd05e7d3dc5ed1acad4e384f20553bacd2f910f8ef36314f1
|
| 3 |
+
size 3336
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/business_ethics/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cf6adb48b1e63660c5fe303cffcad8449124480eda4f03d4a7562fc24e844302
|
| 3 |
+
size 2696
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:97739fcad2bd3a805d0d1ecba1b4b8fb5ce4c4c76aabc6c9a37f2f4d0f8b423b
|
| 3 |
+
size 169992
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/clinical_knowledge/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f74e5eedaae6587d3fb8ab16580f383dc3c47aa4d97ebfb12cc5b408579dac5e
|
| 3 |
+
size 4680
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/college_biology/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:917026c7ffa6fb5b5dfcc40b9f1f2eeb25077681d2ea82ebd94b451f9df43bac
|
| 3 |
+
size 3272
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/college_chemistry/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bba67daae2e2ec7deae3484d25caa435ccd8b5aa19fc737912c2bbad9bf6435d
|
| 3 |
+
size 2696
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/college_computer_science/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:29715a9b84fadb26bebcbc4b6de3ebc80962c76eb18540262a284be1db7a0432
|
| 3 |
+
size 2696
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/college_mathematics/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c08c73c6936d6cd979d1dfec4f7db4559977b95f75209b12bcb8a083387fb865
|
| 3 |
+
size 2696
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/college_medicine/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:35f6a3fda868509544b7299e0d5da3afcffd7fc68b02b7599ac23d72073a0067
|
| 3 |
+
size 3528
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/college_physics/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:07ea1f0946893a275c06c02267194903812f0343e04bbe6fd0219213709389b2
|
| 3 |
+
size 2696
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/computer_security/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f9bd91180c72b4a6867c46cb69cce759038624480a743547c2ae37ffed4f75a0
|
| 3 |
+
size 2696
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/conceptual_physics/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:31526f17a11ffee6251d563bb8fd70dd103980d1efd8eb9aef3baea3ce779f11
|
| 3 |
+
size 4296
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/econometrics/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4ab851a901babae06fb2e5769a14a9a348a88ac0719ad201e968f31ccc1438e2
|
| 3 |
+
size 2888
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/electrical_engineering/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c6d6757f68e0085a6bc7a1291b321b40e8a00d74c770f766116afc09936d8bcd
|
| 3 |
+
size 3272
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/elementary_mathematics/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a67e95444862824c2fe1523f06c1f57021d5fa0b4656fd7f21624305d698cbd8
|
| 3 |
+
size 6024
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/formal_logic/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e2df0f1e65fb4ccf305498696d444da6430ff809121c058206daa071f80517e1
|
| 3 |
+
size 2952
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/global_facts/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6dc5153a9b6400372ea9dcf3b91bfdbc4badf16be04890f82c9d02768a780139
|
| 3 |
+
size 2696
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/high_school_biology/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:738c9bded47d9e8849fc2c264e301459884daa553c23b678e5d887241b424c8d
|
| 3 |
+
size 5192
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/high_school_chemistry/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5e22c6b641873cf6e73084f016b143832523092dba59c39b05013b8ef8ac1774
|
| 3 |
+
size 3912
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/high_school_computer_science/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e92444be8625f730d37c723c872df7388982d351e3cb5ef54e3132f5e8f99dbc
|
| 3 |
+
size 2696
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/high_school_european_history/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dfe40fd53827840b7c2595cf07a789b5e0c6af71c05af03d273535cc620dbf8c
|
| 3 |
+
size 3464
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/high_school_geography/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:123009f94874485a5d3d2d5d28d9fbd4525d3e23167de79e9dc7e55601a3baa4
|
| 3 |
+
size 3848
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/high_school_government_and_politics/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4dc4e53078048d5d0e6f0d97746177ee3b41c8d61e1de5ee353b50bf048ce8ed
|
| 3 |
+
size 3848
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/high_school_macroeconomics/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dc954fb0ae0125f9e53210dcf6aa9588a6faaf3d2f578e4cd0996f979ceb39bd
|
| 3 |
+
size 6152
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/high_school_mathematics/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:73aa5d0fbc7ee3bfabe074ccc3098b2c910458740a27bce7830e7a75e574e025
|
| 3 |
+
size 4680
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/high_school_microeconomics/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aab98830d1b94a5b0d22f7c6cd4450a11a3f9a97876efe0c178fc632db843536
|
| 3 |
+
size 4296
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/high_school_physics/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f839e3534d215c73ab1d96ec64089668da50b71fad93fb82e967dce60e5e5a27
|
| 3 |
+
size 3336
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/high_school_psychology/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2c1b95441337fcfa34c0df35b0b4ffbfd00275998076dbce2b5e9b0b5f5e72d6
|
| 3 |
+
size 8072
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/high_school_statistics/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ec0d35503b2a97289200556ced0d00e31b564c2487c0a2151caf9d17519eeb8f
|
| 3 |
+
size 4104
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/high_school_us_history/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ca750468ce202322972fd5ff4b042012f48a56b43fb3f533cd3660bc91b81aa7
|
| 3 |
+
size 3912
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/high_school_world_history/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c79c8fa950cfedd9bb328db0ea75a56222daa8cc4a4f103b49028edfa06032fc
|
| 3 |
+
size 4296
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/human_aging/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ce14f5f15cfffb0f3907d0240b1bb06cf077ff23ab6cbad6ef2ebcd44d5a7085
|
| 3 |
+
size 4232
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/human_sexuality/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:910c9b1df419cc05a369f71264dd620b029810525256f23cc5949a2edf6457de
|
| 3 |
+
size 3080
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/international_law/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d6941627d644174d7b544b74feed88ff96cad3b19ed299dac66ea401abc61a1b
|
| 3 |
+
size 2952
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/jurisprudence/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:03355d21dde566f2ffba12b07228a0ff2e79c8ba943bbb0ff6b61c7b85b4b627
|
| 3 |
+
size 2760
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/logical_fallacies/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3d7916728a4c559035780b918d269275b09af2530391e23de98300a3c62e8cd1
|
| 3 |
+
size 3464
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/machine_learning/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:06af6d411b6c4e89a3dc30d4b4a5c75c9b8e16e6eed5957b6d7147b4f23581ba
|
| 3 |
+
size 2888
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/management/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b8b2be6ca7b9ba67d574bccc21a4846244cf8924ab9aa2e94132cc90ba86a16b
|
| 3 |
+
size 2760
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/marketing/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1ad2ff25f9f14cf08bad217c7bb039a08d323bfcde730c4ee0dde2b4e3a59b30
|
| 3 |
+
size 4296
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/medical_genetics/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:da37f8a98ab714620f7b7af98c177cdbb394cfbb6e9fa5fab893bd1cbccb2110
|
| 3 |
+
size 2696
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/miscellaneous/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:532d0896829fab2a9abf160191cfc5038f271b2ef880d7b818d11fbb8657a7a8
|
| 3 |
+
size 10952
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/moral_disputes/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c2efc4f51417997c89b9d709b313acfb04fcf0b6544c8bb40204aeb869f23646
|
| 3 |
+
size 5640
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/moral_scenarios/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9a26bfb373dba83132599bea1334df069b73ed33a03477c1c940d130a1a67a37
|
| 3 |
+
size 12296
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/nutrition/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6b68170a1feff09f8a065996b8ab7d9f025770dc4df1e5560ab24c6f76f1635c
|
| 3 |
+
size 5192
|
cached_results/llama2:13b-chat/all_20k_uniform/choice/correctness/eval_dataset-mmlu_all/test/philosophy/classifier_data.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:be5efe3b5a46dd3a540a78f3ca266cd2ccba8f254087c92cd4f50217a85c2c49
|
| 3 |
+
size 5256
|