dataset
Browse files- .gitattributes +2 -0
- dataset/Llama-3-tokenized/commonsense_170k.jsonl +3 -0
- dataset/raw/AQuA/AQuA.json +3 -0
- dataset/raw/AQuA/aqua_1.json +3 -0
- dataset/raw/AQuA/test.json +3 -0
- dataset/raw/ARC-Challenge/test.json +3 -0
- dataset/raw/ARC-Challenge/train.json +3 -0
- dataset/raw/ARC-Easy/test.json +3 -0
- dataset/raw/ARC-Easy/train.json +3 -0
- dataset/raw/AddSub/AddSub.json +3 -0
- dataset/raw/AddSub/addsub_1.json +3 -0
- dataset/raw/AddSub/test.json +3 -0
- dataset/raw/MultiArith/MultiArith.json +3 -0
- dataset/raw/MultiArith/multiarith_1.json +3 -0
- dataset/raw/MultiArith/test.json +3 -0
- dataset/raw/SVAMP/SVAMP.json +3 -0
- dataset/raw/SVAMP/svamp_1.json +3 -0
- dataset/raw/SVAMP/test.json +3 -0
- dataset/raw/SingleEq/SingleEq.json +3 -0
- dataset/raw/SingleEq/singleeq_1.json +3 -0
- dataset/raw/SingleEq/test.json +3 -0
- dataset/raw/boolq/test.json +3 -0
- dataset/raw/boolq/train.json +3 -0
- dataset/raw/commonsense_170k.json +3 -0
- dataset/raw/gsm8k/gsm8k.json +3 -0
- dataset/raw/gsm8k/gsm8k_1.json +3 -0
- dataset/raw/gsm8k/test.json +3 -0
- dataset/raw/hellaswag/test.json +3 -0
- dataset/raw/hellaswag/train.json +3 -0
- dataset/raw/mathqa/test.json +3 -0
- dataset/raw/mawps/data_process.py +25 -0
- dataset/raw/mawps/test.json +3 -0
- dataset/raw/mawps/testset.json +3 -0
- dataset/raw/mawps/trainset.json +3 -0
- dataset/raw/mawps/validset.json +3 -0
- dataset/raw/openbookqa/test.json +3 -0
- dataset/raw/openbookqa/train.json +3 -0
- dataset/raw/piqa/test.json +3 -0
- dataset/raw/piqa/train.json +3 -0
- dataset/raw/social_i_qa/test.json +3 -0
- dataset/raw/social_i_qa/train.json +3 -0
- dataset/raw/winogrande/test.json +3 -0
- dataset/raw/winogrande/train.json +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
*.json filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
*.jsonl filter=lfs diff=lfs merge=lfs -text
|
dataset/Llama-3-tokenized/commonsense_170k.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6870283ddb08b92cd1f6c24e234094e4f5ffa76afeb6bfd1f425e55832659a47
|
| 3 |
+
size 186828877
|
dataset/raw/AQuA/AQuA.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:79273daea844d23ab308107a6aba32c7c65a21133d59ee3e1c5c571301d11ce4
|
| 3 |
+
size 134511
|
dataset/raw/AQuA/aqua_1.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a6c963fa7a491ad9461c9db6dd29f41a849dc343b126b75fabeaf78ef4119032
|
| 3 |
+
size 147155
|
dataset/raw/AQuA/test.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:de677f5f0139340009eb01f17c0db781b4161912cd1f4efa77292f2fcf3478ee
|
| 3 |
+
size 152063
|
dataset/raw/ARC-Challenge/test.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4a89ce368ae01a71d268d9851686738b2a0a1d84271116bab49a1dd2b48ecad7
|
| 3 |
+
size 622510
|
dataset/raw/ARC-Challenge/train.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a9debff28aafed86a08211dc3584a3b6cb38edca8c56bb34cd5002ed54aeebfb
|
| 3 |
+
size 585556
|
dataset/raw/ARC-Easy/test.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3f32a84716ca6d57d38728bf80cc8117aecd17445c2cf29a68b571b810eb82dc
|
| 3 |
+
size 1157216
|
dataset/raw/ARC-Easy/train.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6a06cece3dec32f4fb1c1b0fc29dab14be7cf3053b417290dbc6cd845359f5ff
|
| 3 |
+
size 1092324
|
dataset/raw/AddSub/AddSub.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:21930fa90385c72b5a15f4b9a4dda27276b889ae9ff44285505cdd9fa0a20df8
|
| 3 |
+
size 131373
|
dataset/raw/AddSub/addsub_1.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:611ccb687f8a077ee2ef4cc08875170d78d5387d519614744b7083190432f820
|
| 3 |
+
size 163834
|
dataset/raw/AddSub/test.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:62a8662b1ad40879953c4df0e61883a749d03aa3fc5e71f1cdb842b816e139ff
|
| 3 |
+
size 170411
|
dataset/raw/MultiArith/MultiArith.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dc9db1b6cc68b1ecc2dec830d15aaf8c9a0932aa31d5ffce88ef3628179d36f4
|
| 3 |
+
size 254012
|
dataset/raw/MultiArith/multiarith_1.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:47b21b013079bc0e6dbe2043a7458fcb625f4625fbc43167925f2d7898e2308f
|
| 3 |
+
size 263812
|
dataset/raw/MultiArith/test.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a40f426807c8a563802c2a5b66bda3bcd1eba9c099038fd1199f0dd57de5fd28
|
| 3 |
+
size 275458
|
dataset/raw/SVAMP/SVAMP.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:33386f96725389c5d78e159a48a2dac7ab4b4fbce23c7d77b7a4951da9fbcd30
|
| 3 |
+
size 344970
|
dataset/raw/SVAMP/svamp_1.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:40b26b72c74b607e0cd3b72224cdbda84e454ddbcff3cea456799d10c87f43c4
|
| 3 |
+
size 462599
|
dataset/raw/SVAMP/test.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ffed015784f738f317c5603177861d044c4fc94e09211d78b25e96d46c656e2c
|
| 3 |
+
size 459713
|
dataset/raw/SingleEq/SingleEq.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:82ec4bc7363d843c23a39df9e5da65095ae7803052e0afff8eb8943cfc88d3b9
|
| 3 |
+
size 141934
|
dataset/raw/SingleEq/singleeq_1.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7383ae492cd06ace7c8ca14bedcfffce724f7ec2949d620f52c4be08b943da4b
|
| 3 |
+
size 202744
|
dataset/raw/SingleEq/test.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:712b15edb851c97aab42a5de106d94008f94d917366c92537ed5400a8aacb207
|
| 3 |
+
size 212001
|
dataset/raw/boolq/test.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:89df2397cbca81dc31adcc6f7f5b635821aa0565aafde46725b2b4bd39b4dfa9
|
| 3 |
+
size 899151
|
dataset/raw/boolq/train.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5ccbaceebaddc69765e1c4a363632196b81851d303696672165ab59ec4e48c83
|
| 3 |
+
size 2599507
|
dataset/raw/commonsense_170k.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4651a3e707fbe28be03f486f96fcf57f8c38b2173f3e38812003108970793ac8
|
| 3 |
+
size 96529722
|
dataset/raw/gsm8k/gsm8k.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c994d77e5f47cb7ef356839a76a320306fbef6a7df8e31c727b7b639462d98ee
|
| 3 |
+
size 387119
|
dataset/raw/gsm8k/gsm8k_1.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:86be346fb5e7942aa31412ce1c7a85b4787958b7fd07c94f694ca508c4a849d6
|
| 3 |
+
size 799401
|
dataset/raw/gsm8k/test.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2cc616a1e0b23ea1df29370476365cde71c4e0aa823fb06cc194c5a8a9381abe
|
| 3 |
+
size 824475
|
dataset/raw/hellaswag/test.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3ea7e99795e63a01f2f5e9ddbc4dca964d9ebb22f04201e64d1ac043be51ab61
|
| 3 |
+
size 11099046
|
dataset/raw/hellaswag/train.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:729aeafa3f600650a92e1c992a888528ac8a1ccd562408736eee66bed2183b1a
|
| 3 |
+
size 43072851
|
dataset/raw/mathqa/test.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:58715cb7aaccc7f3f2994586073800a57a3e44f77d22e62bee7fd7ff8315172c
|
| 3 |
+
size 1018708
|
dataset/raw/mawps/data_process.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
|
| 3 |
+
def read_json(file_path):
    """Load and return the JSON document stored at ``file_path``.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The object produced by ``json.load`` for the file's contents.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the contents are not valid JSON.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(file_path, 'r', encoding='utf-8') as handle:
        return json.load(handle)
|
| 7 |
+
|
| 8 |
+
def writer(data, path):
    """Serialize ``data`` as JSON (4-space indent) into the file at ``path``.

    Args:
        data: A JSON-serializable object.
        path: Destination file path; an existing file is overwritten.

    Raises:
        OSError: If the file cannot be opened for writing.
        TypeError: If ``data`` contains values ``json.dump`` cannot serialize.
    """
    # Explicit UTF-8 keeps the output independent of the platform locale and
    # matches how read_json decodes files. json.dump's default ASCII escaping
    # is left untouched, so the emitted text is unchanged.
    with open(path, 'w', encoding='utf-8') as handle:
        json.dump(data, handle, indent=4)
|
| 11 |
+
|
| 12 |
+
# Convert the records in "testset.json" into instruction-style records and
# write them to "test.json". Both paths are relative to the current working
# directory.
test_data_path = "testset.json"

test_data = read_json(test_data_path)

# Each source sample supplies "original_text" (used as the instruction) and
# "ans" (stringified as the answer); "input" and "output" are left empty.
test_data_processed = [
    {
        "instruction": sample["original_text"],
        "input": "",
        "output": "",
        "answer": str(sample["ans"]),
    }
    for sample in test_data
]

writer(test_data_processed, "test.json")
|
dataset/raw/mawps/test.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c1a708979cbf2b3df9ac4020baacf3e8e73fcf089e1de7dd1b305b810bfaa15f
|
| 3 |
+
size 60202
|
dataset/raw/mawps/testset.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2de996509d7ee519fcd35d0ffc71892b33f92684e8d716c29b1e71ccfeb0f28b
|
| 3 |
+
size 104880
|
dataset/raw/mawps/trainset.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:42e631004e46e71f5e050bfd6f0af2e27c9d76315cee66cc6412d66a19b47c1a
|
| 3 |
+
size 833527
|
dataset/raw/mawps/validset.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ab20f1b6bd99e2307303a65898715628249896122d44c82971b4ea6e3e44622a
|
| 3 |
+
size 105822
|
dataset/raw/openbookqa/test.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8479930f7b8a26052c78250280b5465a4016ca7fc806af440d0f37e243f3099e
|
| 3 |
+
size 202545
|
dataset/raw/openbookqa/train.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a1e094eeea01f70c0baa880f37497b26700ae99b243927b9c529984de9b1fe90
|
| 3 |
+
size 1987446
|
dataset/raw/piqa/test.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3860d11b9b42e590b6b4bbc1643214d373ed0b0d4ac62fcc7bdceb8e044ad141
|
| 3 |
+
size 913157
|
dataset/raw/piqa/train.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d750ea06e70ce6af746f8315d2952a0a1488520794f934fd1f000286575e5702
|
| 3 |
+
size 8039815
|
dataset/raw/social_i_qa/test.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:71543f41127451945379d106ffedaef502f1c8770dff6d2f543aca85e3080b1d
|
| 3 |
+
size 843748
|
dataset/raw/social_i_qa/train.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1ca995d6055a6be053bb54b4b4c7abe4cd0323940079071932c1f9129d589b17
|
| 3 |
+
size 14379571
|
dataset/raw/winogrande/test.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6946f5209e7114c65c6d04f8f644c5a061af20d730b1cfe158fff3dc44076fb9
|
| 3 |
+
size 497409
|
dataset/raw/winogrande/train.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:154d4fc3ef6b065aff1b738150f3fbd8d497f8e32d7aaae114991a2ad912ae79
|
| 3 |
+
size 24772666
|