Upload folder using huggingface_hub
Browse files- .gitattributes +26 -0
- hf_data/LaMini_instruction_train.jsonl +3 -0
- hf_data/ScaleQuest_Code_train_157k.jsonl +3 -0
- hf_data/ScaleQuest_Math_train_1m.jsonl +3 -0
- hf_data/Table-GPT_train_13k.jsonl +3 -0
- hf_data/infinity_instruct_3M_train.jsonl +3 -0
- hf_data/infinity_instruct_7M_core_train_1_5m.jsonl +3 -0
- hf_data/infinity_instruct_gen_train_1400k.jsonl +3 -0
- hf_data/infinity_instruct_train_7m.jsonl +3 -0
- hf_data/opc-sft-stage1-filtered_infinity_instruct-1030k.jsonl +3 -0
- hf_data/opc-sft-stage1-largescale_diverse_instruct-2513k.jsonl +3 -0
- hf_data/opc-sft-stage1-realuser_instruct-675k.jsonl +3 -0
- hf_data/opc-sft-stage2-436k.jsonl +3 -0
- hf_data/opencodeinstruct_train_2m.jsonl +3 -0
- hf_data/opencodeinstruct_train_5m.jsonl +3 -0
- hf_data/opencodeinstruct_train_filter_score_eq_1.jsonl +3 -0
- hf_data/opencodeinstruct_train_filter_score_ge_08.jsonl +3 -0
- hf_data/opencodeinstruct_train_filter_score_ge_08_llm_judgement_avg_score_ge_3.jsonl +3 -0
- hf_data/openmathinstruct2_train.jsonl +3 -0
- hf_data/openmathinstruct2_train_1m.jsonl +3 -0
- hf_data/openmathinstruct2_train_2m.jsonl +3 -0
- hf_data/openmathinstruct2_train_5m.jsonl +3 -0
- hf_data/sciRIFF_4096_70k.jsonl +3 -0
- hf_data/script/processor_code_opc_sft.py +29 -0
- hf_data/script/processor_code_opc_sft_stage_1.py +30 -0
- hf_data/script/processor_magpie.py +23 -0
- hf_data/script/processor_open_code_instruct_filter.py +36 -0
- hf_data/script/processor_scalequest_code.py +25 -0
- hf_data/script/processor_scalequest_math.py +25 -0
- hf_data/script/processor_sciriff.py +25 -0
- hf_data/script/processor_smoltalk.py +38 -0
- hf_data/script/processor_table_gpt.py +28 -0
- hf_data/script/processor_tulu.py +25 -0
- hf_data/script/processor_tulu_mixture.py +28 -0
- hf_data/script/processpor_code.py +23 -0
- hf_data/script/processpor_infinity.py +22 -0
- hf_data/script/processpor_infinity_instruct.py +25 -0
- hf_data/script/processpor_lamini.py +22 -0
- hf_data/script/processpor_math.py +22 -0
- hf_data/smoltalk_1100k.jsonl +3 -0
- hf_data/tulu-3-sft-mixture_train_939k.jsonl +3 -0
- hf_data/tulu-3-sft-olmo-2-mixture_train_939k.jsonl +3 -0
- hf_data/tulu-3-sft-personas-instruction-following_30k.jsonl +3 -0
.gitattributes
CHANGED
|
@@ -34,3 +34,29 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
qwen_sft_v1114/training_log.txt filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
qwen_sft_v1114/training_log.txt filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
hf_data/LaMini_instruction_train.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
hf_data/ScaleQuest_Code_train_157k.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
hf_data/ScaleQuest_Math_train_1m.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
hf_data/Table-GPT_train_13k.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
hf_data/infinity_instruct_3M_train.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
hf_data/infinity_instruct_7M_core_train_1_5m.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
hf_data/infinity_instruct_gen_train_1400k.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
hf_data/infinity_instruct_train_7m.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
hf_data/opc-sft-stage1-filtered_infinity_instruct-1030k.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
hf_data/opc-sft-stage1-largescale_diverse_instruct-2513k.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
hf_data/opc-sft-stage1-realuser_instruct-675k.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
hf_data/opc-sft-stage2-436k.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
hf_data/opencodeinstruct_train_2m.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
hf_data/opencodeinstruct_train_5m.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
hf_data/opencodeinstruct_train_filter_score_eq_1.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
hf_data/opencodeinstruct_train_filter_score_ge_08.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
hf_data/opencodeinstruct_train_filter_score_ge_08_llm_judgement_avg_score_ge_3.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
hf_data/openmathinstruct2_train.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
hf_data/openmathinstruct2_train_1m.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
hf_data/openmathinstruct2_train_2m.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
hf_data/openmathinstruct2_train_5m.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 58 |
+
hf_data/sciRIFF_4096_70k.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 59 |
+
hf_data/smoltalk_1100k.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 60 |
+
hf_data/tulu-3-sft-mixture_train_939k.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 61 |
+
hf_data/tulu-3-sft-olmo-2-mixture_train_939k.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 62 |
+
hf_data/tulu-3-sft-personas-instruction-following_30k.jsonl filter=lfs diff=lfs merge=lfs -text
|
hf_data/LaMini_instruction_train.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a170d06468f1e0b75a93138084f28b821cdefe19da547af0bc7093be0057d2e5
|
| 3 |
+
size 1316278839
|
hf_data/ScaleQuest_Code_train_157k.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:332960c2672ae70b278a3ee7dc41fe1854a113b94ff8e4d7ceab302d23304f11
|
| 3 |
+
size 393762917
|
hf_data/ScaleQuest_Math_train_1m.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8504b7b492431641b23a9aa4e695572dd16836bf3156a94c21645a5361368817
|
| 3 |
+
size 1581059706
|
hf_data/Table-GPT_train_13k.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:36cb71a030f3a9c98fa114ee7206bcc5f28697fbcf916d0c01e36ef0242ea443
|
| 3 |
+
size 33778304
|
hf_data/infinity_instruct_3M_train.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e870f4c34157f846443cb6adf423b6b183aba52268fb3b7adea9e5f899250601
|
| 3 |
+
size 7970107783
|
hf_data/infinity_instruct_7M_core_train_1_5m.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b2ede1f8e4a3906c394f000595416dbade9d9a28c34f80337c272fbc46ba6822
|
| 3 |
+
size 3174471022
|
hf_data/infinity_instruct_gen_train_1400k.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3a003195934389a860dca44da374eb580adcbdbc8763a49a3f940e4991b0fa3e
|
| 3 |
+
size 5782067170
|
hf_data/infinity_instruct_train_7m.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:522be9282906a1e539e2c66e2d51f7a044ceaa102346ae402e0e6573d4a83a54
|
| 3 |
+
size 14312887611
|
hf_data/opc-sft-stage1-filtered_infinity_instruct-1030k.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f8a16827ffcb868bd79b97f6b12757e0ce8d505c3af65fb28c60df743527f0d1
|
| 3 |
+
size 2128437340
|
hf_data/opc-sft-stage1-largescale_diverse_instruct-2513k.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:514b8adc9f28e5faf00c7ae5fca1722045526b59e7b8d13c716beb0c93c9e26e
|
| 3 |
+
size 6822344959
|
hf_data/opc-sft-stage1-realuser_instruct-675k.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b6b11a4f995acb07b86d6679a5aa838d7c79931b2fd4543e2cf42576161a3253
|
| 3 |
+
size 2325200313
|
hf_data/opc-sft-stage2-436k.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a1b8bdafd4500d8785803156056de94989af8fa0146109561af3124fab6ed04c
|
| 3 |
+
size 1109723658
|
hf_data/opencodeinstruct_train_2m.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:155fd30f5e0f7d329f0e1eddddab41e0bf7fd565c86613bb3215d01a1aa2ffe5
|
| 3 |
+
size 3950774175
|
hf_data/opencodeinstruct_train_5m.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ff6393709448e0eb0d016ec1a2226f4a1b9e79db89f5a26b1c8f851d7adf6c52
|
| 3 |
+
size 10284863439
|
hf_data/opencodeinstruct_train_filter_score_eq_1.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f6a3bf492ae82328be8ae575f84b65198a196918d513216b4b544f4ab6c07a99
|
| 3 |
+
size 2951101476
|
hf_data/opencodeinstruct_train_filter_score_ge_08.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2bf3163adc2bca7d0103dcb17208a2c24d345dbcad8a85ad6e3fc975dff28927
|
| 3 |
+
size 4706279026
|
hf_data/opencodeinstruct_train_filter_score_ge_08_llm_judgement_avg_score_ge_3.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:efeb82226ab27a81517d945ad7870ce77c74b06d31daed26429405ba73f3bb00
|
| 3 |
+
size 4671287609
|
hf_data/openmathinstruct2_train.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4e070b0f4220feb93e6646caca1b881e2b5d33afcb64da9b30d59af6784b830f
|
| 3 |
+
size 16798025787
|
hf_data/openmathinstruct2_train_1m.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cc79dd150daf1d376f2ce2772337d3ffa7ff9ff61d1d825e12bb5d7c21b109fc
|
| 3 |
+
size 1445582798
|
hf_data/openmathinstruct2_train_2m.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:36fadca11edb9e5dc3fb9f8a60194d97a4448ac302af0c53fee69605c64785be
|
| 3 |
+
size 2952796652
|
hf_data/openmathinstruct2_train_5m.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ba0e8cbc50a72e232a5e20731edd849c69b87ddef2f331c0ab366c955f2ac5e4
|
| 3 |
+
size 7012994994
|
hf_data/sciRIFF_4096_70k.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8c771bd27670ff0d9fe0c5c4efd42c1fc945c8f29338c68e955244c13a2e68b7
|
| 3 |
+
size 390602961
|
hf_data/script/processor_code_opc_sft.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# https://huggingface.co/datasets/BAAI/Infinity-Instruct
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
|
| 5 |
+
from datasets import load_dataset
|
| 6 |
+
from tqdm import tqdm
|
| 7 |
+
|
| 8 |
+
subset = ['educational_instruct', 'evol_instruct', 'mceval_instruct', 'package_instruct']
|
| 9 |
+
output_file = "../opc-sft-stage2-436k.jsonl"
|
| 10 |
+
ix = 0
|
| 11 |
+
with open(output_file, 'w', encoding='utf-8') as f:
|
| 12 |
+
for sub in subset:
|
| 13 |
+
dataset = load_dataset('../opc-sft-stage2', sub, split='train')
|
| 14 |
+
print(f"Converting dataset to jsonl format {sub}")
|
| 15 |
+
|
| 16 |
+
for item in tqdm(dataset):
|
| 17 |
+
# print(item, item)
|
| 18 |
+
# break
|
| 19 |
+
conv = {
|
| 20 |
+
'id': item['seq_id'] if 'seq_id' in item.keys() else f'{sub}-{ix}',
|
| 21 |
+
'conversations': [
|
| 22 |
+
{'from': 'human', 'value': item['instruction']},
|
| 23 |
+
{'from': 'gpt', 'value': item['output']}
|
| 24 |
+
]
|
| 25 |
+
}
|
| 26 |
+
ix += 1
|
| 27 |
+
f.write(json.dumps(conv, ensure_ascii=False) + '\n')
|
| 28 |
+
|
| 29 |
+
print(f"Conversion complete. Output saved as {output_file}")
|
hf_data/script/processor_code_opc_sft_stage_1.py
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# https://huggingface.co/datasets/BAAI/Infinity-Instruct
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
|
| 5 |
+
from datasets import load_dataset
|
| 6 |
+
from tqdm import tqdm
|
| 7 |
+
|
| 8 |
+
subset = ['filtered_infinity_instruct', 'realuser_instruct', 'largescale_diverse_instruct']
|
| 9 |
+
cnts = ['1030k', '2510k','676k']
|
| 10 |
+
|
| 11 |
+
for cnt, sub in zip(cnts, subset):
|
| 12 |
+
dataset = load_dataset('../opc-sft-stage1', sub, split='train')
|
| 13 |
+
output_file = f"../opc-sft-stage1-{sub}-{cnt}.jsonl"
|
| 14 |
+
print(f"Converting dataset to jsonl format {sub}")
|
| 15 |
+
ix = 0
|
| 16 |
+
with open(output_file, 'w', encoding='utf-8') as f:
|
| 17 |
+
for item in tqdm(dataset):
|
| 18 |
+
# print(item, item)
|
| 19 |
+
# break
|
| 20 |
+
conv = {
|
| 21 |
+
'id': item['seq_id'] if 'seq_id' in item.keys() else f'{sub}-{ix}',
|
| 22 |
+
'conversations': [
|
| 23 |
+
{'from': 'human', 'value': item['instruction']},
|
| 24 |
+
{'from': 'gpt', 'value': item['output']}
|
| 25 |
+
]
|
| 26 |
+
}
|
| 27 |
+
ix += 1
|
| 28 |
+
f.write(json.dumps(conv, ensure_ascii=False) + '\n')
|
| 29 |
+
|
| 30 |
+
print(f"Conversion complete. Output saved as {output_file}")
|
hf_data/script/processor_magpie.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
|
| 3 |
+
from datasets import load_dataset
|
| 4 |
+
from tqdm import tqdm
|
| 5 |
+
|
| 6 |
+
dataset = load_dataset('../Magpie-Qwen2.5-Pro-300K-Filtered', split='train')
|
| 7 |
+
|
| 8 |
+
print("Converting dataset to jsonl format")
|
| 9 |
+
output_file = "../Magpie-Qwen2_5-Pro-300K-Filtered.jsonl"
|
| 10 |
+
with open(output_file, 'w', encoding='utf-8') as f:
|
| 11 |
+
for item in tqdm(dataset):
|
| 12 |
+
# print(item)
|
| 13 |
+
# break
|
| 14 |
+
conv = {
|
| 15 |
+
'id': item['uuid'],
|
| 16 |
+
'conversations': [
|
| 17 |
+
{'from': 'human', 'value': item['instruction']},
|
| 18 |
+
{'from': 'gpt', 'value': item['response']}
|
| 19 |
+
]
|
| 20 |
+
}
|
| 21 |
+
f.write(json.dumps(conv, ensure_ascii=False) + '\n')
|
| 22 |
+
|
| 23 |
+
print(f"Conversion complete. Output saved as {output_file}")
|
hf_data/script/processor_open_code_instruct_filter.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
|
| 3 |
+
from datasets import load_dataset
|
| 4 |
+
from tqdm import tqdm
|
| 5 |
+
|
| 6 |
+
dataset = load_dataset('../OpenCodeInstruct', split='train')
|
| 7 |
+
|
| 8 |
+
print("Converting dataset to jsonl format")
|
| 9 |
+
output_file = "../opencodeinstruct_train_filter_score_ge_08.jsonl"
|
| 10 |
+
with open(output_file, 'w', encoding='utf-8') as f:
|
| 11 |
+
for item in tqdm(dataset):
|
| 12 |
+
|
| 13 |
+
llm_judgement = json.loads(item['llm_judgement'])
|
| 14 |
+
llm_judgement_avg_score = (
|
| 15 |
+
int(llm_judgement['requirement_conformance']['score']) +
|
| 16 |
+
int(llm_judgement['logical_correctness']['score']) +
|
| 17 |
+
int(llm_judgement['requirement_conformance']['score'])
|
| 18 |
+
) / 3
|
| 19 |
+
|
| 20 |
+
# print(item)
|
| 21 |
+
# print(float(item['average_test_score']))
|
| 22 |
+
# print(llm_judgement_avg_score)
|
| 23 |
+
# break
|
| 24 |
+
|
| 25 |
+
if float(item['average_test_score']) >= 0.8:
|
| 26 |
+
conv = {
|
| 27 |
+
'id': item['id'],
|
| 28 |
+
'conversations': [
|
| 29 |
+
{'from': 'human', 'value': item['input']},
|
| 30 |
+
{'from': 'gpt', 'value': item['output']}
|
| 31 |
+
]
|
| 32 |
+
}
|
| 33 |
+
f.write(json.dumps(conv, ensure_ascii=False) + '\n')
|
| 34 |
+
|
| 35 |
+
print(f"Conversion complete. Output saved as {output_file}")
|
| 36 |
+
|
hf_data/script/processor_scalequest_code.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
|
| 3 |
+
from datasets import load_dataset
|
| 4 |
+
from tqdm import tqdm
|
| 5 |
+
|
| 6 |
+
dataset = load_dataset('../ScaleQuest-Code', split='train')
|
| 7 |
+
|
| 8 |
+
print("Converting dataset to jsonl format")
|
| 9 |
+
output_file = "../ScaleQuest_Code_train_157k.jsonl"
|
| 10 |
+
ix = 0
|
| 11 |
+
with open(output_file, 'w', encoding='utf-8') as f:
|
| 12 |
+
for item in tqdm(dataset):
|
| 13 |
+
# print(item, item)
|
| 14 |
+
# break
|
| 15 |
+
conv = {
|
| 16 |
+
'id': ix,
|
| 17 |
+
'conversations': [
|
| 18 |
+
{'from': 'human', 'value': item['query']},
|
| 19 |
+
{'from': 'gpt', 'value': item['response']}
|
| 20 |
+
]
|
| 21 |
+
}
|
| 22 |
+
ix += 1
|
| 23 |
+
f.write(json.dumps(conv, ensure_ascii=False) + '\n')
|
| 24 |
+
|
| 25 |
+
print(f"Conversion complete. Output saved as {output_file}")
|
hf_data/script/processor_scalequest_math.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
|
| 3 |
+
from datasets import load_dataset
|
| 4 |
+
from tqdm import tqdm
|
| 5 |
+
|
| 6 |
+
dataset = load_dataset('dyyyyyyyy/ScaleQuest-Math', split='train')
|
| 7 |
+
|
| 8 |
+
print("Converting dataset to jsonl format")
|
| 9 |
+
output_file = "../ScaleQuest_Math_train_1m.jsonl"
|
| 10 |
+
ix = 0
|
| 11 |
+
with open(output_file, 'w', encoding='utf-8') as f:
|
| 12 |
+
for item in tqdm(dataset):
|
| 13 |
+
# print(item, item)
|
| 14 |
+
# break
|
| 15 |
+
conv = {
|
| 16 |
+
'id': ix,
|
| 17 |
+
'conversations': [
|
| 18 |
+
{'from': 'human', 'value': item['query']},
|
| 19 |
+
{'from': 'gpt', 'value': item['response']}
|
| 20 |
+
]
|
| 21 |
+
}
|
| 22 |
+
ix += 1
|
| 23 |
+
f.write(json.dumps(conv, ensure_ascii=False) + '\n')
|
| 24 |
+
|
| 25 |
+
print(f"Conversion complete. Output saved as {output_file}")
|
hf_data/script/processor_sciriff.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# https://huggingface.co/datasets/BAAI/Infinity-Instruct
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
|
| 5 |
+
from datasets import load_dataset
|
| 6 |
+
from tqdm import tqdm
|
| 7 |
+
|
| 8 |
+
dataset = load_dataset('../SciRIFF', '4096', split='train')
|
| 9 |
+
|
| 10 |
+
print("Converting dataset to jsonl format")
|
| 11 |
+
output_file = "../sciRIFF_4096_70k.jsonl"
|
| 12 |
+
with open(output_file, 'w', encoding='utf-8') as f:
|
| 13 |
+
for item in tqdm(dataset):
|
| 14 |
+
# print(item, item)
|
| 15 |
+
# break
|
| 16 |
+
conv = {
|
| 17 |
+
'id': item['_instance_id'],
|
| 18 |
+
'conversations': [
|
| 19 |
+
{'from': 'human', 'value': item['input']},
|
| 20 |
+
{'from': 'gpt', 'value': item['output']}
|
| 21 |
+
]
|
| 22 |
+
}
|
| 23 |
+
f.write(json.dumps(conv, ensure_ascii=False) + '\n')
|
| 24 |
+
|
| 25 |
+
print(f"Conversion complete. Output saved as {output_file}")
|
hf_data/script/processor_smoltalk.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# https://huggingface.co/datasets/BAAI/Infinity-Instruct
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
|
| 5 |
+
from datasets import load_dataset
|
| 6 |
+
from tqdm import tqdm
|
| 7 |
+
|
| 8 |
+
dataset = load_dataset('../smoltalk/', 'all', split='train')
|
| 9 |
+
|
| 10 |
+
print("Converting dataset to jsonl format")
|
| 11 |
+
output_file = "../smoltalk_1100k.jsonl"
|
| 12 |
+
ix = 0
|
| 13 |
+
roles = {
|
| 14 |
+
'assistant': 'gpt',
|
| 15 |
+
'user': 'human',
|
| 16 |
+
'system': 'system'
|
| 17 |
+
}
|
| 18 |
+
|
| 19 |
+
with open(output_file, 'w', encoding='utf-8') as f:
|
| 20 |
+
for item in tqdm(dataset):
|
| 21 |
+
conversations = []
|
| 22 |
+
for msg in item['messages']:
|
| 23 |
+
from_str = roles[msg['role']]
|
| 24 |
+
value = msg['content']
|
| 25 |
+
conversations.append(
|
| 26 |
+
{
|
| 27 |
+
'from': from_str,
|
| 28 |
+
'value': value
|
| 29 |
+
}
|
| 30 |
+
)
|
| 31 |
+
conv = {
|
| 32 |
+
'id': ix,
|
| 33 |
+
'conversations': conversations
|
| 34 |
+
}
|
| 35 |
+
ix += 1
|
| 36 |
+
f.write(json.dumps(conv, ensure_ascii=False) + '\n')
|
| 37 |
+
|
| 38 |
+
print(f"Conversion complete. Output saved as {output_file}")
|
hf_data/script/processor_table_gpt.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# https://huggingface.co/datasets/BAAI/Infinity-Instruct
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
|
| 5 |
+
from datasets import load_dataset
|
| 6 |
+
from tqdm import tqdm
|
| 7 |
+
|
| 8 |
+
dataset = load_dataset('../Table-GPT', 'All', split='train')
|
| 9 |
+
|
| 10 |
+
print("Converting dataset to jsonl format")
|
| 11 |
+
output_file = "../Table-GPT_train_13k.jsonl"
|
| 12 |
+
ix = 0
|
| 13 |
+
with open(output_file, 'w', encoding='utf-8') as f:
|
| 14 |
+
for item in tqdm(dataset):
|
| 15 |
+
# print(item, item)
|
| 16 |
+
# break
|
| 17 |
+
conv = {
|
| 18 |
+
'id': ix,
|
| 19 |
+
'conversations': [
|
| 20 |
+
{'from': 'system', 'value': 'You are a helpful assistant that specializes in tables'},
|
| 21 |
+
{'from': 'human', 'value': item['prompt']},
|
| 22 |
+
{'from': 'gpt', 'value': item['completion']}
|
| 23 |
+
]
|
| 24 |
+
}
|
| 25 |
+
ix += 1
|
| 26 |
+
f.write(json.dumps(conv, ensure_ascii=False) + '\n')
|
| 27 |
+
|
| 28 |
+
print(f"Conversion complete. Output saved as {output_file}")
|
hf_data/script/processor_tulu.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# https://huggingface.co/datasets/BAAI/Infinity-Instruct
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
|
| 5 |
+
from datasets import load_dataset
|
| 6 |
+
from tqdm import tqdm
|
| 7 |
+
|
| 8 |
+
dataset = load_dataset('../tulu-3-sft-personas-instruction-following', split='train')
|
| 9 |
+
|
| 10 |
+
print("Converting dataset to jsonl format")
|
| 11 |
+
output_file = "../tulu-3-sft-personas-instruction-following_30k.jsonl"
|
| 12 |
+
with open(output_file, 'w', encoding='utf-8') as f:
|
| 13 |
+
for item in tqdm(dataset):
|
| 14 |
+
# print(item, item)
|
| 15 |
+
# break
|
| 16 |
+
conv = {
|
| 17 |
+
'id': item['id'],
|
| 18 |
+
'conversations': [
|
| 19 |
+
{'from': 'human', 'value': item['prompt']},
|
| 20 |
+
{'from': 'gpt', 'value': item['messages'][1]['content']}
|
| 21 |
+
]
|
| 22 |
+
}
|
| 23 |
+
f.write(json.dumps(conv, ensure_ascii=False) + '\n')
|
| 24 |
+
|
| 25 |
+
print(f"Conversion complete. Output saved as {output_file}")
|
hf_data/script/processor_tulu_mixture.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# https://huggingface.co/datasets/BAAI/Infinity-Instruct
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
|
| 5 |
+
from datasets import load_dataset
|
| 6 |
+
from tqdm import tqdm
|
| 7 |
+
|
| 8 |
+
dataset = load_dataset('../tulu-3-sft-mixture', split='train')
|
| 9 |
+
|
| 10 |
+
print("Converting dataset to jsonl format")
|
| 11 |
+
output_file = "../tulu-3-sft-mixture_train_939k.jsonl"
|
| 12 |
+
with open(output_file, 'w', encoding='utf-8') as f:
|
| 13 |
+
for item in tqdm(dataset):
|
| 14 |
+
# print(item, item)
|
| 15 |
+
# break
|
| 16 |
+
try:
|
| 17 |
+
conv = {
|
| 18 |
+
'id': item['id'],
|
| 19 |
+
'conversations': [
|
| 20 |
+
{'from': 'human', 'value': item['messages'][0]['content']},
|
| 21 |
+
{'from': 'gpt', 'value': item['messages'][1]['content']}
|
| 22 |
+
]
|
| 23 |
+
}
|
| 24 |
+
f.write(json.dumps(conv, ensure_ascii=False) + '\n')
|
| 25 |
+
except IndexError:
|
| 26 |
+
print('e')
|
| 27 |
+
|
| 28 |
+
print(f"Conversion complete. Output saved as {output_file}")
|
hf_data/script/processpor_code.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
|
| 3 |
+
from datasets import load_dataset
|
| 4 |
+
from tqdm import tqdm
|
| 5 |
+
|
| 6 |
+
dataset = load_dataset('nvidia/OpenCodeInstruct', split='train')
|
| 7 |
+
|
| 8 |
+
print("Converting dataset to jsonl format")
|
| 9 |
+
output_file = "opencodeinstruct_train.jsonl"
|
| 10 |
+
with open(output_file, 'w', encoding='utf-8') as f:
|
| 11 |
+
for item in tqdm(dataset):
|
| 12 |
+
# print(item)
|
| 13 |
+
# break
|
| 14 |
+
conv = {
|
| 15 |
+
'id': item['id'],
|
| 16 |
+
'conversations': [
|
| 17 |
+
{'from': 'human', 'value': item['input']},
|
| 18 |
+
{'from': 'gpt', 'value': item['output']}
|
| 19 |
+
]
|
| 20 |
+
}
|
| 21 |
+
f.write(json.dumps(conv, ensure_ascii=False) + '\n')
|
| 22 |
+
|
| 23 |
+
print(f"Conversion complete. Output saved as {output_file}")
|
hf_data/script/processpor_infinity.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
|
| 3 |
+
from datasets import load_dataset
|
| 4 |
+
from tqdm import tqdm
|
| 5 |
+
|
| 6 |
+
dataset = load_dataset('../OpenMathInstruct-2', split='train')
|
| 7 |
+
|
| 8 |
+
print("Converting dataset to jsonl format")
|
| 9 |
+
output_file = "../openmathinstruct2_train.jsonl"
|
| 10 |
+
with open(output_file, 'w', encoding='utf-8') as f:
|
| 11 |
+
for item in tqdm(dataset):
|
| 12 |
+
# print(item)
|
| 13 |
+
# break
|
| 14 |
+
conv = {
|
| 15 |
+
'conversations':[
|
| 16 |
+
{'from':'human', 'value':item['problem']},
|
| 17 |
+
{'from':'gpt', 'value':item['generated_solution']}
|
| 18 |
+
]
|
| 19 |
+
}
|
| 20 |
+
f.write(json.dumps(conv, ensure_ascii=False) + '\n')
|
| 21 |
+
|
| 22 |
+
print(f"Conversion complete. Output saved as {output_file}")
|
hf_data/script/processpor_infinity_instruct.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# https://huggingface.co/datasets/BAAI/Infinity-Instruct
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
|
| 5 |
+
from datasets import load_dataset
|
| 6 |
+
from tqdm import tqdm
|
| 7 |
+
|
| 8 |
+
dataset = load_dataset('BAAI/Infinity-Instruct', '7M_core', split='train')
|
| 9 |
+
|
| 10 |
+
print("Converting dataset to jsonl format")
|
| 11 |
+
output_file = "../infinity_instruct_7M_core_train_1_5m.jsonl"
|
| 12 |
+
with open(output_file, 'w', encoding='utf-8') as f:
|
| 13 |
+
for item in tqdm(dataset):
|
| 14 |
+
# print(item)
|
| 15 |
+
# break
|
| 16 |
+
# conv = {
|
| 17 |
+
# 'id': item['id'],
|
| 18 |
+
# 'conversations': [
|
| 19 |
+
# {'from': 'human', 'value': item['input']},
|
| 20 |
+
# {'from': 'gpt', 'value': item['output']}
|
| 21 |
+
# ]
|
| 22 |
+
# }
|
| 23 |
+
f.write(json.dumps(item, ensure_ascii=False) + '\n')
|
| 24 |
+
|
| 25 |
+
print(f"Conversion complete. Output saved as {output_file}")
|
hf_data/script/processpor_lamini.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
|
| 3 |
+
from datasets import load_dataset
|
| 4 |
+
from tqdm import tqdm
|
| 5 |
+
|
| 6 |
+
dataset = load_dataset('./LaMini-instruction', split='train')
|
| 7 |
+
|
| 8 |
+
print("Converting dataset to jsonl format")
|
| 9 |
+
output_file = "LaMini_instruction_train.jsonl"
|
| 10 |
+
with open(output_file, 'w', encoding='utf-8') as f:
|
| 11 |
+
for item in tqdm(dataset):
|
| 12 |
+
# print(item)
|
| 13 |
+
# break
|
| 14 |
+
conv = {
|
| 15 |
+
'conversations':[
|
| 16 |
+
{'from':'human', 'value':item['instruction']},
|
| 17 |
+
{'from':'gpt', 'value':item['response']}
|
| 18 |
+
]
|
| 19 |
+
}
|
| 20 |
+
f.write(json.dumps(conv, ensure_ascii=False) + '\n')
|
| 21 |
+
|
| 22 |
+
print(f"Conversion complete. Output saved as {output_file}")
|
hf_data/script/processpor_math.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
|
| 3 |
+
from datasets import load_dataset
|
| 4 |
+
from tqdm import tqdm
|
| 5 |
+
|
| 6 |
+
dataset = load_dataset('../OpenMathInstruct-2', split='train')
|
| 7 |
+
|
| 8 |
+
print("Converting dataset to jsonl format")
|
| 9 |
+
output_file = "../openmathinstruct2_train.jsonl"
|
| 10 |
+
with open(output_file, 'w', encoding='utf-8') as f:
|
| 11 |
+
for item in tqdm(dataset):
|
| 12 |
+
# print(item)
|
| 13 |
+
# break
|
| 14 |
+
conv = {
|
| 15 |
+
'conversations':[
|
| 16 |
+
{'from':'human', 'value':item['problem']},
|
| 17 |
+
{'from':'gpt', 'value':item['generated_solution']}
|
| 18 |
+
]
|
| 19 |
+
}
|
| 20 |
+
f.write(json.dumps(conv, ensure_ascii=False) + '\n')
|
| 21 |
+
|
| 22 |
+
print(f"Conversion complete. Output saved as {output_file}")
|
hf_data/smoltalk_1100k.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:729a6f12594034fe50fd44568aac5960fd9ba9b675d5142b4619c478bdce838e
|
| 3 |
+
size 4169152401
|
hf_data/tulu-3-sft-mixture_train_939k.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aef698a418fcd5aa6218bfc7f0ddd68dbfd87fbcbae3ea08dca6407503275da2
|
| 3 |
+
size 2463675206
|
hf_data/tulu-3-sft-olmo-2-mixture_train_939k.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:09623e387ab69c78756c356ddfb261fe736c3790123a16ee8f8635ed98f1bc26
|
| 3 |
+
size 2463660893
|
hf_data/tulu-3-sft-personas-instruction-following_30k.jsonl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3131ecbd5057eb98c122507c481899c4009eaca87d97da02acce85514b6c7833
|
| 3 |
+
size 61667836
|