lll2343 committed on
Commit
f458890
·
verified ·
1 Parent(s): 29bb2fc

Delete folder hf_data with huggingface_hub

Browse files
Files changed (42) hide show
  1. hf_data/LaMini_instruction_train.jsonl +0 -3
  2. hf_data/ScaleQuest_Code_train_157k.jsonl +0 -3
  3. hf_data/ScaleQuest_Math_train_1m.jsonl +0 -3
  4. hf_data/Table-GPT_train_13k.jsonl +0 -3
  5. hf_data/infinity_instruct_3M_train.jsonl +0 -3
  6. hf_data/infinity_instruct_7M_core_train_1_5m.jsonl +0 -3
  7. hf_data/infinity_instruct_gen_train_1400k.jsonl +0 -3
  8. hf_data/infinity_instruct_train_7m.jsonl +0 -3
  9. hf_data/opc-sft-stage1-filtered_infinity_instruct-1030k.jsonl +0 -3
  10. hf_data/opc-sft-stage1-largescale_diverse_instruct-2513k.jsonl +0 -3
  11. hf_data/opc-sft-stage1-realuser_instruct-675k.jsonl +0 -3
  12. hf_data/opc-sft-stage2-436k.jsonl +0 -3
  13. hf_data/opencodeinstruct_train_2m.jsonl +0 -3
  14. hf_data/opencodeinstruct_train_5m.jsonl +0 -3
  15. hf_data/opencodeinstruct_train_filter_score_eq_1.jsonl +0 -3
  16. hf_data/opencodeinstruct_train_filter_score_ge_08.jsonl +0 -3
  17. hf_data/opencodeinstruct_train_filter_score_ge_08_llm_judgement_avg_score_ge_3.jsonl +0 -3
  18. hf_data/openmathinstruct2_train.jsonl +0 -3
  19. hf_data/openmathinstruct2_train_1m.jsonl +0 -3
  20. hf_data/openmathinstruct2_train_2m.jsonl +0 -3
  21. hf_data/openmathinstruct2_train_5m.jsonl +0 -3
  22. hf_data/sciRIFF_4096_70k.jsonl +0 -3
  23. hf_data/script/processor_code_opc_sft.py +0 -29
  24. hf_data/script/processor_code_opc_sft_stage_1.py +0 -30
  25. hf_data/script/processor_magpie.py +0 -23
  26. hf_data/script/processor_open_code_instruct_filter.py +0 -36
  27. hf_data/script/processor_scalequest_code.py +0 -25
  28. hf_data/script/processor_scalequest_math.py +0 -25
  29. hf_data/script/processor_sciriff.py +0 -25
  30. hf_data/script/processor_smoltalk.py +0 -38
  31. hf_data/script/processor_table_gpt.py +0 -28
  32. hf_data/script/processor_tulu.py +0 -25
  33. hf_data/script/processor_tulu_mixture.py +0 -28
  34. hf_data/script/processpor_code.py +0 -23
  35. hf_data/script/processpor_infinity.py +0 -22
  36. hf_data/script/processpor_infinity_instruct.py +0 -25
  37. hf_data/script/processpor_lamini.py +0 -22
  38. hf_data/script/processpor_math.py +0 -22
  39. hf_data/smoltalk_1100k.jsonl +0 -3
  40. hf_data/tulu-3-sft-mixture_train_939k.jsonl +0 -3
  41. hf_data/tulu-3-sft-olmo-2-mixture_train_939k.jsonl +0 -3
  42. hf_data/tulu-3-sft-personas-instruction-following_30k.jsonl +0 -3
hf_data/LaMini_instruction_train.jsonl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a170d06468f1e0b75a93138084f28b821cdefe19da547af0bc7093be0057d2e5
3
- size 1316278839
 
 
 
 
hf_data/ScaleQuest_Code_train_157k.jsonl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:332960c2672ae70b278a3ee7dc41fe1854a113b94ff8e4d7ceab302d23304f11
3
- size 393762917
 
 
 
 
hf_data/ScaleQuest_Math_train_1m.jsonl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8504b7b492431641b23a9aa4e695572dd16836bf3156a94c21645a5361368817
3
- size 1581059706
 
 
 
 
hf_data/Table-GPT_train_13k.jsonl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:36cb71a030f3a9c98fa114ee7206bcc5f28697fbcf916d0c01e36ef0242ea443
3
- size 33778304
 
 
 
 
hf_data/infinity_instruct_3M_train.jsonl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e870f4c34157f846443cb6adf423b6b183aba52268fb3b7adea9e5f899250601
3
- size 7970107783
 
 
 
 
hf_data/infinity_instruct_7M_core_train_1_5m.jsonl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2ede1f8e4a3906c394f000595416dbade9d9a28c34f80337c272fbc46ba6822
3
- size 3174471022
 
 
 
 
hf_data/infinity_instruct_gen_train_1400k.jsonl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a003195934389a860dca44da374eb580adcbdbc8763a49a3f940e4991b0fa3e
3
- size 5782067170
 
 
 
 
hf_data/infinity_instruct_train_7m.jsonl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:522be9282906a1e539e2c66e2d51f7a044ceaa102346ae402e0e6573d4a83a54
3
- size 14312887611
 
 
 
 
hf_data/opc-sft-stage1-filtered_infinity_instruct-1030k.jsonl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8a16827ffcb868bd79b97f6b12757e0ce8d505c3af65fb28c60df743527f0d1
3
- size 2128437340
 
 
 
 
hf_data/opc-sft-stage1-largescale_diverse_instruct-2513k.jsonl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:514b8adc9f28e5faf00c7ae5fca1722045526b59e7b8d13c716beb0c93c9e26e
3
- size 6822344959
 
 
 
 
hf_data/opc-sft-stage1-realuser_instruct-675k.jsonl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b6b11a4f995acb07b86d6679a5aa838d7c79931b2fd4543e2cf42576161a3253
3
- size 2325200313
 
 
 
 
hf_data/opc-sft-stage2-436k.jsonl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a1b8bdafd4500d8785803156056de94989af8fa0146109561af3124fab6ed04c
3
- size 1109723658
 
 
 
 
hf_data/opencodeinstruct_train_2m.jsonl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:155fd30f5e0f7d329f0e1eddddab41e0bf7fd565c86613bb3215d01a1aa2ffe5
3
- size 3950774175
 
 
 
 
hf_data/opencodeinstruct_train_5m.jsonl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff6393709448e0eb0d016ec1a2226f4a1b9e79db89f5a26b1c8f851d7adf6c52
3
- size 10284863439
 
 
 
 
hf_data/opencodeinstruct_train_filter_score_eq_1.jsonl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f6a3bf492ae82328be8ae575f84b65198a196918d513216b4b544f4ab6c07a99
3
- size 2951101476
 
 
 
 
hf_data/opencodeinstruct_train_filter_score_ge_08.jsonl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:2bf3163adc2bca7d0103dcb17208a2c24d345dbcad8a85ad6e3fc975dff28927
3
- size 4706279026
 
 
 
 
hf_data/opencodeinstruct_train_filter_score_ge_08_llm_judgement_avg_score_ge_3.jsonl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:efeb82226ab27a81517d945ad7870ce77c74b06d31daed26429405ba73f3bb00
3
- size 4671287609
 
 
 
 
hf_data/openmathinstruct2_train.jsonl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e070b0f4220feb93e6646caca1b881e2b5d33afcb64da9b30d59af6784b830f
3
- size 16798025787
 
 
 
 
hf_data/openmathinstruct2_train_1m.jsonl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc79dd150daf1d376f2ce2772337d3ffa7ff9ff61d1d825e12bb5d7c21b109fc
3
- size 1445582798
 
 
 
 
hf_data/openmathinstruct2_train_2m.jsonl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:36fadca11edb9e5dc3fb9f8a60194d97a4448ac302af0c53fee69605c64785be
3
- size 2952796652
 
 
 
 
hf_data/openmathinstruct2_train_5m.jsonl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba0e8cbc50a72e232a5e20731edd849c69b87ddef2f331c0ab366c955f2ac5e4
3
- size 7012994994
 
 
 
 
hf_data/sciRIFF_4096_70k.jsonl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c771bd27670ff0d9fe0c5c4efd42c1fc945c8f29338c68e955244c13a2e68b7
3
- size 390602961
 
 
 
 
hf_data/script/processor_code_opc_sft.py DELETED
@@ -1,29 +0,0 @@
1
- # https://huggingface.co/datasets/BAAI/Infinity-Instruct
2
-
3
- import json
4
-
5
- from datasets import load_dataset
6
- from tqdm import tqdm
7
-
8
- subset = ['educational_instruct', 'evol_instruct', 'mceval_instruct', 'package_instruct']
9
- output_file = "../opc-sft-stage2-436k.jsonl"
10
- ix = 0
11
- with open(output_file, 'w', encoding='utf-8') as f:
12
- for sub in subset:
13
- dataset = load_dataset('../opc-sft-stage2', sub, split='train')
14
- print(f"Converting dataset to jsonl format {sub}")
15
-
16
- for item in tqdm(dataset):
17
- # print(item, item)
18
- # break
19
- conv = {
20
- 'id': item['seq_id'] if 'seq_id' in item.keys() else f'{sub}-{ix}',
21
- 'conversations': [
22
- {'from': 'human', 'value': item['instruction']},
23
- {'from': 'gpt', 'value': item['output']}
24
- ]
25
- }
26
- ix += 1
27
- f.write(json.dumps(conv, ensure_ascii=False) + '\n')
28
-
29
- print(f"Conversion complete. Output saved as {output_file}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hf_data/script/processor_code_opc_sft_stage_1.py DELETED
@@ -1,30 +0,0 @@
1
- # https://huggingface.co/datasets/BAAI/Infinity-Instruct
2
-
3
- import json
4
-
5
- from datasets import load_dataset
6
- from tqdm import tqdm
7
-
8
- subset = ['filtered_infinity_instruct', 'realuser_instruct', 'largescale_diverse_instruct']
9
- cnts = ['1030k', '2510k','676k']
10
-
11
- for cnt, sub in zip(cnts, subset):
12
- dataset = load_dataset('../opc-sft-stage1', sub, split='train')
13
- output_file = f"../opc-sft-stage1-{sub}-{cnt}.jsonl"
14
- print(f"Converting dataset to jsonl format {sub}")
15
- ix = 0
16
- with open(output_file, 'w', encoding='utf-8') as f:
17
- for item in tqdm(dataset):
18
- # print(item, item)
19
- # break
20
- conv = {
21
- 'id': item['seq_id'] if 'seq_id' in item.keys() else f'{sub}-{ix}',
22
- 'conversations': [
23
- {'from': 'human', 'value': item['instruction']},
24
- {'from': 'gpt', 'value': item['output']}
25
- ]
26
- }
27
- ix += 1
28
- f.write(json.dumps(conv, ensure_ascii=False) + '\n')
29
-
30
- print(f"Conversion complete. Output saved as {output_file}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hf_data/script/processor_magpie.py DELETED
@@ -1,23 +0,0 @@
1
- import json
2
-
3
- from datasets import load_dataset
4
- from tqdm import tqdm
5
-
6
- dataset = load_dataset('../Magpie-Qwen2.5-Pro-300K-Filtered', split='train')
7
-
8
- print("Converting dataset to jsonl format")
9
- output_file = "../Magpie-Qwen2_5-Pro-300K-Filtered.jsonl"
10
- with open(output_file, 'w', encoding='utf-8') as f:
11
- for item in tqdm(dataset):
12
- # print(item)
13
- # break
14
- conv = {
15
- 'id': item['uuid'],
16
- 'conversations': [
17
- {'from': 'human', 'value': item['instruction']},
18
- {'from': 'gpt', 'value': item['response']}
19
- ]
20
- }
21
- f.write(json.dumps(conv, ensure_ascii=False) + '\n')
22
-
23
- print(f"Conversion complete. Output saved as {output_file}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hf_data/script/processor_open_code_instruct_filter.py DELETED
@@ -1,36 +0,0 @@
1
- import json
2
-
3
- from datasets import load_dataset
4
- from tqdm import tqdm
5
-
6
- dataset = load_dataset('../OpenCodeInstruct', split='train')
7
-
8
- print("Converting dataset to jsonl format")
9
- output_file = "../opencodeinstruct_train_filter_score_ge_08.jsonl"
10
- with open(output_file, 'w', encoding='utf-8') as f:
11
- for item in tqdm(dataset):
12
-
13
- llm_judgement = json.loads(item['llm_judgement'])
14
- llm_judgement_avg_score = (
15
- int(llm_judgement['requirement_conformance']['score']) +
16
- int(llm_judgement['logical_correctness']['score']) +
17
- int(llm_judgement['requirement_conformance']['score'])
18
- ) / 3
19
-
20
- # print(item)
21
- # print(float(item['average_test_score']))
22
- # print(llm_judgement_avg_score)
23
- # break
24
-
25
- if float(item['average_test_score']) >= 0.8:
26
- conv = {
27
- 'id': item['id'],
28
- 'conversations': [
29
- {'from': 'human', 'value': item['input']},
30
- {'from': 'gpt', 'value': item['output']}
31
- ]
32
- }
33
- f.write(json.dumps(conv, ensure_ascii=False) + '\n')
34
-
35
- print(f"Conversion complete. Output saved as {output_file}")
36
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hf_data/script/processor_scalequest_code.py DELETED
@@ -1,25 +0,0 @@
1
- import json
2
-
3
- from datasets import load_dataset
4
- from tqdm import tqdm
5
-
6
- dataset = load_dataset('../ScaleQuest-Code', split='train')
7
-
8
- print("Converting dataset to jsonl format")
9
- output_file = "../ScaleQuest_Code_train_157k.jsonl"
10
- ix = 0
11
- with open(output_file, 'w', encoding='utf-8') as f:
12
- for item in tqdm(dataset):
13
- # print(item, item)
14
- # break
15
- conv = {
16
- 'id': ix,
17
- 'conversations': [
18
- {'from': 'human', 'value': item['query']},
19
- {'from': 'gpt', 'value': item['response']}
20
- ]
21
- }
22
- ix += 1
23
- f.write(json.dumps(conv, ensure_ascii=False) + '\n')
24
-
25
- print(f"Conversion complete. Output saved as {output_file}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hf_data/script/processor_scalequest_math.py DELETED
@@ -1,25 +0,0 @@
1
- import json
2
-
3
- from datasets import load_dataset
4
- from tqdm import tqdm
5
-
6
- dataset = load_dataset('dyyyyyyyy/ScaleQuest-Math', split='train')
7
-
8
- print("Converting dataset to jsonl format")
9
- output_file = "../ScaleQuest_Math_train_1m.jsonl"
10
- ix = 0
11
- with open(output_file, 'w', encoding='utf-8') as f:
12
- for item in tqdm(dataset):
13
- # print(item, item)
14
- # break
15
- conv = {
16
- 'id': ix,
17
- 'conversations': [
18
- {'from': 'human', 'value': item['query']},
19
- {'from': 'gpt', 'value': item['response']}
20
- ]
21
- }
22
- ix += 1
23
- f.write(json.dumps(conv, ensure_ascii=False) + '\n')
24
-
25
- print(f"Conversion complete. Output saved as {output_file}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hf_data/script/processor_sciriff.py DELETED
@@ -1,25 +0,0 @@
1
- # https://huggingface.co/datasets/BAAI/Infinity-Instruct
2
-
3
- import json
4
-
5
- from datasets import load_dataset
6
- from tqdm import tqdm
7
-
8
- dataset = load_dataset('../SciRIFF', '4096', split='train')
9
-
10
- print("Converting dataset to jsonl format")
11
- output_file = "../sciRIFF_4096_70k.jsonl"
12
- with open(output_file, 'w', encoding='utf-8') as f:
13
- for item in tqdm(dataset):
14
- # print(item, item)
15
- # break
16
- conv = {
17
- 'id': item['_instance_id'],
18
- 'conversations': [
19
- {'from': 'human', 'value': item['input']},
20
- {'from': 'gpt', 'value': item['output']}
21
- ]
22
- }
23
- f.write(json.dumps(conv, ensure_ascii=False) + '\n')
24
-
25
- print(f"Conversion complete. Output saved as {output_file}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hf_data/script/processor_smoltalk.py DELETED
@@ -1,38 +0,0 @@
1
- # https://huggingface.co/datasets/BAAI/Infinity-Instruct
2
-
3
- import json
4
-
5
- from datasets import load_dataset
6
- from tqdm import tqdm
7
-
8
- dataset = load_dataset('../smoltalk/', 'all', split='train')
9
-
10
- print("Converting dataset to jsonl format")
11
- output_file = "../smoltalk_1100k.jsonl"
12
- ix = 0
13
- roles = {
14
- 'assistant': 'gpt',
15
- 'user': 'human',
16
- 'system': 'system'
17
- }
18
-
19
- with open(output_file, 'w', encoding='utf-8') as f:
20
- for item in tqdm(dataset):
21
- conversations = []
22
- for msg in item['messages']:
23
- from_str = roles[msg['role']]
24
- value = msg['content']
25
- conversations.append(
26
- {
27
- 'from': from_str,
28
- 'value': value
29
- }
30
- )
31
- conv = {
32
- 'id': ix,
33
- 'conversations': conversations
34
- }
35
- ix += 1
36
- f.write(json.dumps(conv, ensure_ascii=False) + '\n')
37
-
38
- print(f"Conversion complete. Output saved as {output_file}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hf_data/script/processor_table_gpt.py DELETED
@@ -1,28 +0,0 @@
1
- # https://huggingface.co/datasets/BAAI/Infinity-Instruct
2
-
3
- import json
4
-
5
- from datasets import load_dataset
6
- from tqdm import tqdm
7
-
8
- dataset = load_dataset('../Table-GPT', 'All', split='train')
9
-
10
- print("Converting dataset to jsonl format")
11
- output_file = "../Table-GPT_train_13k.jsonl"
12
- ix = 0
13
- with open(output_file, 'w', encoding='utf-8') as f:
14
- for item in tqdm(dataset):
15
- # print(item, item)
16
- # break
17
- conv = {
18
- 'id': ix,
19
- 'conversations': [
20
- {'from': 'system', 'value': 'You are a helpful assistant that specializes in tables'},
21
- {'from': 'human', 'value': item['prompt']},
22
- {'from': 'gpt', 'value': item['completion']}
23
- ]
24
- }
25
- ix += 1
26
- f.write(json.dumps(conv, ensure_ascii=False) + '\n')
27
-
28
- print(f"Conversion complete. Output saved as {output_file}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hf_data/script/processor_tulu.py DELETED
@@ -1,25 +0,0 @@
1
- # https://huggingface.co/datasets/BAAI/Infinity-Instruct
2
-
3
- import json
4
-
5
- from datasets import load_dataset
6
- from tqdm import tqdm
7
-
8
- dataset = load_dataset('../tulu-3-sft-personas-instruction-following', split='train')
9
-
10
- print("Converting dataset to jsonl format")
11
- output_file = "../tulu-3-sft-personas-instruction-following_30k.jsonl"
12
- with open(output_file, 'w', encoding='utf-8') as f:
13
- for item in tqdm(dataset):
14
- # print(item, item)
15
- # break
16
- conv = {
17
- 'id': item['id'],
18
- 'conversations': [
19
- {'from': 'human', 'value': item['prompt']},
20
- {'from': 'gpt', 'value': item['messages'][1]['content']}
21
- ]
22
- }
23
- f.write(json.dumps(conv, ensure_ascii=False) + '\n')
24
-
25
- print(f"Conversion complete. Output saved as {output_file}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hf_data/script/processor_tulu_mixture.py DELETED
@@ -1,28 +0,0 @@
1
- # https://huggingface.co/datasets/BAAI/Infinity-Instruct
2
-
3
- import json
4
-
5
- from datasets import load_dataset
6
- from tqdm import tqdm
7
-
8
- dataset = load_dataset('../tulu-3-sft-mixture', split='train')
9
-
10
- print("Converting dataset to jsonl format")
11
- output_file = "../tulu-3-sft-mixture_train_939k.jsonl"
12
- with open(output_file, 'w', encoding='utf-8') as f:
13
- for item in tqdm(dataset):
14
- # print(item, item)
15
- # break
16
- try:
17
- conv = {
18
- 'id': item['id'],
19
- 'conversations': [
20
- {'from': 'human', 'value': item['messages'][0]['content']},
21
- {'from': 'gpt', 'value': item['messages'][1]['content']}
22
- ]
23
- }
24
- f.write(json.dumps(conv, ensure_ascii=False) + '\n')
25
- except IndexError:
26
- print('e')
27
-
28
- print(f"Conversion complete. Output saved as {output_file}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hf_data/script/processpor_code.py DELETED
@@ -1,23 +0,0 @@
1
- import json
2
-
3
- from datasets import load_dataset
4
- from tqdm import tqdm
5
-
6
- dataset = load_dataset('nvidia/OpenCodeInstruct', split='train')
7
-
8
- print("Converting dataset to jsonl format")
9
- output_file = "opencodeinstruct_train.jsonl"
10
- with open(output_file, 'w', encoding='utf-8') as f:
11
- for item in tqdm(dataset):
12
- # print(item)
13
- # break
14
- conv = {
15
- 'id': item['id'],
16
- 'conversations': [
17
- {'from': 'human', 'value': item['input']},
18
- {'from': 'gpt', 'value': item['output']}
19
- ]
20
- }
21
- f.write(json.dumps(conv, ensure_ascii=False) + '\n')
22
-
23
- print(f"Conversion complete. Output saved as {output_file}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hf_data/script/processpor_infinity.py DELETED
@@ -1,22 +0,0 @@
1
- import json
2
-
3
- from datasets import load_dataset
4
- from tqdm import tqdm
5
-
6
- dataset = load_dataset('../OpenMathInstruct-2', split='train')
7
-
8
- print("Converting dataset to jsonl format")
9
- output_file = "../openmathinstruct2_train.jsonl"
10
- with open(output_file, 'w', encoding='utf-8') as f:
11
- for item in tqdm(dataset):
12
- # print(item)
13
- # break
14
- conv = {
15
- 'conversations':[
16
- {'from':'human', 'value':item['problem']},
17
- {'from':'gpt', 'value':item['generated_solution']}
18
- ]
19
- }
20
- f.write(json.dumps(conv, ensure_ascii=False) + '\n')
21
-
22
- print(f"Conversion complete. Output saved as {output_file}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hf_data/script/processpor_infinity_instruct.py DELETED
@@ -1,25 +0,0 @@
1
- # https://huggingface.co/datasets/BAAI/Infinity-Instruct
2
-
3
- import json
4
-
5
- from datasets import load_dataset
6
- from tqdm import tqdm
7
-
8
- dataset = load_dataset('BAAI/Infinity-Instruct', '7M_core', split='train')
9
-
10
- print("Converting dataset to jsonl format")
11
- output_file = "../infinity_instruct_7M_core_train_1_5m.jsonl"
12
- with open(output_file, 'w', encoding='utf-8') as f:
13
- for item in tqdm(dataset):
14
- # print(item)
15
- # break
16
- # conv = {
17
- # 'id': item['id'],
18
- # 'conversations': [
19
- # {'from': 'human', 'value': item['input']},
20
- # {'from': 'gpt', 'value': item['output']}
21
- # ]
22
- # }
23
- f.write(json.dumps(item, ensure_ascii=False) + '\n')
24
-
25
- print(f"Conversion complete. Output saved as {output_file}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hf_data/script/processpor_lamini.py DELETED
@@ -1,22 +0,0 @@
1
- import json
2
-
3
- from datasets import load_dataset
4
- from tqdm import tqdm
5
-
6
- dataset = load_dataset('./LaMini-instruction', split='train')
7
-
8
- print("Converting dataset to jsonl format")
9
- output_file = "LaMini_instruction_train.jsonl"
10
- with open(output_file, 'w', encoding='utf-8') as f:
11
- for item in tqdm(dataset):
12
- # print(item)
13
- # break
14
- conv = {
15
- 'conversations':[
16
- {'from':'human', 'value':item['instruction']},
17
- {'from':'gpt', 'value':item['response']}
18
- ]
19
- }
20
- f.write(json.dumps(conv, ensure_ascii=False) + '\n')
21
-
22
- print(f"Conversion complete. Output saved as {output_file}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hf_data/script/processpor_math.py DELETED
@@ -1,22 +0,0 @@
1
- import json
2
-
3
- from datasets import load_dataset
4
- from tqdm import tqdm
5
-
6
- dataset = load_dataset('../OpenMathInstruct-2', split='train')
7
-
8
- print("Converting dataset to jsonl format")
9
- output_file = "../openmathinstruct2_train.jsonl"
10
- with open(output_file, 'w', encoding='utf-8') as f:
11
- for item in tqdm(dataset):
12
- # print(item)
13
- # break
14
- conv = {
15
- 'conversations':[
16
- {'from':'human', 'value':item['problem']},
17
- {'from':'gpt', 'value':item['generated_solution']}
18
- ]
19
- }
20
- f.write(json.dumps(conv, ensure_ascii=False) + '\n')
21
-
22
- print(f"Conversion complete. Output saved as {output_file}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hf_data/smoltalk_1100k.jsonl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:729a6f12594034fe50fd44568aac5960fd9ba9b675d5142b4619c478bdce838e
3
- size 4169152401
 
 
 
 
hf_data/tulu-3-sft-mixture_train_939k.jsonl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:aef698a418fcd5aa6218bfc7f0ddd68dbfd87fbcbae3ea08dca6407503275da2
3
- size 2463675206
 
 
 
 
hf_data/tulu-3-sft-olmo-2-mixture_train_939k.jsonl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:09623e387ab69c78756c356ddfb261fe736c3790123a16ee8f8635ed98f1bc26
3
- size 2463660893
 
 
 
 
hf_data/tulu-3-sft-personas-instruction-following_30k.jsonl DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:3131ecbd5057eb98c122507c481899c4009eaca87d97da02acce85514b6c7833
3
- size 61667836