Upload folder using huggingface_hub
Browse files- data/sft/.DS_Store +0 -0
- data/sft/processed/.DS_Store +0 -0
- data/sft/processed/dataset_summary.json +261 -0
- data/sft/processed/logs/prepare_sft_data_20260315_132126.log +73 -0
- data/sft/processed/train_input_ids.bin +3 -0
- data/sft/processed/train_labels.bin +3 -0
- data/sft/processed/train_metadata.json +6 -0
- data/sft/processed/val_input_ids.bin +3 -0
- data/sft/processed/val_labels.bin +3 -0
- data/sft/processed/val_metadata.json +6 -0
data/sft/.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
data/sft/processed/.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
data/sft/processed/dataset_summary.json
ADDED
|
@@ -0,0 +1,261 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"config": {
|
| 3 |
+
"val_examples": 2000,
|
| 4 |
+
"max_train_examples": 200000,
|
| 5 |
+
"min_supervised_tokens": 16,
|
| 6 |
+
"shuffle": true,
|
| 7 |
+
"format": "messages",
|
| 8 |
+
"messages_field": "messages",
|
| 9 |
+
"sources": [
|
| 10 |
+
{
|
| 11 |
+
"source_name": "smol_magpie_ultra",
|
| 12 |
+
"path": "HuggingFaceTB/smoltalk",
|
| 13 |
+
"config_name": "smol-magpie-ultra",
|
| 14 |
+
"split": "train",
|
| 15 |
+
"weight": 0.4,
|
| 16 |
+
"row_filters": {
|
| 17 |
+
"quality": "good"
|
| 18 |
+
}
|
| 19 |
+
},
|
| 20 |
+
{
|
| 21 |
+
"source_name": "openhermes",
|
| 22 |
+
"path": "HuggingFaceTB/smoltalk",
|
| 23 |
+
"config_name": "openhermes-100k",
|
| 24 |
+
"split": "train",
|
| 25 |
+
"weight": 0.15
|
| 26 |
+
},
|
| 27 |
+
{
|
| 28 |
+
"source_name": "self_oss_instruct",
|
| 29 |
+
"path": "HuggingFaceTB/smoltalk",
|
| 30 |
+
"config_name": "self-oss-instruct",
|
| 31 |
+
"split": "train",
|
| 32 |
+
"weight": 0.15
|
| 33 |
+
},
|
| 34 |
+
{
|
| 35 |
+
"source_name": "everyday_conversations",
|
| 36 |
+
"path": "HuggingFaceTB/smoltalk",
|
| 37 |
+
"config_name": "everyday-conversations",
|
| 38 |
+
"split": "train",
|
| 39 |
+
"weight": 0.01
|
| 40 |
+
},
|
| 41 |
+
{
|
| 42 |
+
"source_name": "numina_cot",
|
| 43 |
+
"path": "HuggingFaceTB/smoltalk",
|
| 44 |
+
"config_name": "numina-cot-100k",
|
| 45 |
+
"split": "train",
|
| 46 |
+
"weight": 0.1
|
| 47 |
+
},
|
| 48 |
+
{
|
| 49 |
+
"source_name": "metamathqa",
|
| 50 |
+
"path": "HuggingFaceTB/smoltalk",
|
| 51 |
+
"config_name": "metamathqa-50k",
|
| 52 |
+
"split": "train",
|
| 53 |
+
"weight": 0.05
|
| 54 |
+
},
|
| 55 |
+
{
|
| 56 |
+
"source_name": "longalign",
|
| 57 |
+
"path": "HuggingFaceTB/smoltalk",
|
| 58 |
+
"config_name": "longalign",
|
| 59 |
+
"split": "train",
|
| 60 |
+
"weight": 0.015
|
| 61 |
+
},
|
| 62 |
+
{
|
| 63 |
+
"source_name": "ultrachat_200k",
|
| 64 |
+
"path": "HuggingFaceH4/ultrachat_200k",
|
| 65 |
+
"config_name": null,
|
| 66 |
+
"split": "train_sft",
|
| 67 |
+
"weight": 0.125
|
| 68 |
+
}
|
| 69 |
+
]
|
| 70 |
+
},
|
| 71 |
+
"sources": [
|
| 72 |
+
{
|
| 73 |
+
"name": "smol_magpie_ultra",
|
| 74 |
+
"path": "HuggingFaceTB/smoltalk",
|
| 75 |
+
"config_name": "smol-magpie-ultra",
|
| 76 |
+
"weight": 0.4,
|
| 77 |
+
"train_target": 80000,
|
| 78 |
+
"val_target": 800,
|
| 79 |
+
"train_examples": 80000,
|
| 80 |
+
"val_examples": 800,
|
| 81 |
+
"rows_seen": 117281,
|
| 82 |
+
"skipped_rows": 36481
|
| 83 |
+
},
|
| 84 |
+
{
|
| 85 |
+
"name": "openhermes",
|
| 86 |
+
"path": "HuggingFaceTB/smoltalk",
|
| 87 |
+
"config_name": "openhermes-100k",
|
| 88 |
+
"weight": 0.15,
|
| 89 |
+
"train_target": 30000,
|
| 90 |
+
"val_target": 300,
|
| 91 |
+
"train_examples": 30000,
|
| 92 |
+
"val_examples": 300,
|
| 93 |
+
"rows_seen": 31945,
|
| 94 |
+
"skipped_rows": 1645
|
| 95 |
+
},
|
| 96 |
+
{
|
| 97 |
+
"name": "self_oss_instruct",
|
| 98 |
+
"path": "HuggingFaceTB/smoltalk",
|
| 99 |
+
"config_name": "self-oss-instruct",
|
| 100 |
+
"weight": 0.15,
|
| 101 |
+
"train_target": 30000,
|
| 102 |
+
"val_target": 300,
|
| 103 |
+
"train_examples": 30000,
|
| 104 |
+
"val_examples": 300,
|
| 105 |
+
"rows_seen": 30300,
|
| 106 |
+
"skipped_rows": 0
|
| 107 |
+
},
|
| 108 |
+
{
|
| 109 |
+
"name": "everyday_conversations",
|
| 110 |
+
"path": "HuggingFaceTB/smoltalk",
|
| 111 |
+
"config_name": "everyday-conversations",
|
| 112 |
+
"weight": 0.01,
|
| 113 |
+
"train_target": 2000,
|
| 114 |
+
"val_target": 20,
|
| 115 |
+
"train_examples": 2000,
|
| 116 |
+
"val_examples": 20,
|
| 117 |
+
"rows_seen": 2020,
|
| 118 |
+
"skipped_rows": 0
|
| 119 |
+
},
|
| 120 |
+
{
|
| 121 |
+
"name": "numina_cot",
|
| 122 |
+
"path": "HuggingFaceTB/smoltalk",
|
| 123 |
+
"config_name": "numina-cot-100k",
|
| 124 |
+
"weight": 0.1,
|
| 125 |
+
"train_target": 20000,
|
| 126 |
+
"val_target": 200,
|
| 127 |
+
"train_examples": 20000,
|
| 128 |
+
"val_examples": 200,
|
| 129 |
+
"rows_seen": 20200,
|
| 130 |
+
"skipped_rows": 0
|
| 131 |
+
},
|
| 132 |
+
{
|
| 133 |
+
"name": "metamathqa",
|
| 134 |
+
"path": "HuggingFaceTB/smoltalk",
|
| 135 |
+
"config_name": "metamathqa-50k",
|
| 136 |
+
"weight": 0.05,
|
| 137 |
+
"train_target": 10000,
|
| 138 |
+
"val_target": 100,
|
| 139 |
+
"train_examples": 10000,
|
| 140 |
+
"val_examples": 100,
|
| 141 |
+
"rows_seen": 10104,
|
| 142 |
+
"skipped_rows": 4
|
| 143 |
+
},
|
| 144 |
+
{
|
| 145 |
+
"name": "longalign",
|
| 146 |
+
"path": "HuggingFaceTB/smoltalk",
|
| 147 |
+
"config_name": "longalign",
|
| 148 |
+
"weight": 0.015,
|
| 149 |
+
"train_target": 3000,
|
| 150 |
+
"val_target": 30,
|
| 151 |
+
"train_examples": 3000,
|
| 152 |
+
"val_examples": 30,
|
| 153 |
+
"rows_seen": 3030,
|
| 154 |
+
"skipped_rows": 0
|
| 155 |
+
},
|
| 156 |
+
{
|
| 157 |
+
"name": "ultrachat_200k",
|
| 158 |
+
"path": "HuggingFaceH4/ultrachat_200k",
|
| 159 |
+
"config_name": null,
|
| 160 |
+
"weight": 0.125,
|
| 161 |
+
"train_target": 25000,
|
| 162 |
+
"val_target": 250,
|
| 163 |
+
"train_examples": 25000,
|
| 164 |
+
"val_examples": 250,
|
| 165 |
+
"rows_seen": 25250,
|
| 166 |
+
"skipped_rows": 0
|
| 167 |
+
}
|
| 168 |
+
],
|
| 169 |
+
"tokenizer_meta": {
|
| 170 |
+
"vocab_size": 49152,
|
| 171 |
+
"special_tokens": {
|
| 172 |
+
"pad_token": "<pad>",
|
| 173 |
+
"bos_token": "<bos>",
|
| 174 |
+
"eos_token": "<eos>",
|
| 175 |
+
"unk_token": "<unk>",
|
| 176 |
+
"pad_token_id": 0,
|
| 177 |
+
"bos_token_id": 1,
|
| 178 |
+
"eos_token_id": 2,
|
| 179 |
+
"unk_token_id": 3
|
| 180 |
+
},
|
| 181 |
+
"data_config": {
|
| 182 |
+
"sources": [
|
| 183 |
+
{
|
| 184 |
+
"name": "fineweb_edu",
|
| 185 |
+
"path": "HuggingFaceFW/fineweb-edu",
|
| 186 |
+
"split": "train",
|
| 187 |
+
"weight": 0.6,
|
| 188 |
+
"text_field": "text",
|
| 189 |
+
"config_name": "sample-10BT",
|
| 190 |
+
"data_dir": null,
|
| 191 |
+
"revision": null,
|
| 192 |
+
"streaming": true,
|
| 193 |
+
"shuffle_buffer": 10000,
|
| 194 |
+
"sample_documents": null
|
| 195 |
+
},
|
| 196 |
+
{
|
| 197 |
+
"name": "cosmopedia_v2",
|
| 198 |
+
"path": "HuggingFaceTB/smollm-corpus",
|
| 199 |
+
"split": "train",
|
| 200 |
+
"weight": 0.2,
|
| 201 |
+
"text_field": "text",
|
| 202 |
+
"config_name": "cosmopedia-v2",
|
| 203 |
+
"data_dir": null,
|
| 204 |
+
"revision": null,
|
| 205 |
+
"streaming": true,
|
| 206 |
+
"shuffle_buffer": 10000,
|
| 207 |
+
"sample_documents": null
|
| 208 |
+
},
|
| 209 |
+
{
|
| 210 |
+
"name": "the_stack_python",
|
| 211 |
+
"path": "bigcode/the-stack-dedup",
|
| 212 |
+
"split": "train",
|
| 213 |
+
"weight": 0.1,
|
| 214 |
+
"text_field": "content",
|
| 215 |
+
"config_name": null,
|
| 216 |
+
"data_dir": "data/python",
|
| 217 |
+
"revision": null,
|
| 218 |
+
"streaming": true,
|
| 219 |
+
"shuffle_buffer": 2000,
|
| 220 |
+
"sample_documents": null
|
| 221 |
+
},
|
| 222 |
+
{
|
| 223 |
+
"name": "finemath",
|
| 224 |
+
"path": "HuggingFaceTB/finemath",
|
| 225 |
+
"split": "train",
|
| 226 |
+
"weight": 0.1,
|
| 227 |
+
"text_field": "text",
|
| 228 |
+
"config_name": "finemath-4plus",
|
| 229 |
+
"data_dir": null,
|
| 230 |
+
"revision": null,
|
| 231 |
+
"streaming": true,
|
| 232 |
+
"shuffle_buffer": 5000,
|
| 233 |
+
"sample_documents": null
|
| 234 |
+
}
|
| 235 |
+
],
|
| 236 |
+
"tokenizer_sample_documents": 2000000,
|
| 237 |
+
"tokenizer_min_frequency": 2,
|
| 238 |
+
"tokenizer_special_tokens": [
|
| 239 |
+
"<pad>",
|
| 240 |
+
"<bos>",
|
| 241 |
+
"<eos>",
|
| 242 |
+
"<unk>"
|
| 243 |
+
],
|
| 244 |
+
"train_tokens": 10000000000,
|
| 245 |
+
"val_tokens": 20000000,
|
| 246 |
+
"shard_size_tokens": 100000000
|
| 247 |
+
}
|
| 248 |
+
},
|
| 249 |
+
"train": {
|
| 250 |
+
"num_examples": 200000,
|
| 251 |
+
"seq_len": 2048,
|
| 252 |
+
"input_ids_path": "train_input_ids.bin",
|
| 253 |
+
"labels_path": "train_labels.bin"
|
| 254 |
+
},
|
| 255 |
+
"val": {
|
| 256 |
+
"num_examples": 2000,
|
| 257 |
+
"seq_len": 2048,
|
| 258 |
+
"input_ids_path": "val_input_ids.bin",
|
| 259 |
+
"labels_path": "val_labels.bin"
|
| 260 |
+
}
|
| 261 |
+
}
|
data/sft/processed/logs/prepare_sft_data_20260315_132126.log
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2026-03-15 13:21:26,604 | INFO | SFT data preparation started
|
| 2 |
+
2026-03-15 13:21:26,604 | INFO | Log file: data/sft/processed/logs/prepare_sft_data_20260315_132126.log
|
| 3 |
+
2026-03-15 13:21:26,605 | INFO | Arguments | config=configs/sft_data_smoltalk.json tokenizer_dir=data/tokenizer output_dir=data/sft/processed seq_len=2048 seed=42
|
| 4 |
+
2026-03-15 13:21:26,605 | INFO | SFT mixture config | num_sources=8 val_examples=2000 max_train_examples=200000
|
| 5 |
+
2026-03-15 13:21:26,605 | INFO | SFT packing config | seq_len=2048 min_supervised_tokens=16
|
| 6 |
+
2026-03-15 13:21:26,605 | INFO | SFT source[0] | name=smol_magpie_ultra path=HuggingFaceTB/smoltalk config_name=smol-magpie-ultra split=train format=messages streaming=False weight=0.4 row_filters={'quality': 'good'} val_target=800 train_target=80000
|
| 7 |
+
2026-03-15 13:21:26,605 | INFO | SFT source[1] | name=openhermes path=HuggingFaceTB/smoltalk config_name=openhermes-100k split=train format=messages streaming=False weight=0.15 row_filters=None val_target=300 train_target=30000
|
| 8 |
+
2026-03-15 13:21:26,605 | INFO | SFT source[2] | name=self_oss_instruct path=HuggingFaceTB/smoltalk config_name=self-oss-instruct split=train format=messages streaming=False weight=0.15 row_filters=None val_target=300 train_target=30000
|
| 9 |
+
2026-03-15 13:21:26,605 | INFO | SFT source[3] | name=everyday_conversations path=HuggingFaceTB/smoltalk config_name=everyday-conversations split=train format=messages streaming=False weight=0.01 row_filters=None val_target=20 train_target=2000
|
| 10 |
+
2026-03-15 13:21:26,605 | INFO | SFT source[4] | name=numina_cot path=HuggingFaceTB/smoltalk config_name=numina-cot-100k split=train format=messages streaming=False weight=0.1 row_filters=None val_target=200 train_target=20000
|
| 11 |
+
2026-03-15 13:21:26,605 | INFO | SFT source[5] | name=metamathqa path=HuggingFaceTB/smoltalk config_name=metamathqa-50k split=train format=messages streaming=False weight=0.05 row_filters=None val_target=100 train_target=10000
|
| 12 |
+
2026-03-15 13:21:26,605 | INFO | SFT source[6] | name=longalign path=HuggingFaceTB/smoltalk config_name=longalign split=train format=messages streaming=False weight=0.015 row_filters=None val_target=30 train_target=3000
|
| 13 |
+
2026-03-15 13:21:26,605 | INFO | SFT source[7] | name=ultrachat_200k path=HuggingFaceH4/ultrachat_200k config_name=None split=train_sft format=messages streaming=False weight=0.125 row_filters=None val_target=250 train_target=25000
|
| 14 |
+
2026-03-15 13:21:26,605 | INFO | Tokenizer special ids | bos=1 eos=2 pad=0
|
| 15 |
+
2026-03-15 13:21:26,606 | INFO | Loading SFT source | name=smol_magpie_ultra
|
| 16 |
+
2026-03-15 13:21:49,343 | INFO | SFT progress | processed=5,000 train_examples=4,200 val_examples=800 skipped=2,212
|
| 17 |
+
2026-03-15 13:22:07,970 | INFO | SFT progress | processed=10,000 train_examples=9,200 val_examples=800 skipped=4,536
|
| 18 |
+
2026-03-15 13:22:26,634 | INFO | SFT progress | processed=15,000 train_examples=14,200 val_examples=800 skipped=6,798
|
| 19 |
+
2026-03-15 13:22:44,959 | INFO | SFT progress | processed=20,000 train_examples=19,200 val_examples=800 skipped=9,047
|
| 20 |
+
2026-03-15 13:23:03,316 | INFO | SFT progress | processed=25,000 train_examples=24,200 val_examples=800 skipped=11,398
|
| 21 |
+
2026-03-15 13:23:21,705 | INFO | SFT progress | processed=30,000 train_examples=29,200 val_examples=800 skipped=13,716
|
| 22 |
+
2026-03-15 13:23:39,935 | INFO | SFT progress | processed=35,000 train_examples=34,200 val_examples=800 skipped=15,985
|
| 23 |
+
2026-03-15 13:23:58,367 | INFO | SFT progress | processed=40,000 train_examples=39,200 val_examples=800 skipped=18,284
|
| 24 |
+
2026-03-15 13:24:16,745 | INFO | SFT progress | processed=45,000 train_examples=44,200 val_examples=800 skipped=20,512
|
| 25 |
+
2026-03-15 13:24:35,169 | INFO | SFT progress | processed=50,000 train_examples=49,200 val_examples=800 skipped=22,749
|
| 26 |
+
2026-03-15 13:24:53,377 | INFO | SFT progress | processed=55,000 train_examples=54,200 val_examples=800 skipped=24,949
|
| 27 |
+
2026-03-15 13:25:11,868 | INFO | SFT progress | processed=60,000 train_examples=59,200 val_examples=800 skipped=27,188
|
| 28 |
+
2026-03-15 13:25:30,314 | INFO | SFT progress | processed=65,000 train_examples=64,200 val_examples=800 skipped=29,431
|
| 29 |
+
2026-03-15 13:25:48,714 | INFO | SFT progress | processed=70,000 train_examples=69,200 val_examples=800 skipped=31,716
|
| 30 |
+
2026-03-15 13:26:07,119 | INFO | SFT progress | processed=75,000 train_examples=74,200 val_examples=800 skipped=33,870
|
| 31 |
+
2026-03-15 13:26:25,775 | INFO | SFT progress | processed=80,000 train_examples=79,200 val_examples=800 skipped=36,145
|
| 32 |
+
2026-03-15 13:26:28,721 | INFO | Completed SFT source | name=smol_magpie_ultra train=80,000/80000 val=800/800 seen=117,281 skipped=36,481
|
| 33 |
+
2026-03-15 13:26:28,721 | INFO | Loading SFT source | name=openhermes
|
| 34 |
+
2026-03-15 13:26:36,651 | INFO | SFT progress | processed=85,000 train_examples=83,900 val_examples=1,100 skipped=36,707
|
| 35 |
+
2026-03-15 13:26:42,553 | INFO | SFT progress | processed=90,000 train_examples=88,900 val_examples=1,100 skipped=36,961
|
| 36 |
+
2026-03-15 13:26:48,344 | INFO | SFT progress | processed=95,000 train_examples=93,900 val_examples=1,100 skipped=37,227
|
| 37 |
+
2026-03-15 13:26:54,249 | INFO | SFT progress | processed=100,000 train_examples=98,900 val_examples=1,100 skipped=37,516
|
| 38 |
+
2026-03-15 13:27:00,205 | INFO | SFT progress | processed=105,000 train_examples=103,900 val_examples=1,100 skipped=37,782
|
| 39 |
+
2026-03-15 13:27:06,261 | INFO | SFT progress | processed=110,000 train_examples=108,900 val_examples=1,100 skipped=38,065
|
| 40 |
+
2026-03-15 13:27:07,568 | INFO | Completed SFT source | name=openhermes train=30,000/30000 val=300/300 seen=31,945 skipped=1,645
|
| 41 |
+
2026-03-15 13:27:07,568 | INFO | Loading SFT source | name=self_oss_instruct
|
| 42 |
+
2026-03-15 13:27:17,619 | INFO | SFT progress | processed=115,000 train_examples=113,600 val_examples=1,400 skipped=38,126
|
| 43 |
+
2026-03-15 13:27:22,498 | INFO | SFT progress | processed=120,000 train_examples=118,600 val_examples=1,400 skipped=38,126
|
| 44 |
+
2026-03-15 13:27:27,485 | INFO | SFT progress | processed=125,000 train_examples=123,600 val_examples=1,400 skipped=38,126
|
| 45 |
+
2026-03-15 13:27:32,482 | INFO | SFT progress | processed=130,000 train_examples=128,600 val_examples=1,400 skipped=38,126
|
| 46 |
+
2026-03-15 13:27:37,473 | INFO | SFT progress | processed=135,000 train_examples=133,600 val_examples=1,400 skipped=38,126
|
| 47 |
+
2026-03-15 13:27:42,522 | INFO | SFT progress | processed=140,000 train_examples=138,600 val_examples=1,400 skipped=38,126
|
| 48 |
+
2026-03-15 13:27:43,916 | INFO | Completed SFT source | name=self_oss_instruct train=30,000/30000 val=300/300 seen=30,300 skipped=0
|
| 49 |
+
2026-03-15 13:27:43,916 | INFO | Loading SFT source | name=everyday_conversations
|
| 50 |
+
2026-03-15 13:27:49,524 | INFO | Completed SFT source | name=everyday_conversations train=2,000/2000 val=20/20 seen=2,020 skipped=0
|
| 51 |
+
2026-03-15 13:27:49,525 | INFO | Loading SFT source | name=numina_cot
|
| 52 |
+
2026-03-15 13:27:56,930 | INFO | SFT progress | processed=145,000 train_examples=143,380 val_examples=1,620 skipped=38,126
|
| 53 |
+
2026-03-15 13:28:03,530 | INFO | SFT progress | processed=150,000 train_examples=148,380 val_examples=1,620 skipped=38,126
|
| 54 |
+
2026-03-15 13:28:09,916 | INFO | SFT progress | processed=155,000 train_examples=153,380 val_examples=1,620 skipped=38,126
|
| 55 |
+
2026-03-15 13:28:16,444 | INFO | SFT progress | processed=160,000 train_examples=158,380 val_examples=1,620 skipped=38,126
|
| 56 |
+
2026-03-15 13:28:21,164 | INFO | Completed SFT source | name=numina_cot train=20,000/20000 val=200/200 seen=20,200 skipped=0
|
| 57 |
+
2026-03-15 13:28:21,165 | INFO | Loading SFT source | name=metamathqa
|
| 58 |
+
2026-03-15 13:28:26,153 | INFO | SFT progress | processed=165,000 train_examples=163,280 val_examples=1,720 skipped=38,126
|
| 59 |
+
2026-03-15 13:28:29,853 | INFO | SFT progress | processed=170,000 train_examples=168,280 val_examples=1,720 skipped=38,127
|
| 60 |
+
2026-03-15 13:28:32,549 | INFO | Completed SFT source | name=metamathqa train=10,000/10000 val=100/100 seen=10,104 skipped=4
|
| 61 |
+
2026-03-15 13:28:32,549 | INFO | Loading SFT source | name=longalign
|
| 62 |
+
2026-03-15 13:29:03,538 | INFO | SFT progress | processed=175,000 train_examples=173,250 val_examples=1,750 skipped=38,130
|
| 63 |
+
2026-03-15 13:29:42,829 | INFO | Completed SFT source | name=longalign train=3,000/3000 val=30/30 seen=3,030 skipped=0
|
| 64 |
+
2026-03-15 13:29:42,830 | INFO | Loading SFT source | name=ultrachat_200k
|
| 65 |
+
2026-03-15 13:29:56,989 | INFO | SFT progress | processed=180,000 train_examples=178,000 val_examples=2,000 skipped=38,130
|
| 66 |
+
2026-03-15 13:30:12,911 | INFO | SFT progress | processed=185,000 train_examples=183,000 val_examples=2,000 skipped=38,130
|
| 67 |
+
2026-03-15 13:30:28,635 | INFO | SFT progress | processed=190,000 train_examples=188,000 val_examples=2,000 skipped=38,130
|
| 68 |
+
2026-03-15 13:30:44,882 | INFO | SFT progress | processed=195,000 train_examples=193,000 val_examples=2,000 skipped=38,130
|
| 69 |
+
2026-03-15 13:31:01,202 | INFO | SFT progress | processed=200,000 train_examples=198,000 val_examples=2,000 skipped=38,130
|
| 70 |
+
2026-03-15 13:31:07,611 | INFO | Completed SFT source | name=ultrachat_200k train=25,000/25000 val=250/250 seen=25,250 skipped=0
|
| 71 |
+
2026-03-15 13:31:07,614 | INFO | SFT dataset saved | output_dir=data/sft/processed
|
| 72 |
+
2026-03-15 13:31:07,615 | INFO | SFT summary | train_examples=200,000 val_examples=2,000 skipped_rows=38,130
|
| 73 |
+
2026-03-15 13:31:07,615 | INFO | SFT metadata saved | path=data/sft/processed/dataset_summary.json
|
data/sft/processed/train_input_ids.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fec9583e942ce4ba4abc4cfe1f7db4e7dd74d166405b3e3570dd0858d939b2a3
|
| 3 |
+
size 819200000
|
data/sft/processed/train_labels.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d2638b86f114f11c3441e27b39206fa5b7625376988792daf51676d3762651d9
|
| 3 |
+
size 1638400000
|
data/sft/processed/train_metadata.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"num_examples": 200000,
|
| 3 |
+
"seq_len": 2048,
|
| 4 |
+
"input_ids_path": "train_input_ids.bin",
|
| 5 |
+
"labels_path": "train_labels.bin"
|
| 6 |
+
}
|
data/sft/processed/val_input_ids.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bd248a527b9b3fcd829a29780341d5958c63e25453035398beb633b65778efc6
|
| 3 |
+
size 8192000
|
data/sft/processed/val_labels.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:857795ae824b5ea4d74a551d1d3075d9bcd03f68912afa6589e5f831f66a2ad5
|
| 3 |
+
size 16384000
|
data/sft/processed/val_metadata.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"num_examples": 2000,
|
| 3 |
+
"seq_len": 2048,
|
| 4 |
+
"input_ids_path": "val_input_ids.bin",
|
| 5 |
+
"labels_path": "val_labels.bin"
|
| 6 |
+
}
|