#!/usr/bin/env bash
#
# Launches a series of tokenizer training runs by invoking
# scripts/train_tokenizer.sh once per (corpus, final-argument) pair below.
#
# Every invocation passes six positional arguments:
#   <run name> 800000 <corpus .jsonl> 2 tokenizer_json/<run name>_vocab800K_stage1 <value>
#
# NOTE(review): the meaning of each argument is defined by
# scripts/train_tokenizer.sh, which is not visible from this file — confirm
# there before changing any of the numbers.
#
# The script and output paths are relative, so run this from the directory
# that contains scripts/ and tokenizer_json/.

# Fail loudly on a mistyped variable name instead of passing an empty argument.
set -u

readonly TRAIN_SCRIPT="scripts/train_tokenizer.sh"
readonly DATA_DIR="/opt/tiger/byte-lingua/superbpe"
readonly VOCAB_SIZE=800000

# Number of training invocations that returned a non-zero status.
failures=0

#######################################
# Runs the training script once for each trailing value.
# Globals:   TRAIN_SCRIPT, VOCAB_SIZE (read); failures (written)
# Arguments: $1   - run name (also used to build the tokenizer_json/ path)
#            $2   - path to the corpus .jsonl file
#            $3.. - values passed, one per run, as the final argument
# Outputs:   one line on stderr for each run that fails
# Returns:   0 always; failed runs are counted in `failures`
#######################################
run_sweep() {
  local name=$1
  local corpus=$2
  shift 2

  local value
  for value in "$@"; do
    # A failed run does not stop the remaining runs; it is only recorded.
    if ! bash "$TRAIN_SCRIPT" "$name" "$VOCAB_SIZE" "$corpus" 2 \
      "tokenizer_json/${name}_vocab800K_stage1" "$value"; then
      printf 'train_tokenizer failed: name=%s value=%s\n' "$name" "$value" >&2
      failures=$((failures + 1))
    fi
  done
  return 0
}

run_sweep python500k "${DATA_DIR}/subsample_python.jsonl" \
  80000 320000 480000 640000

# NOTE(review): the second value here is 160000, whereas the python500k list
# above uses 320000 — kept exactly as found; confirm this is intentional.
run_sweep opencoder300k "${DATA_DIR}/subsample_opencoder.jsonl" \
  80000 160000 480000 640000

# Make the script's final status non-zero when any run failed.
((failures == 0))