#!/usr/bin/env bash
#
# Launches a batch of tokenizer training runs through scripts/train_tokenizer.sh.
#
# Each run is a separate invocation of the training script with six positional
# arguments:
#   1. run name                      (e.g. python500k)
#   2. 800000                        (presumably the vocabulary size, given the
#                                     "vocab800K" in the output directory name
#                                     -- confirm against train_tokenizer.sh)
#   3. path to the JSONL corpus
#   4. 2                             (meaning is defined by train_tokenizer.sh)
#   5. output directory              (tokenizer_json/<name>_vocab800K_stage1)
#   6. a per-run numeric value       (meaning is defined by train_tokenizer.sh)
#
# Usage: run from the directory that contains scripts/ and tokenizer_json/,
# since both are referenced by relative path.
#
# Every run is attempted even if an earlier one fails; the script's exit status
# is non-zero when at least one run failed.

#######################################
# Runs every configured training job in order.
# Arguments: none
# Outputs:   one diagnostic line on stderr per failed run
# Returns:   0 if all runs succeeded, 1 otherwise
#######################################
main() {
  local train_script="scripts/train_tokenizer.sh"
  local data_dir="/opt/tiger/byte-lingua/superbpe"

  # One entry per run: "<name> <corpus file under data_dir> <sixth argument>".
  # NOTE(review): the python500k list has 320000 where the opencoder300k list
  # has 160000; the values are kept exactly as they were -- confirm that the
  # asymmetry is intentional.
  local -a runs=(
    "python500k subsample_python.jsonl 80000"
    "python500k subsample_python.jsonl 320000"
    "python500k subsample_python.jsonl 480000"
    "python500k subsample_python.jsonl 640000"
    "opencoder300k subsample_opencoder.jsonl 80000"
    "opencoder300k subsample_opencoder.jsonl 160000"
    "opencoder300k subsample_opencoder.jsonl 480000"
    "opencoder300k subsample_opencoder.jsonl 640000"
  )

  local run name corpus value
  local failures=0

  for run in "${runs[@]}"; do
    # Split the table row into its three fields.
    read -r name corpus value <<<"${run}"

    # Each run must be its own command: placing several invocations on a
    # single line would hand the later ones to the first as extra arguments.
    if ! bash "${train_script}" \
      "${name}" \
      800000 \
      "${data_dir}/${corpus}" \
      2 \
      "tokenizer_json/${name}_vocab800K_stage1" \
      "${value}"; then
      printf 'train_tokenizer run failed: %s %s\n' "${name}" "${value}" >&2
      failures=$((failures + 1))
    fi
  done

  if ((failures > 0)); then
    return 1
  fi
  return 0
}

main "$@"