Upload folder using huggingface_hub
Browse files- .gitattributes +9 -0
- 8k_v2_oracle/0000100000/.metadata +3 -0
- 8k_v2_oracle/0000100000/__0_0.distcp +3 -0
- 8k_v2_oracle/0000100000/__1_0.distcp +3 -0
- 8k_v2_oracle/0000100000/__2_0.distcp +3 -0
- 8k_v2_oracle/0000100000/__3_0.distcp +3 -0
- 8k_v2_oracle/0000100000/__4_0.distcp +3 -0
- 8k_v2_oracle/0000100000/__5_0.distcp +3 -0
- 8k_v2_oracle/0000100000/__6_0.distcp +3 -0
- 8k_v2_oracle/0000100000/__7_0.distcp +3 -0
- 8k_v2_oracle/0000100000/consolidated/consolidated.pth +3 -0
- 8k_v2_oracle/0000100000/consolidated/params.json +1 -0
- 8k_v2_oracle/0000100000/params.json +1 -0
- 8k_v2_oracle/0000100000/train_state_00000.json +1 -0
- 8k_v2_oracle/0000100000/train_state_00001.json +1 -0
- 8k_v2_oracle/0000100000/train_state_00002.json +1 -0
- 8k_v2_oracle/0000100000/train_state_00003.json +1 -0
- 8k_v2_oracle/0000100000/train_state_00004.json +1 -0
- 8k_v2_oracle/0000100000/train_state_00005.json +1 -0
- 8k_v2_oracle/0000100000/train_state_00006.json +1 -0
- 8k_v2_oracle/0000100000/train_state_00007.json +1 -0
.gitattributes
CHANGED
|
@@ -163,3 +163,12 @@ meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/__5_0.distcp filter=l
|
|
| 163 |
meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 164 |
meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 165 |
8k_v2_oracle/metrics.jsonl filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 164 |
meta-llama-Llama-3.2-1B-seed_777_model_seed_222/0000100000/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 165 |
8k_v2_oracle/metrics.jsonl filter=lfs diff=lfs merge=lfs -text
|
| 166 |
+
8k_v2_oracle/0000100000/.metadata filter=lfs diff=lfs merge=lfs -text
|
| 167 |
+
8k_v2_oracle/0000100000/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 168 |
+
8k_v2_oracle/0000100000/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 169 |
+
8k_v2_oracle/0000100000/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 170 |
+
8k_v2_oracle/0000100000/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 171 |
+
8k_v2_oracle/0000100000/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 172 |
+
8k_v2_oracle/0000100000/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 173 |
+
8k_v2_oracle/0000100000/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
|
| 174 |
+
8k_v2_oracle/0000100000/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
|
8k_v2_oracle/0000100000/.metadata
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8cd385b7c232093b4616decd880f94989ca5889c995442b9e9ebfdce560035d6
|
| 3 |
+
size 1148550
|
8k_v2_oracle/0000100000/__0_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:21c41767b721cb164eb53fb2c8d4ba2a4438a9772032b86b97f9a786c5720150
|
| 3 |
+
size 2680042704
|
8k_v2_oracle/0000100000/__1_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:09267e7273eb796affc2632a08b80c754109b74b1d11025f1a693df30f6648e0
|
| 3 |
+
size 2680081644
|
8k_v2_oracle/0000100000/__2_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c3d78796022bf35ef0ee8dcef532298ec33b41a86b09c12ebff2d59d97b11ae5
|
| 3 |
+
size 2680081644
|
8k_v2_oracle/0000100000/__3_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8990b39759cb675b2bfb2ad0b9aa725b4506ceb87d94561804c63724c476ccf9
|
| 3 |
+
size 2680081644
|
8k_v2_oracle/0000100000/__4_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2a4d51a03379fcdb5d859a1b7226641f734a5cbb07052b9632d09112387f9095
|
| 3 |
+
size 2680081644
|
8k_v2_oracle/0000100000/__5_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e1ada49cdfa55aafed3a46dfe68be77ac6fcaca36267a60735c506f823e1d640
|
| 3 |
+
size 2680083920
|
8k_v2_oracle/0000100000/__6_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:18998f98b4983af276c746cd13db1a074b25ae8dd0c3349c105c8da59ec60334
|
| 3 |
+
size 2680083920
|
8k_v2_oracle/0000100000/__7_0.distcp
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8e9f31ade325b09075c28d86afbfdf6abb4e9b5d62734237f2b3a1655b28fa8c
|
| 3 |
+
size 2680043024
|
8k_v2_oracle/0000100000/consolidated/consolidated.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:95add8debe64ba6534012c96e0f2d7a9e05efab13208062a1f24adde920b2f6e
|
| 3 |
+
size 21434123158
|
8k_v2_oracle/0000100000/consolidated/params.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"name": "flexitok_8k_v2_oracle", "dump_dir": "/fsx/craffel/lingua_logs/8k_v2_oracle", "seed": 777, "grad_acc_steps": 8, "gc_collect_freq": 1000, "probe_freq": null, "steps": 100000, "data": {"root_dir": "/scratch/craffel/lingua/data/flexitok/", "sources": {"fw_edu": 0.4, "dan_Latn": 0.0216582869670702, "swe_Latn": 0.0216359765418466, "vie_Latn": 0.0197485510268674, "hun_Latn": 0.0247194573562308, "fas_Arab": 0.0205634624231076, "tur_Latn": 0.0235455794841729, "ces_Latn": 0.0248024455266208, "arb_Arab": 0.0234323706569333, "ell_Grek": 0.0233670886888026, "ind_Latn": 0.0269322054593488, "nld_Latn": 0.0277796326621489, "pol_Latn": 0.0294120104572311, "por_Latn": 0.0301413168306825, "ita_Latn": 0.0324056371021865, "jpn_Jpan": 0.03553104151369, "fra_Latn": 0.0381835560678536, "spa_Latn": 0.0387222793083669, "deu_Latn": 0.0419925340453022, "cmn_Hani": 0.0454067521384114, "rus_Cyrl": 0.0500198157431261}, "batch_size": 4, "seq_len": 4096, "n_views": 2, "seed": 42, "add_bos": true, "add_eos": true, "load_async": true, "prefetch_size": 1024, "tokenizer": {"name": "supertokenizer", "path": "meta-llama/Llama-3.2-1B", "tokenizers": [{"name": "huggingface", "path": "flexitok/bpe_ltr_arb_Arab_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_ces_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_cmn_Hani_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_dan_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_deu_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_ell_Grek_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_fas_Arab_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_fra_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_fw_edu_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_hun_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_ind_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_ita_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_jpn_Jpan_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_nld_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_pol_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_por_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_rus_Cyrl_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_spa_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_swe_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_tur_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_vie_Latn_8000_v2", "load_supermapping": true}], "load_supermapping": false, "dropout": 0.0, "seed": 42, "superset_code_name": "8k_v2", "n_words": 122447}, "routing": {"source_to_tokenizer": {"arb_Arab": "flexitok/bpe_ltr_arb_Arab_8000_v2", "ces_Latn": "flexitok/bpe_ltr_ces_Latn_8000_v2", "cmn_Hani": "flexitok/bpe_ltr_cmn_Hani_8000_v2", "dan_Latn": "flexitok/bpe_ltr_dan_Latn_8000_v2", "deu_Latn": "flexitok/bpe_ltr_deu_Latn_8000_v2", "ell_Grek": "flexitok/bpe_ltr_ell_Grek_8000_v2", "fas_Arab": "flexitok/bpe_ltr_fas_Arab_8000_v2", "fra_Latn": "flexitok/bpe_ltr_fra_Latn_8000_v2", "fw_edu": "flexitok/bpe_ltr_fw_edu_8000_v2", "hun_Latn": "flexitok/bpe_ltr_hun_Latn_8000_v2", "ind_Latn": "flexitok/bpe_ltr_ind_Latn_8000_v2", "ita_Latn": "flexitok/bpe_ltr_ita_Latn_8000_v2", "jpn_Jpan": "flexitok/bpe_ltr_jpn_Jpan_8000_v2", "nld_Latn": "flexitok/bpe_ltr_nld_Latn_8000_v2", "pol_Latn": "flexitok/bpe_ltr_pol_Latn_8000_v2", "por_Latn": "flexitok/bpe_ltr_por_Latn_8000_v2", "rus_Cyrl": "flexitok/bpe_ltr_rus_Cyrl_8000_v2", "spa_Latn": "flexitok/bpe_ltr_spa_Latn_8000_v2", "swe_Latn": "flexitok/bpe_ltr_swe_Latn_8000_v2", "tur_Latn": "flexitok/bpe_ltr_tur_Latn_8000_v2", "vie_Latn": "flexitok/bpe_ltr_vie_Latn_8000_v2"}, "task_to_tokenizer": {}, "suitable_tokenizer_probability": 1.0}}, "optim": {"lr": 0.001, "weight_decay": 0.1, "epsilon": 1e-08, "beta1": 0.9, "beta2": 0.95, "clip": 1.0, "scheduler": "cosine", "warmup": 2000, "lr_min_ratio": 1e-06, "cycle_length": 1.0, "cosine_theta": 1.0, "annealing_step": 1000, "decay_fraction": 0.1, "exp_factor": 0.5}, "model": {"dim": 2048, "n_layers": 25, "head_dim": null, "n_heads": 16, "n_kv_heads": null, "ffn_dim_multiplier": null, "multiple_of": 256, "norm_eps": 1e-05, "rope_theta": 10000.0, "init_base_std": null, "init_std_factor": "disabled", "max_seqlen": 4096, "seed": 42, "vocab_size": 122447, "weight_tying": false, "sliding_window": null, "use_factorized_embeddings": false, "factorized_embedding_dim": 0}, "distributed": {"dp_shard": 1, "dp_replicate": 8, "tp_size": 1, "selective_activation_checkpointing": false, "compile": true, "fsdp_type": "full_shard", "model_dtype": "bf16", "float8_recipe": null, "float8_filter": "layers\\.[0-9]+\\.", "matmul_allow_tf32": false, "detect_anomaly": false, "compile_cache_size_limit": 8, "spawn_method": "forkserver"}, "env": {"MKL_SERVICE_FORCE_INTEL": "GNU", "OMP_NUM_THREADS": "1", "MKL_NUM_THREADS": "1", "ENABLE_INTRA_NODE_COMM": "1", "TORCH_NCCL_AVOID_RECORD_STREAMS": "1", "NCCL_IB_TIMEOUT": "22", "NCCL_DEBUG": "INFO", "TORCH_NCCL_ASYNC_ERROR_HANDLING": "1"}, "checkpoint": {"dump": {"every": 10000, "keep": -1}, "eval": {"every": 10000, "keep": -1}, "path": "/fsx/craffel/lingua_logs/checkpoints/8k_v2_oracle/", "init_ckpt_path": null, "load_init_optimizer_state": false, "save_init_ckpt": false}, "profiling": {"run": true, "trace_folder": "profiling", "mem_warmup": 0, "mem_steps": 4, "profile_warmup": 100, "profile_steps": 4}, "logging": {"freq": 1, "acc_freq": null, "wandb": null}, "async_eval_gpus": 8, "eval": {"harness": {"tasks": ["hellaswag", "piqa", "arc_easy", "arc_challenge", "include_base_44_arabic", "include_base_44_chinese", "include_base_44_german", "include_base_44_greek", "include_base_44_persian", "include_base_44_french", "include_base_44_hungarian", "include_base_44_indonesian", "include_base_44_italian", "include_base_44_japanese", "include_base_44_dutch", "include_base_44_polish", "include_base_44_portuguese", "include_base_44_russian", "include_base_44_spanish", "include_base_44_turkish", "include_base_44_vietnamese", "belebele_arb_Arab", "belebele_ces_Latn", "belebele_zho_Hans", "belebele_dan_Latn", "belebele_deu_Latn", "belebele_ell_Grek", "belebele_pes_Arab", "belebele_fra_Latn", "belebele_hun_Latn", "belebele_ind_Latn", "belebele_ita_Latn", "belebele_jpn_Jpan", "belebele_nld_Latn", "belebele_pol_Latn", "belebele_por_Latn", "belebele_rus_Cyrl", "belebele_spa_Latn", "belebele_swe_Latn", "belebele_tur_Latn", "belebele_vie_Latn", "belebele_eng_Latn", "xnli_ar", "xnli_zh", "xnli_de", "xnli_el", "xnli_en", "xnli_es", "xnli_fr", "xnli_ru", "xnli_tr", "xnli_vi"]}, "generator": {"max_tokens": 16384, "dtype": "bf16", "add_bos": false}}}
|
8k_v2_oracle/0000100000/params.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"name": "flexitok_8k_v2_oracle", "dump_dir": "/fsx/craffel/lingua_logs/8k_v2_oracle", "seed": 777, "grad_acc_steps": 8, "gc_collect_freq": 1000, "probe_freq": null, "steps": 100000, "data": {"root_dir": "/scratch/craffel/lingua/data/flexitok/", "sources": {"fw_edu": 0.4, "dan_Latn": 0.0216582869670702, "swe_Latn": 0.0216359765418466, "vie_Latn": 0.0197485510268674, "hun_Latn": 0.0247194573562308, "fas_Arab": 0.0205634624231076, "tur_Latn": 0.0235455794841729, "ces_Latn": 0.0248024455266208, "arb_Arab": 0.0234323706569333, "ell_Grek": 0.0233670886888026, "ind_Latn": 0.0269322054593488, "nld_Latn": 0.0277796326621489, "pol_Latn": 0.0294120104572311, "por_Latn": 0.0301413168306825, "ita_Latn": 0.0324056371021865, "jpn_Jpan": 0.03553104151369, "fra_Latn": 0.0381835560678536, "spa_Latn": 0.0387222793083669, "deu_Latn": 0.0419925340453022, "cmn_Hani": 0.0454067521384114, "rus_Cyrl": 0.0500198157431261}, "batch_size": 4, "seq_len": 4096, "n_views": 2, "seed": 42, "add_bos": true, "add_eos": true, "load_async": true, "prefetch_size": 1024, "tokenizer": {"name": "supertokenizer", "path": "meta-llama/Llama-3.2-1B", "tokenizers": [{"name": "huggingface", "path": "flexitok/bpe_ltr_arb_Arab_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_ces_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_cmn_Hani_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_dan_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_deu_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_ell_Grek_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_fas_Arab_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_fra_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_fw_edu_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_hun_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_ind_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_ita_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_jpn_Jpan_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_nld_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_pol_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_por_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_rus_Cyrl_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_spa_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_swe_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_tur_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_vie_Latn_8000_v2", "load_supermapping": true}], "load_supermapping": false, "dropout": 0.0, "seed": 42, "superset_code_name": "8k_v2", "n_words": 122447}, "routing": {"source_to_tokenizer": {"arb_Arab": "flexitok/bpe_ltr_arb_Arab_8000_v2", "ces_Latn": "flexitok/bpe_ltr_ces_Latn_8000_v2", "cmn_Hani": "flexitok/bpe_ltr_cmn_Hani_8000_v2", "dan_Latn": "flexitok/bpe_ltr_dan_Latn_8000_v2", "deu_Latn": "flexitok/bpe_ltr_deu_Latn_8000_v2", "ell_Grek": "flexitok/bpe_ltr_ell_Grek_8000_v2", "fas_Arab": "flexitok/bpe_ltr_fas_Arab_8000_v2", "fra_Latn": "flexitok/bpe_ltr_fra_Latn_8000_v2", "fw_edu": "flexitok/bpe_ltr_fw_edu_8000_v2", "hun_Latn": "flexitok/bpe_ltr_hun_Latn_8000_v2", "ind_Latn": "flexitok/bpe_ltr_ind_Latn_8000_v2", "ita_Latn": "flexitok/bpe_ltr_ita_Latn_8000_v2", "jpn_Jpan": "flexitok/bpe_ltr_jpn_Jpan_8000_v2", "nld_Latn": "flexitok/bpe_ltr_nld_Latn_8000_v2", "pol_Latn": "flexitok/bpe_ltr_pol_Latn_8000_v2", "por_Latn": "flexitok/bpe_ltr_por_Latn_8000_v2", "rus_Cyrl": "flexitok/bpe_ltr_rus_Cyrl_8000_v2", "spa_Latn": "flexitok/bpe_ltr_spa_Latn_8000_v2", "swe_Latn": "flexitok/bpe_ltr_swe_Latn_8000_v2", "tur_Latn": "flexitok/bpe_ltr_tur_Latn_8000_v2", "vie_Latn": "flexitok/bpe_ltr_vie_Latn_8000_v2"}, "task_to_tokenizer": {}, "suitable_tokenizer_probability": 1.0}}, "optim": {"lr": 0.001, "weight_decay": 0.1, "epsilon": 1e-08, "beta1": 0.9, "beta2": 0.95, "clip": 1.0, "scheduler": "cosine", "warmup": 2000, "lr_min_ratio": 1e-06, "cycle_length": 1.0, "cosine_theta": 1.0, "annealing_step": 1000, "decay_fraction": 0.1, "exp_factor": 0.5}, "model": {"dim": 2048, "n_layers": 25, "head_dim": null, "n_heads": 16, "n_kv_heads": null, "ffn_dim_multiplier": null, "multiple_of": 256, "norm_eps": 1e-05, "rope_theta": 10000.0, "init_base_std": null, "init_std_factor": "disabled", "max_seqlen": 4096, "seed": 42, "vocab_size": 122447, "weight_tying": false, "sliding_window": null, "use_factorized_embeddings": false, "factorized_embedding_dim": 0}, "distributed": {"dp_shard": 1, "dp_replicate": 8, "tp_size": 1, "selective_activation_checkpointing": false, "compile": true, "fsdp_type": "full_shard", "model_dtype": "bf16", "float8_recipe": null, "float8_filter": "layers\\.[0-9]+\\.", "matmul_allow_tf32": false, "detect_anomaly": false, "compile_cache_size_limit": 8, "spawn_method": "forkserver"}, "env": {"MKL_SERVICE_FORCE_INTEL": "GNU", "OMP_NUM_THREADS": "1", "MKL_NUM_THREADS": "1", "ENABLE_INTRA_NODE_COMM": "1", "TORCH_NCCL_AVOID_RECORD_STREAMS": "1", "NCCL_IB_TIMEOUT": "22", "NCCL_DEBUG": "INFO", "TORCH_NCCL_ASYNC_ERROR_HANDLING": "1"}, "checkpoint": {"dump": {"every": 10000, "keep": -1}, "eval": {"every": 10000, "keep": -1}, "path": "/fsx/craffel/lingua_logs/checkpoints/8k_v2_oracle/", "init_ckpt_path": null, "load_init_optimizer_state": false, "save_init_ckpt": false}, "profiling": {"run": true, "trace_folder": "profiling", "mem_warmup": 0, "mem_steps": 4, "profile_warmup": 100, "profile_steps": 4}, "logging": {"freq": 1, "acc_freq": null, "wandb": null}, "async_eval_gpus": 8, "eval": {"harness": {"tasks": ["hellaswag", "piqa", "arc_easy", "arc_challenge", "include_base_44_arabic", "include_base_44_chinese", "include_base_44_german", "include_base_44_greek", "include_base_44_persian", "include_base_44_french", "include_base_44_hungarian", "include_base_44_indonesian", "include_base_44_italian", "include_base_44_japanese", "include_base_44_dutch", "include_base_44_polish", "include_base_44_portuguese", "include_base_44_russian", "include_base_44_spanish", "include_base_44_turkish", "include_base_44_vietnamese", "belebele_arb_Arab", "belebele_ces_Latn", "belebele_zho_Hans", "belebele_dan_Latn", "belebele_deu_Latn", "belebele_ell_Grek", "belebele_pes_Arab", "belebele_fra_Latn", "belebele_hun_Latn", "belebele_ind_Latn", "belebele_ita_Latn", "belebele_jpn_Jpan", "belebele_nld_Latn", "belebele_pol_Latn", "belebele_por_Latn", "belebele_rus_Cyrl", "belebele_spa_Latn", "belebele_swe_Latn", "belebele_tur_Latn", "belebele_vie_Latn", "belebele_eng_Latn", "xnli_ar", "xnli_zh", "xnli_de", "xnli_el", "xnli_en", "xnli_es", "xnli_fr", "xnli_ru", "xnli_tr", "xnli_vi"]}, "generator": {"max_tokens": 16384, "dtype": "bf16", "add_bos": false}}}
|
8k_v2_oracle/0000100000/train_state_00000.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"step": 100000, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 39, "it_state": {"it_state": {"root_dir": "/scratch/craffel/lingua/data/flexitok/", "sources": {"fw_edu": 0.4, "dan_Latn": 0.0216582869670702, "swe_Latn": 0.0216359765418466, "vie_Latn": 0.0197485510268674, "hun_Latn": 0.0247194573562308, "fas_Arab": 0.0205634624231076, "tur_Latn": 0.0235455794841729, "ces_Latn": 0.0248024455266208, "arb_Arab": 0.0234323706569333, "ell_Grek": 0.0233670886888026, "ind_Latn": 0.0269322054593488, "nld_Latn": 0.0277796326621489, "pol_Latn": 0.0294120104572311, "por_Latn": 0.0301413168306825, "ita_Latn": 0.0324056371021865, "jpn_Jpan": 0.03553104151369, "fra_Latn": 0.0381835560678536, "spa_Latn": 0.0387222793083669, "deu_Latn": 0.0419925340453022, "cmn_Hani": 0.0454067521384114, "rus_Cyrl": 0.0500198157431261}, "source_to_state": {"fw_edu": {"file_path": "/scratch/craffel/lingua/data/flexitok/fw_edu/fineweb_edu_100bt.chunk.00.jsonl", "position": 25510690902, "block_size": 1, "offset": 0, "current_iter": 0}, "dan_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/dan_Latn/fineweb_2_hq.dan_Latn.chunk.00.jsonl", "position": 1170984222, "block_size": 1, "offset": 0, "current_iter": 0}, "swe_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/swe_Latn/fineweb_2_hq.swe_Latn.chunk.00.jsonl", "position": 1095001432, "block_size": 1, "offset": 0, "current_iter": 0}, "vie_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/vie_Latn/fineweb_2_hq.vie_Latn.chunk.00.jsonl", "position": 1173349739, "block_size": 1, "offset": 0, "current_iter": 0}, "hun_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/hun_Latn/fineweb_2_hq.hun_Latn.chunk.00.jsonl", "position": 1563760506, "block_size": 1, "offset": 0, "current_iter": 0}, "fas_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/fas_Arab/fineweb_2_hq.fas_Arab.chunk.00.jsonl", "position": 1315349728, "block_size": 1, "offset": 0, "current_iter": 0}, "tur_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/tur_Latn/fineweb_2_hq.tur_Latn.chunk.00.jsonl", "position": 994808291, "block_size": 1, "offset": 0, "current_iter": 0}, "ces_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ces_Latn/fineweb_2_hq.ces_Latn.chunk.00.jsonl", "position": 1531554279, "block_size": 1, "offset": 0, "current_iter": 0}, "arb_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/arb_Arab/fineweb_2_hq.arb_Arab.chunk.00.jsonl", "position": 1714811545, "block_size": 1, "offset": 0, "current_iter": 0}, "ell_Grek": {"file_path": "/scratch/craffel/lingua/data/flexitok/ell_Grek/fineweb_2_hq.ell_Grek.chunk.00.jsonl", "position": 2133265153, "block_size": 1, "offset": 0, "current_iter": 0}, "ind_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ind_Latn/fineweb_2_hq.ind_Latn.chunk.00.jsonl", "position": 1516544390, "block_size": 1, "offset": 0, "current_iter": 0}, "nld_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/nld_Latn/fineweb_2_hq.nld_Latn.chunk.00.jsonl", "position": 1209319277, "block_size": 1, "offset": 0, "current_iter": 0}, "pol_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/pol_Latn/fineweb_2_hq.pol_Latn.chunk.00.jsonl", "position": 1269412369, "block_size": 1, "offset": 0, "current_iter": 0}, "por_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/por_Latn/fineweb_2_hq.por_Latn.chunk.00.jsonl", "position": 1318310880, "block_size": 1, "offset": 0, "current_iter": 0}, "ita_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ita_Latn/fineweb_2_hq.ita_Latn.chunk.00.jsonl", "position": 1473242899, "block_size": 1, "offset": 0, "current_iter": 0}, "jpn_Jpan": {"file_path": "/scratch/craffel/lingua/data/flexitok/jpn_Jpan/fineweb_2_hq.jpn_Jpan.chunk.00.jsonl", "position": 1542289821, "block_size": 1, "offset": 0, "current_iter": 0}, "fra_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/fra_Latn/fineweb_2_hq.fra_Latn.chunk.00.jsonl", "position": 2048051170, "block_size": 1, "offset": 0, "current_iter": 0}, "spa_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/spa_Latn/fineweb_2_hq.spa_Latn.chunk.00.jsonl", "position": 1839620471, "block_size": 1, "offset": 0, "current_iter": 0}, "deu_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/deu_Latn/fineweb_2_hq.deu_Latn.chunk.00.jsonl", "position": 2231499654, "block_size": 1, "offset": 0, "current_iter": 0}, "cmn_Hani": {"file_path": "/scratch/craffel/lingua/data/flexitok/cmn_Hani/fineweb_2_hq.cmn_Hani.chunk.00.jsonl", "position": 2366139931, "block_size": 1, "offset": 0, "current_iter": 0}, "rus_Cyrl": {"file_path": "/scratch/craffel/lingua/data/flexitok/rus_Cyrl/fineweb_2_hq.rus_Cyrl.chunk.00.jsonl", "position": 5564240393, "block_size": 1, "offset": 0, "current_iter": 0}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 121214915572945882164515696165463633932, "inc": 252101603063402394885084957393789173453}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "supertokenizer", "path": "meta-llama/Llama-3.2-1B", "tokenizers": [{"name": "huggingface", "path": "flexitok/bpe_ltr_arb_Arab_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_ces_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_cmn_Hani_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_dan_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_deu_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_ell_Grek_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_fas_Arab_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_fra_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_fw_edu_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_hun_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_ind_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_ita_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_jpn_Jpan_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_nld_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_pol_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_por_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_rus_Cyrl_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_spa_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_swe_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_tur_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_vie_Latn_8000_v2", "load_supermapping": true}], "dropout": 0.0, "rng_state": {"bit_generator": "PCG64", "state": {"state": 65436358356090165253535605596645261064, "inc": 332724090758049132448979897138935081983}, "has_uint32": 0, "uinteger": 0}, "seed": 42, "superset_code_name": "8k_v2", "n_words": 122447, "routing": {"source_to_tokenizer": {"arb_Arab": "flexitok/bpe_ltr_arb_Arab_8000_v2", "ces_Latn": "flexitok/bpe_ltr_ces_Latn_8000_v2", "cmn_Hani": "flexitok/bpe_ltr_cmn_Hani_8000_v2", "dan_Latn": "flexitok/bpe_ltr_dan_Latn_8000_v2", "deu_Latn": "flexitok/bpe_ltr_deu_Latn_8000_v2", "ell_Grek": "flexitok/bpe_ltr_ell_Grek_8000_v2", "fas_Arab": "flexitok/bpe_ltr_fas_Arab_8000_v2", "fra_Latn": "flexitok/bpe_ltr_fra_Latn_8000_v2", "fw_edu": "flexitok/bpe_ltr_fw_edu_8000_v2", "hun_Latn": "flexitok/bpe_ltr_hun_Latn_8000_v2", "ind_Latn": "flexitok/bpe_ltr_ind_Latn_8000_v2", "ita_Latn": "flexitok/bpe_ltr_ita_Latn_8000_v2", "jpn_Jpan": "flexitok/bpe_ltr_jpn_Jpan_8000_v2", "nld_Latn": "flexitok/bpe_ltr_nld_Latn_8000_v2", "pol_Latn": "flexitok/bpe_ltr_pol_Latn_8000_v2", "por_Latn": "flexitok/bpe_ltr_por_Latn_8000_v2", "rus_Cyrl": "flexitok/bpe_ltr_rus_Cyrl_8000_v2", "spa_Latn": "flexitok/bpe_ltr_spa_Latn_8000_v2", "swe_Latn": "flexitok/bpe_ltr_swe_Latn_8000_v2", "tur_Latn": "flexitok/bpe_ltr_tur_Latn_8000_v2", "vie_Latn": "flexitok/bpe_ltr_vie_Latn_8000_v2"}, "suitable_tokenizer_probability": 1.0}}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 256, "rng_state": {"bit_generator": "PCG64", "state": {"state": 300569569296678341640414112158566886944, "inc": 257317082376085721142933171929815648017}, "has_uint32": 1, "uinteger": 630457105}, "batch_size": 4, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.001], "last_epoch": 100000, "verbose": false, "_step_count": 100001, "_get_lr_called_within_step": false, "_last_lr": [0.001], "lr_lambdas": [{}]}}
|
8k_v2_oracle/0000100000/train_state_00001.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"step": 100000, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 713, "it_state": {"it_state": {"root_dir": "/scratch/craffel/lingua/data/flexitok/", "sources": {"fw_edu": 0.4, "dan_Latn": 0.0216582869670702, "swe_Latn": 0.0216359765418466, "vie_Latn": 0.0197485510268674, "hun_Latn": 0.0247194573562308, "fas_Arab": 0.0205634624231076, "tur_Latn": 0.0235455794841729, "ces_Latn": 0.0248024455266208, "arb_Arab": 0.0234323706569333, "ell_Grek": 0.0233670886888026, "ind_Latn": 0.0269322054593488, "nld_Latn": 0.0277796326621489, "pol_Latn": 0.0294120104572311, "por_Latn": 0.0301413168306825, "ita_Latn": 0.0324056371021865, "jpn_Jpan": 0.03553104151369, "fra_Latn": 0.0381835560678536, "spa_Latn": 0.0387222793083669, "deu_Latn": 0.0419925340453022, "cmn_Hani": 0.0454067521384114, "rus_Cyrl": 0.0500198157431261}, "source_to_state": {"fw_edu": {"file_path": "/scratch/craffel/lingua/data/flexitok/fw_edu/fineweb_edu_100bt.chunk.01.jsonl", "position": 25506330918, "block_size": 1, "offset": 0, "current_iter": 0}, "dan_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/dan_Latn/fineweb_2_hq.dan_Latn.chunk.01.jsonl", "position": 1162800601, "block_size": 1, "offset": 0, "current_iter": 0}, "swe_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/swe_Latn/fineweb_2_hq.swe_Latn.chunk.01.jsonl", "position": 1113313989, "block_size": 1, "offset": 0, "current_iter": 0}, "vie_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/vie_Latn/fineweb_2_hq.vie_Latn.chunk.01.jsonl", "position": 1173121221, "block_size": 1, "offset": 0, "current_iter": 0}, "hun_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/hun_Latn/fineweb_2_hq.hun_Latn.chunk.01.jsonl", "position": 1551972570, "block_size": 1, "offset": 0, "current_iter": 0}, "fas_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/fas_Arab/fineweb_2_hq.fas_Arab.chunk.01.jsonl", "position": 1314465136, "block_size": 1, "offset": 0, "current_iter": 0}, "tur_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/tur_Latn/fineweb_2_hq.tur_Latn.chunk.01.jsonl", "position": 988825373, "block_size": 1, "offset": 0, "current_iter": 0}, "ces_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ces_Latn/fineweb_2_hq.ces_Latn.chunk.01.jsonl", "position": 1541786445, "block_size": 1, "offset": 0, "current_iter": 0}, "arb_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/arb_Arab/fineweb_2_hq.arb_Arab.chunk.01.jsonl", "position": 1725300021, "block_size": 1, "offset": 0, "current_iter": 0}, "ell_Grek": {"file_path": "/scratch/craffel/lingua/data/flexitok/ell_Grek/fineweb_2_hq.ell_Grek.chunk.01.jsonl", "position": 2146216856, "block_size": 1, "offset": 0, "current_iter": 0}, "ind_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ind_Latn/fineweb_2_hq.ind_Latn.chunk.01.jsonl", "position": 1511433508, "block_size": 1, "offset": 0, "current_iter": 0}, "nld_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/nld_Latn/fineweb_2_hq.nld_Latn.chunk.01.jsonl", "position": 1206353097, "block_size": 1, "offset": 0, "current_iter": 0}, "pol_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/pol_Latn/fineweb_2_hq.pol_Latn.chunk.01.jsonl", "position": 1274642355, "block_size": 1, "offset": 0, "current_iter": 0}, "por_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/por_Latn/fineweb_2_hq.por_Latn.chunk.01.jsonl", "position": 1326191785, "block_size": 1, "offset": 0, "current_iter": 0}, "ita_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ita_Latn/fineweb_2_hq.ita_Latn.chunk.01.jsonl", "position": 1473065775, "block_size": 1, "offset": 0, "current_iter": 0}, "jpn_Jpan": {"file_path": "/scratch/craffel/lingua/data/flexitok/jpn_Jpan/fineweb_2_hq.jpn_Jpan.chunk.01.jsonl", "position": 1540324812, "block_size": 1, "offset": 0, "current_iter": 0}, "fra_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/fra_Latn/fineweb_2_hq.fra_Latn.chunk.01.jsonl", "position": 2051736333, "block_size": 1, "offset": 0, "current_iter": 0}, "spa_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/spa_Latn/fineweb_2_hq.spa_Latn.chunk.01.jsonl", "position": 1859100108, "block_size": 1, "offset": 0, "current_iter": 0}, "deu_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/deu_Latn/fineweb_2_hq.deu_Latn.chunk.01.jsonl", "position": 2230890237, "block_size": 1, "offset": 0, "current_iter": 0}, "cmn_Hani": {"file_path": "/scratch/craffel/lingua/data/flexitok/cmn_Hani/fineweb_2_hq.cmn_Hani.chunk.01.jsonl", "position": 2368658175, "block_size": 1, "offset": 0, "current_iter": 0}, "rus_Cyrl": {"file_path": "/scratch/craffel/lingua/data/flexitok/rus_Cyrl/fineweb_2_hq.rus_Cyrl.chunk.01.jsonl", "position": 5520970337, "block_size": 1, "offset": 0, "current_iter": 0}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 237112052744193480960593809166607546258, "inc": 246509925186285949978196491240064802315}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "supertokenizer", "path": "meta-llama/Llama-3.2-1B", "tokenizers": [{"name": "huggingface", "path": "flexitok/bpe_ltr_arb_Arab_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_ces_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_cmn_Hani_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_dan_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_deu_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_ell_Grek_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_fas_Arab_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_fra_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_fw_edu_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_hun_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_ind_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_ita_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_jpn_Jpan_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_nld_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_pol_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_por_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_rus_Cyrl_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_spa_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_swe_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_tur_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_vie_Latn_8000_v2", "load_supermapping": true}], "dropout": 0.0, "rng_state": {"bit_generator": "PCG64", "state": {"state": 99184986565622525802566009615340613528, "inc": 332724090758049132448979897138935081983}, "has_uint32": 0, "uinteger": 0}, "seed": 42, "superset_code_name": "8k_v2", "n_words": 122447, "routing": {"source_to_tokenizer": {"arb_Arab": "flexitok/bpe_ltr_arb_Arab_8000_v2", "ces_Latn": "flexitok/bpe_ltr_ces_Latn_8000_v2", "cmn_Hani": "flexitok/bpe_ltr_cmn_Hani_8000_v2", "dan_Latn": "flexitok/bpe_ltr_dan_Latn_8000_v2", "deu_Latn": "flexitok/bpe_ltr_deu_Latn_8000_v2", "ell_Grek": "flexitok/bpe_ltr_ell_Grek_8000_v2", "fas_Arab": "flexitok/bpe_ltr_fas_Arab_8000_v2", "fra_Latn": "flexitok/bpe_ltr_fra_Latn_8000_v2", "fw_edu": "flexitok/bpe_ltr_fw_edu_8000_v2", "hun_Latn": "flexitok/bpe_ltr_hun_Latn_8000_v2", "ind_Latn": "flexitok/bpe_ltr_ind_Latn_8000_v2", "ita_Latn": "flexitok/bpe_ltr_ita_Latn_8000_v2", "jpn_Jpan": "flexitok/bpe_ltr_jpn_Jpan_8000_v2", "nld_Latn": "flexitok/bpe_ltr_nld_Latn_8000_v2", "pol_Latn": "flexitok/bpe_ltr_pol_Latn_8000_v2", "por_Latn": "flexitok/bpe_ltr_por_Latn_8000_v2", "rus_Cyrl": "flexitok/bpe_ltr_rus_Cyrl_8000_v2", "spa_Latn": "flexitok/bpe_ltr_spa_Latn_8000_v2", "swe_Latn": "flexitok/bpe_ltr_swe_Latn_8000_v2", "tur_Latn": "flexitok/bpe_ltr_tur_Latn_8000_v2", "vie_Latn": "flexitok/bpe_ltr_vie_Latn_8000_v2"}, "suitable_tokenizer_probability": 1.0}}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 256, "rng_state": {"bit_generator": "PCG64", "state": {"state": 298609680158430271867266436931522339460, "inc": 173555323965545256606922338259303677603}, "has_uint32": 1, "uinteger": 1182378492}, "batch_size": 4, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.001], "last_epoch": 100000, "verbose": false, "_step_count": 100001, "_get_lr_called_within_step": false, "_last_lr": [0.001], "lr_lambdas": [{}]}}
|
8k_v2_oracle/0000100000/train_state_00002.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"step": 100000, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 622, "it_state": {"it_state": {"root_dir": "/scratch/craffel/lingua/data/flexitok/", "sources": {"fw_edu": 0.4, "dan_Latn": 0.0216582869670702, "swe_Latn": 0.0216359765418466, "vie_Latn": 0.0197485510268674, "hun_Latn": 0.0247194573562308, "fas_Arab": 0.0205634624231076, "tur_Latn": 0.0235455794841729, "ces_Latn": 0.0248024455266208, "arb_Arab": 0.0234323706569333, "ell_Grek": 0.0233670886888026, "ind_Latn": 0.0269322054593488, "nld_Latn": 0.0277796326621489, "pol_Latn": 0.0294120104572311, "por_Latn": 0.0301413168306825, "ita_Latn": 0.0324056371021865, "jpn_Jpan": 0.03553104151369, "fra_Latn": 0.0381835560678536, "spa_Latn": 0.0387222793083669, "deu_Latn": 0.0419925340453022, "cmn_Hani": 0.0454067521384114, "rus_Cyrl": 0.0500198157431261}, "source_to_state": {"fw_edu": {"file_path": "/scratch/craffel/lingua/data/flexitok/fw_edu/fineweb_edu_100bt.chunk.02.jsonl", "position": 25521698330, "block_size": 1, "offset": 0, "current_iter": 0}, "dan_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/dan_Latn/fineweb_2_hq.dan_Latn.chunk.02.jsonl", "position": 1173454419, "block_size": 1, "offset": 0, "current_iter": 0}, "swe_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/swe_Latn/fineweb_2_hq.swe_Latn.chunk.02.jsonl", "position": 1095017419, "block_size": 1, "offset": 0, "current_iter": 0}, "vie_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/vie_Latn/fineweb_2_hq.vie_Latn.chunk.02.jsonl", "position": 1173052374, "block_size": 1, "offset": 0, "current_iter": 0}, "hun_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/hun_Latn/fineweb_2_hq.hun_Latn.chunk.02.jsonl", "position": 1554647905, "block_size": 1, "offset": 0, "current_iter": 0}, "fas_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/fas_Arab/fineweb_2_hq.fas_Arab.chunk.02.jsonl", "position": 1310507208, "block_size": 1, "offset": 0, "current_iter": 0}, "tur_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/tur_Latn/fineweb_2_hq.tur_Latn.chunk.02.jsonl", "position": 997503869, "block_size": 1, "offset": 0, "current_iter": 0}, "ces_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ces_Latn/fineweb_2_hq.ces_Latn.chunk.02.jsonl", "position": 1519576627, "block_size": 1, "offset": 0, "current_iter": 0}, "arb_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/arb_Arab/fineweb_2_hq.arb_Arab.chunk.02.jsonl", "position": 1732394843, "block_size": 1, "offset": 0, "current_iter": 0}, "ell_Grek": {"file_path": "/scratch/craffel/lingua/data/flexitok/ell_Grek/fineweb_2_hq.ell_Grek.chunk.02.jsonl", "position": 2130461889, "block_size": 1, "offset": 0, "current_iter": 0}, "ind_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ind_Latn/fineweb_2_hq.ind_Latn.chunk.02.jsonl", "position": 1523203560, "block_size": 1, "offset": 0, "current_iter": 0}, "nld_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/nld_Latn/fineweb_2_hq.nld_Latn.chunk.02.jsonl", "position": 1215379000, "block_size": 1, "offset": 0, "current_iter": 0}, "pol_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/pol_Latn/fineweb_2_hq.pol_Latn.chunk.02.jsonl", "position": 1268331750, "block_size": 1, "offset": 0, "current_iter": 0}, "por_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/por_Latn/fineweb_2_hq.por_Latn.chunk.02.jsonl", "position": 1327750124, "block_size": 1, "offset": 0, "current_iter": 0}, "ita_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ita_Latn/fineweb_2_hq.ita_Latn.chunk.02.jsonl", "position": 1479862793, "block_size": 1, "offset": 0, "current_iter": 0}, "jpn_Jpan": {"file_path": "/scratch/craffel/lingua/data/flexitok/jpn_Jpan/fineweb_2_hq.jpn_Jpan.chunk.02.jsonl", "position": 1550782799, "block_size": 1, "offset": 0, "current_iter": 0}, "fra_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/fra_Latn/fineweb_2_hq.fra_Latn.chunk.02.jsonl", "position": 2045993411, "block_size": 1, "offset": 0, "current_iter": 0}, "spa_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/spa_Latn/fineweb_2_hq.spa_Latn.chunk.02.jsonl", "position": 1832053557, "block_size": 1, "offset": 0, "current_iter": 0}, "deu_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/deu_Latn/fineweb_2_hq.deu_Latn.chunk.02.jsonl", "position": 2230188262, "block_size": 1, "offset": 0, "current_iter": 0}, "cmn_Hani": {"file_path": "/scratch/craffel/lingua/data/flexitok/cmn_Hani/fineweb_2_hq.cmn_Hani.chunk.02.jsonl", "position": 2367419010, "block_size": 1, "offset": 0, "current_iter": 0}, "rus_Cyrl": {"file_path": "/scratch/craffel/lingua/data/flexitok/rus_Cyrl/fineweb_2_hq.rus_Cyrl.chunk.02.jsonl", "position": 5527274120, "block_size": 1, "offset": 0, "current_iter": 0}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 37116463800457308209542915447374002284, "inc": 234358335530849485425064040311006256713}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "supertokenizer", "path": "meta-llama/Llama-3.2-1B", "tokenizers": [{"name": "huggingface", "path": "flexitok/bpe_ltr_arb_Arab_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_ces_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_cmn_Hani_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_dan_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_deu_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_ell_Grek_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_fas_Arab_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_fra_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_fw_edu_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_hun_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_ind_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_ita_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_jpn_Jpan_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_nld_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_pol_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_por_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_rus_Cyrl_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_spa_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_swe_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_tur_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_vie_Latn_8000_v2", "load_supermapping": true}], "dropout": 0.0, "rng_state": {"bit_generator": "PCG64", "state": {"state": 59982094850864077026078452690422651504, "inc": 332724090758049132448979897138935081983}, "has_uint32": 0, "uinteger": 0}, "seed": 42, "superset_code_name": "8k_v2", "n_words": 122447, "routing": {"source_to_tokenizer": {"arb_Arab": "flexitok/bpe_ltr_arb_Arab_8000_v2", "ces_Latn": "flexitok/bpe_ltr_ces_Latn_8000_v2", "cmn_Hani": "flexitok/bpe_ltr_cmn_Hani_8000_v2", "dan_Latn": "flexitok/bpe_ltr_dan_Latn_8000_v2", "deu_Latn": "flexitok/bpe_ltr_deu_Latn_8000_v2", "ell_Grek": "flexitok/bpe_ltr_ell_Grek_8000_v2", "fas_Arab": "flexitok/bpe_ltr_fas_Arab_8000_v2", "fra_Latn": "flexitok/bpe_ltr_fra_Latn_8000_v2", "fw_edu": "flexitok/bpe_ltr_fw_edu_8000_v2", "hun_Latn": "flexitok/bpe_ltr_hun_Latn_8000_v2", "ind_Latn": "flexitok/bpe_ltr_ind_Latn_8000_v2", "ita_Latn": "flexitok/bpe_ltr_ita_Latn_8000_v2", "jpn_Jpan": "flexitok/bpe_ltr_jpn_Jpan_8000_v2", "nld_Latn": "flexitok/bpe_ltr_nld_Latn_8000_v2", "pol_Latn": "flexitok/bpe_ltr_pol_Latn_8000_v2", "por_Latn": "flexitok/bpe_ltr_por_Latn_8000_v2", "rus_Cyrl": "flexitok/bpe_ltr_rus_Cyrl_8000_v2", "spa_Latn": "flexitok/bpe_ltr_spa_Latn_8000_v2", "swe_Latn": "flexitok/bpe_ltr_swe_Latn_8000_v2", "tur_Latn": "flexitok/bpe_ltr_tur_Latn_8000_v2", "vie_Latn": "flexitok/bpe_ltr_vie_Latn_8000_v2"}, "suitable_tokenizer_probability": 1.0}}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 256, "rng_state": {"bit_generator": "PCG64", "state": {"state": 172206642884315098528897268843500314535, "inc": 319170006889470250209362588441616495209}, "has_uint32": 1, "uinteger": 2183176397}, "batch_size": 4, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.001], "last_epoch": 100000, "verbose": false, "_step_count": 100001, "_get_lr_called_within_step": false, "_last_lr": [0.001], "lr_lambdas": [{}]}}
|
8k_v2_oracle/0000100000/train_state_00003.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"step": 100000, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 234, "it_state": {"it_state": {"root_dir": "/scratch/craffel/lingua/data/flexitok/", "sources": {"fw_edu": 0.4, "dan_Latn": 0.0216582869670702, "swe_Latn": 0.0216359765418466, "vie_Latn": 0.0197485510268674, "hun_Latn": 0.0247194573562308, "fas_Arab": 0.0205634624231076, "tur_Latn": 0.0235455794841729, "ces_Latn": 0.0248024455266208, "arb_Arab": 0.0234323706569333, "ell_Grek": 0.0233670886888026, "ind_Latn": 0.0269322054593488, "nld_Latn": 0.0277796326621489, "pol_Latn": 0.0294120104572311, "por_Latn": 0.0301413168306825, "ita_Latn": 0.0324056371021865, "jpn_Jpan": 0.03553104151369, "fra_Latn": 0.0381835560678536, "spa_Latn": 0.0387222793083669, "deu_Latn": 0.0419925340453022, "cmn_Hani": 0.0454067521384114, "rus_Cyrl": 0.0500198157431261}, "source_to_state": {"fw_edu": {"file_path": "/scratch/craffel/lingua/data/flexitok/fw_edu/fineweb_edu_100bt.chunk.03.jsonl", "position": 25501266138, "block_size": 1, "offset": 0, "current_iter": 0}, "dan_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/dan_Latn/fineweb_2_hq.dan_Latn.chunk.03.jsonl", "position": 1168748831, "block_size": 1, "offset": 0, "current_iter": 0}, "swe_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/swe_Latn/fineweb_2_hq.swe_Latn.chunk.03.jsonl", "position": 1095372541, "block_size": 1, "offset": 0, "current_iter": 0}, "vie_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/vie_Latn/fineweb_2_hq.vie_Latn.chunk.03.jsonl", "position": 1179856828, "block_size": 1, "offset": 0, "current_iter": 0}, "hun_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/hun_Latn/fineweb_2_hq.hun_Latn.chunk.03.jsonl", "position": 1567158956, "block_size": 1, "offset": 0, "current_iter": 0}, "fas_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/fas_Arab/fineweb_2_hq.fas_Arab.chunk.03.jsonl", "position": 1306183586, "block_size": 1, "offset": 0, "current_iter": 0}, "tur_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/tur_Latn/fineweb_2_hq.tur_Latn.chunk.03.jsonl", "position": 1003168974, "block_size": 1, "offset": 0, "current_iter": 0}, "ces_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ces_Latn/fineweb_2_hq.ces_Latn.chunk.03.jsonl", "position": 1526852132, "block_size": 1, "offset": 0, "current_iter": 0}, "arb_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/arb_Arab/fineweb_2_hq.arb_Arab.chunk.03.jsonl", "position": 1720439180, "block_size": 1, "offset": 0, "current_iter": 0}, "ell_Grek": {"file_path": "/scratch/craffel/lingua/data/flexitok/ell_Grek/fineweb_2_hq.ell_Grek.chunk.03.jsonl", "position": 2148776076, "block_size": 1, "offset": 0, "current_iter": 0}, "ind_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ind_Latn/fineweb_2_hq.ind_Latn.chunk.03.jsonl", "position": 1514098925, "block_size": 1, "offset": 0, "current_iter": 0}, "nld_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/nld_Latn/fineweb_2_hq.nld_Latn.chunk.03.jsonl", "position": 1218557244, "block_size": 1, "offset": 0, "current_iter": 0}, "pol_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/pol_Latn/fineweb_2_hq.pol_Latn.chunk.03.jsonl", "position": 1271053448, "block_size": 1, "offset": 0, "current_iter": 0}, "por_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/por_Latn/fineweb_2_hq.por_Latn.chunk.03.jsonl", "position": 1316633475, "block_size": 1, "offset": 0, "current_iter": 0}, "ita_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ita_Latn/fineweb_2_hq.ita_Latn.chunk.03.jsonl", "position": 1479890600, "block_size": 1, "offset": 0, "current_iter": 0}, "jpn_Jpan": {"file_path": "/scratch/craffel/lingua/data/flexitok/jpn_Jpan/fineweb_2_hq.jpn_Jpan.chunk.03.jsonl", "position": 1528047368, "block_size": 1, "offset": 0, "current_iter": 0}, "fra_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/fra_Latn/fineweb_2_hq.fra_Latn.chunk.03.jsonl", "position": 2040423419, "block_size": 1, "offset": 0, "current_iter": 0}, "spa_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/spa_Latn/fineweb_2_hq.spa_Latn.chunk.03.jsonl", "position": 1852132204, "block_size": 1, "offset": 0, "current_iter": 0}, "deu_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/deu_Latn/fineweb_2_hq.deu_Latn.chunk.03.jsonl", "position": 2235334664, "block_size": 1, "offset": 0, "current_iter": 0}, "cmn_Hani": {"file_path": "/scratch/craffel/lingua/data/flexitok/cmn_Hani/fineweb_2_hq.cmn_Hani.chunk.03.jsonl", "position": 2362371846, "block_size": 1, "offset": 0, "current_iter": 0}, "rus_Cyrl": {"file_path": "/scratch/craffel/lingua/data/flexitok/rus_Cyrl/fineweb_2_hq.rus_Cyrl.chunk.03.jsonl", "position": 5556517015, "block_size": 1, "offset": 0, "current_iter": 0}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 236806732572538080391822794618375473910, "inc": 148211758571781046255077612135386035203}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "supertokenizer", "path": "meta-llama/Llama-3.2-1B", "tokenizers": [{"name": "huggingface", "path": "flexitok/bpe_ltr_arb_Arab_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_ces_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_cmn_Hani_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_dan_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_deu_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_ell_Grek_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_fas_Arab_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_fra_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_fw_edu_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_hun_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_ind_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_ita_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_jpn_Jpan_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_nld_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_pol_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_por_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_rus_Cyrl_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_spa_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_swe_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_tur_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_vie_Latn_8000_v2", "load_supermapping": true}], "dropout": 0.0, "rng_state": {"bit_generator": "PCG64", "state": {"state": 122290128376685416138256870429703862248, "inc": 332724090758049132448979897138935081983}, "has_uint32": 0, "uinteger": 0}, "seed": 42, "superset_code_name": "8k_v2", "n_words": 122447, "routing": {"source_to_tokenizer": {"arb_Arab": "flexitok/bpe_ltr_arb_Arab_8000_v2", "ces_Latn": "flexitok/bpe_ltr_ces_Latn_8000_v2", "cmn_Hani": "flexitok/bpe_ltr_cmn_Hani_8000_v2", "dan_Latn": "flexitok/bpe_ltr_dan_Latn_8000_v2", "deu_Latn": "flexitok/bpe_ltr_deu_Latn_8000_v2", "ell_Grek": "flexitok/bpe_ltr_ell_Grek_8000_v2", "fas_Arab": "flexitok/bpe_ltr_fas_Arab_8000_v2", "fra_Latn": "flexitok/bpe_ltr_fra_Latn_8000_v2", "fw_edu": "flexitok/bpe_ltr_fw_edu_8000_v2", "hun_Latn": "flexitok/bpe_ltr_hun_Latn_8000_v2", "ind_Latn": "flexitok/bpe_ltr_ind_Latn_8000_v2", "ita_Latn": "flexitok/bpe_ltr_ita_Latn_8000_v2", "jpn_Jpan": "flexitok/bpe_ltr_jpn_Jpan_8000_v2", "nld_Latn": "flexitok/bpe_ltr_nld_Latn_8000_v2", "pol_Latn": "flexitok/bpe_ltr_pol_Latn_8000_v2", "por_Latn": "flexitok/bpe_ltr_por_Latn_8000_v2", "rus_Cyrl": "flexitok/bpe_ltr_rus_Cyrl_8000_v2", "spa_Latn": "flexitok/bpe_ltr_spa_Latn_8000_v2", "swe_Latn": "flexitok/bpe_ltr_swe_Latn_8000_v2", "tur_Latn": "flexitok/bpe_ltr_tur_Latn_8000_v2", "vie_Latn": "flexitok/bpe_ltr_vie_Latn_8000_v2"}, "suitable_tokenizer_probability": 1.0}}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 256, "rng_state": {"bit_generator": "PCG64", "state": {"state": 54691439170624420224489263842165716902, "inc": 115810872492597857501795428972873905393}, "has_uint32": 1, "uinteger": 1237320779}, "batch_size": 4, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.001], "last_epoch": 100000, "verbose": false, "_step_count": 100001, "_get_lr_called_within_step": false, "_last_lr": [0.001], "lr_lambdas": [{}]}}
|
8k_v2_oracle/0000100000/train_state_00004.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"step": 100000, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 151, "it_state": {"it_state": {"root_dir": "/scratch/craffel/lingua/data/flexitok/", "sources": {"fw_edu": 0.4, "dan_Latn": 0.0216582869670702, "swe_Latn": 0.0216359765418466, "vie_Latn": 0.0197485510268674, "hun_Latn": 0.0247194573562308, "fas_Arab": 0.0205634624231076, "tur_Latn": 0.0235455794841729, "ces_Latn": 0.0248024455266208, "arb_Arab": 0.0234323706569333, "ell_Grek": 0.0233670886888026, "ind_Latn": 0.0269322054593488, "nld_Latn": 0.0277796326621489, "pol_Latn": 0.0294120104572311, "por_Latn": 0.0301413168306825, "ita_Latn": 0.0324056371021865, "jpn_Jpan": 0.03553104151369, "fra_Latn": 0.0381835560678536, "spa_Latn": 0.0387222793083669, "deu_Latn": 0.0419925340453022, "cmn_Hani": 0.0454067521384114, "rus_Cyrl": 0.0500198157431261}, "source_to_state": {"fw_edu": {"file_path": "/scratch/craffel/lingua/data/flexitok/fw_edu/fineweb_edu_100bt.chunk.04.jsonl", "position": 25500155309, "block_size": 1, "offset": 0, "current_iter": 0}, "dan_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/dan_Latn/fineweb_2_hq.dan_Latn.chunk.04.jsonl", "position": 1175492767, "block_size": 1, "offset": 0, "current_iter": 0}, "swe_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/swe_Latn/fineweb_2_hq.swe_Latn.chunk.04.jsonl", "position": 1093323282, "block_size": 1, "offset": 0, "current_iter": 0}, "vie_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/vie_Latn/fineweb_2_hq.vie_Latn.chunk.04.jsonl", "position": 1175684355, "block_size": 1, "offset": 0, "current_iter": 0}, "hun_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/hun_Latn/fineweb_2_hq.hun_Latn.chunk.04.jsonl", "position": 1563154930, "block_size": 1, "offset": 0, "current_iter": 0}, "fas_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/fas_Arab/fineweb_2_hq.fas_Arab.chunk.04.jsonl", "position": 1301225814, "block_size": 1, "offset": 0, "current_iter": 0}, "tur_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/tur_Latn/fineweb_2_hq.tur_Latn.chunk.04.jsonl", "position": 996722098, "block_size": 1, "offset": 0, "current_iter": 0}, "ces_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ces_Latn/fineweb_2_hq.ces_Latn.chunk.04.jsonl", "position": 1535096495, "block_size": 1, "offset": 0, "current_iter": 0}, "arb_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/arb_Arab/fineweb_2_hq.arb_Arab.chunk.04.jsonl", "position": 1718509776, "block_size": 1, "offset": 0, "current_iter": 0}, "ell_Grek": {"file_path": "/scratch/craffel/lingua/data/flexitok/ell_Grek/fineweb_2_hq.ell_Grek.chunk.04.jsonl", "position": 2162572307, "block_size": 1, "offset": 0, "current_iter": 0}, "ind_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ind_Latn/fineweb_2_hq.ind_Latn.chunk.04.jsonl", "position": 1509023855, "block_size": 1, "offset": 0, "current_iter": 0}, "nld_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/nld_Latn/fineweb_2_hq.nld_Latn.chunk.04.jsonl", "position": 1213489749, "block_size": 1, "offset": 0, "current_iter": 0}, "pol_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/pol_Latn/fineweb_2_hq.pol_Latn.chunk.04.jsonl", "position": 1269921895, "block_size": 1, "offset": 0, "current_iter": 0}, "por_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/por_Latn/fineweb_2_hq.por_Latn.chunk.04.jsonl", "position": 1329027772, "block_size": 1, "offset": 0, "current_iter": 0}, "ita_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ita_Latn/fineweb_2_hq.ita_Latn.chunk.04.jsonl", "position": 1465510969, "block_size": 1, "offset": 0, "current_iter": 0}, "jpn_Jpan": {"file_path": "/scratch/craffel/lingua/data/flexitok/jpn_Jpan/fineweb_2_hq.jpn_Jpan.chunk.04.jsonl", "position": 1552010075, "block_size": 1, "offset": 0, "current_iter": 0}, "fra_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/fra_Latn/fineweb_2_hq.fra_Latn.chunk.04.jsonl", "position": 2053661003, "block_size": 1, "offset": 0, "current_iter": 0}, "spa_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/spa_Latn/fineweb_2_hq.spa_Latn.chunk.04.jsonl", "position": 1834806028, "block_size": 1, "offset": 0, "current_iter": 0}, "deu_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/deu_Latn/fineweb_2_hq.deu_Latn.chunk.04.jsonl", "position": 2226023517, "block_size": 1, "offset": 0, "current_iter": 0}, "cmn_Hani": {"file_path": "/scratch/craffel/lingua/data/flexitok/cmn_Hani/fineweb_2_hq.cmn_Hani.chunk.04.jsonl", "position": 2365753815, "block_size": 1, "offset": 0, "current_iter": 0}, "rus_Cyrl": {"file_path": "/scratch/craffel/lingua/data/flexitok/rus_Cyrl/fineweb_2_hq.rus_Cyrl.chunk.04.jsonl", "position": 5555101996, "block_size": 1, "offset": 0, "current_iter": 0}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 178825458887798052918828327505701288408, "inc": 186633262021180533256729114674950595327}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "supertokenizer", "path": "meta-llama/Llama-3.2-1B", "tokenizers": [{"name": "huggingface", "path": "flexitok/bpe_ltr_arb_Arab_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_ces_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_cmn_Hani_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_dan_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_deu_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_ell_Grek_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_fas_Arab_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_fra_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_fw_edu_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_hun_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_ind_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_ita_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_jpn_Jpan_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_nld_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_pol_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_por_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_rus_Cyrl_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_spa_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_swe_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_tur_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_vie_Latn_8000_v2", "load_supermapping": true}], "dropout": 0.0, "rng_state": {"bit_generator": "PCG64", "state": {"state": 179077862242065633534542821933030285628, "inc": 332724090758049132448979897138935081983}, "has_uint32": 0, "uinteger": 0}, "seed": 42, "superset_code_name": "8k_v2", "n_words": 122447, "routing": {"source_to_tokenizer": {"arb_Arab": "flexitok/bpe_ltr_arb_Arab_8000_v2", "ces_Latn": "flexitok/bpe_ltr_ces_Latn_8000_v2", "cmn_Hani": "flexitok/bpe_ltr_cmn_Hani_8000_v2", "dan_Latn": "flexitok/bpe_ltr_dan_Latn_8000_v2", "deu_Latn": "flexitok/bpe_ltr_deu_Latn_8000_v2", "ell_Grek": "flexitok/bpe_ltr_ell_Grek_8000_v2", "fas_Arab": "flexitok/bpe_ltr_fas_Arab_8000_v2", "fra_Latn": "flexitok/bpe_ltr_fra_Latn_8000_v2", "fw_edu": "flexitok/bpe_ltr_fw_edu_8000_v2", "hun_Latn": "flexitok/bpe_ltr_hun_Latn_8000_v2", "ind_Latn": "flexitok/bpe_ltr_ind_Latn_8000_v2", "ita_Latn": "flexitok/bpe_ltr_ita_Latn_8000_v2", "jpn_Jpan": "flexitok/bpe_ltr_jpn_Jpan_8000_v2", "nld_Latn": "flexitok/bpe_ltr_nld_Latn_8000_v2", "pol_Latn": "flexitok/bpe_ltr_pol_Latn_8000_v2", "por_Latn": "flexitok/bpe_ltr_por_Latn_8000_v2", "rus_Cyrl": "flexitok/bpe_ltr_rus_Cyrl_8000_v2", "spa_Latn": "flexitok/bpe_ltr_spa_Latn_8000_v2", "swe_Latn": "flexitok/bpe_ltr_swe_Latn_8000_v2", "tur_Latn": "flexitok/bpe_ltr_tur_Latn_8000_v2", "vie_Latn": "flexitok/bpe_ltr_vie_Latn_8000_v2"}, "suitable_tokenizer_probability": 1.0}}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 256, "rng_state": {"bit_generator": "PCG64", "state": {"state": 330736315247521707224292038935126153445, "inc": 303111205818808944921858206842105131807}, "has_uint32": 1, "uinteger": 211256137}, "batch_size": 4, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.001], "last_epoch": 100000, "verbose": false, "_step_count": 100001, "_get_lr_called_within_step": false, "_last_lr": [0.001], "lr_lambdas": [{}]}}
|
8k_v2_oracle/0000100000/train_state_00005.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"step": 100000, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 192, "it_state": {"it_state": {"root_dir": "/scratch/craffel/lingua/data/flexitok/", "sources": {"fw_edu": 0.4, "dan_Latn": 0.0216582869670702, "swe_Latn": 0.0216359765418466, "vie_Latn": 0.0197485510268674, "hun_Latn": 0.0247194573562308, "fas_Arab": 0.0205634624231076, "tur_Latn": 0.0235455794841729, "ces_Latn": 0.0248024455266208, "arb_Arab": 0.0234323706569333, "ell_Grek": 0.0233670886888026, "ind_Latn": 0.0269322054593488, "nld_Latn": 0.0277796326621489, "pol_Latn": 0.0294120104572311, "por_Latn": 0.0301413168306825, "ita_Latn": 0.0324056371021865, "jpn_Jpan": 0.03553104151369, "fra_Latn": 0.0381835560678536, "spa_Latn": 0.0387222793083669, "deu_Latn": 0.0419925340453022, "cmn_Hani": 0.0454067521384114, "rus_Cyrl": 0.0500198157431261}, "source_to_state": {"fw_edu": {"file_path": "/scratch/craffel/lingua/data/flexitok/fw_edu/fineweb_edu_100bt.chunk.05.jsonl", "position": 25529632420, "block_size": 1, "offset": 0, "current_iter": 0}, "dan_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/dan_Latn/fineweb_2_hq.dan_Latn.chunk.05.jsonl", "position": 1165357958, "block_size": 1, "offset": 0, "current_iter": 0}, "swe_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/swe_Latn/fineweb_2_hq.swe_Latn.chunk.05.jsonl", "position": 1095043635, "block_size": 1, "offset": 0, "current_iter": 0}, "vie_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/vie_Latn/fineweb_2_hq.vie_Latn.chunk.05.jsonl", "position": 1180906353, "block_size": 1, "offset": 0, "current_iter": 0}, "hun_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/hun_Latn/fineweb_2_hq.hun_Latn.chunk.05.jsonl", "position": 1552416074, "block_size": 1, "offset": 0, "current_iter": 0}, "fas_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/fas_Arab/fineweb_2_hq.fas_Arab.chunk.05.jsonl", "position": 1305742172, "block_size": 1, "offset": 0, "current_iter": 0}, "tur_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/tur_Latn/fineweb_2_hq.tur_Latn.chunk.05.jsonl", "position": 993500100, "block_size": 1, "offset": 0, "current_iter": 0}, "ces_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ces_Latn/fineweb_2_hq.ces_Latn.chunk.05.jsonl", "position": 1537328841, "block_size": 1, "offset": 0, "current_iter": 0}, "arb_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/arb_Arab/fineweb_2_hq.arb_Arab.chunk.05.jsonl", "position": 1722734564, "block_size": 1, "offset": 0, "current_iter": 0}, "ell_Grek": {"file_path": "/scratch/craffel/lingua/data/flexitok/ell_Grek/fineweb_2_hq.ell_Grek.chunk.05.jsonl", "position": 2146642730, "block_size": 1, "offset": 0, "current_iter": 0}, "ind_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ind_Latn/fineweb_2_hq.ind_Latn.chunk.05.jsonl", "position": 1521686994, "block_size": 1, "offset": 0, "current_iter": 0}, "nld_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/nld_Latn/fineweb_2_hq.nld_Latn.chunk.05.jsonl", "position": 1197208664, "block_size": 1, "offset": 0, "current_iter": 0}, "pol_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/pol_Latn/fineweb_2_hq.pol_Latn.chunk.05.jsonl", "position": 1282105529, "block_size": 1, "offset": 0, "current_iter": 0}, "por_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/por_Latn/fineweb_2_hq.por_Latn.chunk.05.jsonl", "position": 1322650924, "block_size": 1, "offset": 0, "current_iter": 0}, "ita_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ita_Latn/fineweb_2_hq.ita_Latn.chunk.05.jsonl", "position": 1471946929, "block_size": 1, "offset": 0, "current_iter": 0}, "jpn_Jpan": {"file_path": "/scratch/craffel/lingua/data/flexitok/jpn_Jpan/fineweb_2_hq.jpn_Jpan.chunk.05.jsonl", "position": 1544349148, "block_size": 1, "offset": 0, "current_iter": 0}, "fra_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/fra_Latn/fineweb_2_hq.fra_Latn.chunk.05.jsonl", "position": 2056669791, "block_size": 1, "offset": 0, "current_iter": 0}, "spa_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/spa_Latn/fineweb_2_hq.spa_Latn.chunk.05.jsonl", "position": 1845062840, "block_size": 1, "offset": 0, "current_iter": 0}, "deu_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/deu_Latn/fineweb_2_hq.deu_Latn.chunk.05.jsonl", "position": 2229992063, "block_size": 1, "offset": 0, "current_iter": 0}, "cmn_Hani": {"file_path": "/scratch/craffel/lingua/data/flexitok/cmn_Hani/fineweb_2_hq.cmn_Hani.chunk.05.jsonl", "position": 2358514024, "block_size": 1, "offset": 0, "current_iter": 0}, "rus_Cyrl": {"file_path": "/scratch/craffel/lingua/data/flexitok/rus_Cyrl/fineweb_2_hq.rus_Cyrl.chunk.05.jsonl", "position": 5508153919, "block_size": 1, "offset": 0, "current_iter": 0}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 261938107548139703796721353930524389914, "inc": 329233669073478483697346584247981015037}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "supertokenizer", "path": "meta-llama/Llama-3.2-1B", "tokenizers": [{"name": "huggingface", "path": "flexitok/bpe_ltr_arb_Arab_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_ces_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_cmn_Hani_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_dan_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_deu_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_ell_Grek_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_fas_Arab_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_fra_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_fw_edu_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_hun_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_ind_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_ita_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_jpn_Jpan_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_nld_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_pol_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_por_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_rus_Cyrl_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_spa_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_swe_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_tur_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_vie_Latn_8000_v2", "load_supermapping": true}], "dropout": 0.0, "rng_state": {"bit_generator": "PCG64", "state": {"state": 130611963731033540959070872881032316530, "inc": 332724090758049132448979897138935081983}, "has_uint32": 0, "uinteger": 0}, "seed": 42, "superset_code_name": "8k_v2", "n_words": 122447, "routing": {"source_to_tokenizer": {"arb_Arab": "flexitok/bpe_ltr_arb_Arab_8000_v2", "ces_Latn": "flexitok/bpe_ltr_ces_Latn_8000_v2", "cmn_Hani": "flexitok/bpe_ltr_cmn_Hani_8000_v2", "dan_Latn": "flexitok/bpe_ltr_dan_Latn_8000_v2", "deu_Latn": "flexitok/bpe_ltr_deu_Latn_8000_v2", "ell_Grek": "flexitok/bpe_ltr_ell_Grek_8000_v2", "fas_Arab": "flexitok/bpe_ltr_fas_Arab_8000_v2", "fra_Latn": "flexitok/bpe_ltr_fra_Latn_8000_v2", "fw_edu": "flexitok/bpe_ltr_fw_edu_8000_v2", "hun_Latn": "flexitok/bpe_ltr_hun_Latn_8000_v2", "ind_Latn": "flexitok/bpe_ltr_ind_Latn_8000_v2", "ita_Latn": "flexitok/bpe_ltr_ita_Latn_8000_v2", "jpn_Jpan": "flexitok/bpe_ltr_jpn_Jpan_8000_v2", "nld_Latn": "flexitok/bpe_ltr_nld_Latn_8000_v2", "pol_Latn": "flexitok/bpe_ltr_pol_Latn_8000_v2", "por_Latn": "flexitok/bpe_ltr_por_Latn_8000_v2", "rus_Cyrl": "flexitok/bpe_ltr_rus_Cyrl_8000_v2", "spa_Latn": "flexitok/bpe_ltr_spa_Latn_8000_v2", "swe_Latn": "flexitok/bpe_ltr_swe_Latn_8000_v2", "tur_Latn": "flexitok/bpe_ltr_tur_Latn_8000_v2", "vie_Latn": "flexitok/bpe_ltr_vie_Latn_8000_v2"}, "suitable_tokenizer_probability": 1.0}}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 256, "rng_state": {"bit_generator": "PCG64", "state": {"state": 152527936818944850177382785176910810558, "inc": 47382953940698287647753879262736142901}, "has_uint32": 1, "uinteger": 3675784275}, "batch_size": 4, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.001], "last_epoch": 100000, "verbose": false, "_step_count": 100001, "_get_lr_called_within_step": false, "_last_lr": [0.001], "lr_lambdas": [{}]}}
|
8k_v2_oracle/0000100000/train_state_00006.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"step": 100000, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 66, "it_state": {"it_state": {"root_dir": "/scratch/craffel/lingua/data/flexitok/", "sources": {"fw_edu": 0.4, "dan_Latn": 0.0216582869670702, "swe_Latn": 0.0216359765418466, "vie_Latn": 0.0197485510268674, "hun_Latn": 0.0247194573562308, "fas_Arab": 0.0205634624231076, "tur_Latn": 0.0235455794841729, "ces_Latn": 0.0248024455266208, "arb_Arab": 0.0234323706569333, "ell_Grek": 0.0233670886888026, "ind_Latn": 0.0269322054593488, "nld_Latn": 0.0277796326621489, "pol_Latn": 0.0294120104572311, "por_Latn": 0.0301413168306825, "ita_Latn": 0.0324056371021865, "jpn_Jpan": 0.03553104151369, "fra_Latn": 0.0381835560678536, "spa_Latn": 0.0387222793083669, "deu_Latn": 0.0419925340453022, "cmn_Hani": 0.0454067521384114, "rus_Cyrl": 0.0500198157431261}, "source_to_state": {"fw_edu": {"file_path": "/scratch/craffel/lingua/data/flexitok/fw_edu/fineweb_edu_100bt.chunk.06.jsonl", "position": 25519750032, "block_size": 1, "offset": 0, "current_iter": 0}, "dan_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/dan_Latn/fineweb_2_hq.dan_Latn.chunk.06.jsonl", "position": 1175793685, "block_size": 1, "offset": 0, "current_iter": 0}, "swe_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/swe_Latn/fineweb_2_hq.swe_Latn.chunk.06.jsonl", "position": 1096897550, "block_size": 1, "offset": 0, "current_iter": 0}, "vie_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/vie_Latn/fineweb_2_hq.vie_Latn.chunk.06.jsonl", "position": 1168846120, "block_size": 1, "offset": 0, "current_iter": 0}, "hun_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/hun_Latn/fineweb_2_hq.hun_Latn.chunk.06.jsonl", "position": 1556108488, "block_size": 1, "offset": 0, "current_iter": 0}, "fas_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/fas_Arab/fineweb_2_hq.fas_Arab.chunk.06.jsonl", "position": 1299539907, "block_size": 1, "offset": 0, "current_iter": 0}, "tur_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/tur_Latn/fineweb_2_hq.tur_Latn.chunk.06.jsonl", "position": 1002639606, "block_size": 1, "offset": 0, "current_iter": 0}, "ces_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ces_Latn/fineweb_2_hq.ces_Latn.chunk.06.jsonl", "position": 1538151730, "block_size": 1, "offset": 0, "current_iter": 0}, "arb_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/arb_Arab/fineweb_2_hq.arb_Arab.chunk.06.jsonl", "position": 1722720943, "block_size": 1, "offset": 0, "current_iter": 0}, "ell_Grek": {"file_path": "/scratch/craffel/lingua/data/flexitok/ell_Grek/fineweb_2_hq.ell_Grek.chunk.06.jsonl", "position": 2136656590, "block_size": 1, "offset": 0, "current_iter": 0}, "ind_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ind_Latn/fineweb_2_hq.ind_Latn.chunk.06.jsonl", "position": 1525470125, "block_size": 1, "offset": 0, "current_iter": 0}, "nld_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/nld_Latn/fineweb_2_hq.nld_Latn.chunk.06.jsonl", "position": 1208916174, "block_size": 1, "offset": 0, "current_iter": 0}, "pol_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/pol_Latn/fineweb_2_hq.pol_Latn.chunk.06.jsonl", "position": 1277639699, "block_size": 1, "offset": 0, "current_iter": 0}, "por_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/por_Latn/fineweb_2_hq.por_Latn.chunk.06.jsonl", "position": 1318529278, "block_size": 1, "offset": 0, "current_iter": 0}, "ita_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ita_Latn/fineweb_2_hq.ita_Latn.chunk.06.jsonl", "position": 1467204402, "block_size": 1, "offset": 0, "current_iter": 0}, "jpn_Jpan": {"file_path": "/scratch/craffel/lingua/data/flexitok/jpn_Jpan/fineweb_2_hq.jpn_Jpan.chunk.06.jsonl", "position": 1535658388, "block_size": 1, "offset": 0, "current_iter": 0}, "fra_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/fra_Latn/fineweb_2_hq.fra_Latn.chunk.06.jsonl", "position": 2047381056, "block_size": 1, "offset": 0, "current_iter": 0}, "spa_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/spa_Latn/fineweb_2_hq.spa_Latn.chunk.06.jsonl", "position": 1839216852, "block_size": 1, "offset": 0, "current_iter": 0}, "deu_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/deu_Latn/fineweb_2_hq.deu_Latn.chunk.06.jsonl", "position": 2232937471, "block_size": 1, "offset": 0, "current_iter": 0}, "cmn_Hani": {"file_path": "/scratch/craffel/lingua/data/flexitok/cmn_Hani/fineweb_2_hq.cmn_Hani.chunk.06.jsonl", "position": 2368296904, "block_size": 1, "offset": 0, "current_iter": 0}, "rus_Cyrl": {"file_path": "/scratch/craffel/lingua/data/flexitok/rus_Cyrl/fineweb_2_hq.rus_Cyrl.chunk.06.jsonl", "position": 5523936424, "block_size": 1, "offset": 0, "current_iter": 0}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 64957834578961677833094700715262998761, "inc": 95963489890761403814531195999220475639}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "supertokenizer", "path": "meta-llama/Llama-3.2-1B", "tokenizers": [{"name": "huggingface", "path": "flexitok/bpe_ltr_arb_Arab_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_ces_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_cmn_Hani_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_dan_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_deu_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_ell_Grek_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_fas_Arab_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_fra_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_fw_edu_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_hun_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_ind_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_ita_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_jpn_Jpan_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_nld_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_pol_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_por_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_rus_Cyrl_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_spa_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_swe_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_tur_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_vie_Latn_8000_v2", "load_supermapping": true}], "dropout": 0.0, "rng_state": {"bit_generator": "PCG64", "state": {"state": 70425334420641733839065737681959740542, "inc": 332724090758049132448979897138935081983}, "has_uint32": 0, "uinteger": 0}, "seed": 42, "superset_code_name": "8k_v2", "n_words": 122447, "routing": {"source_to_tokenizer": {"arb_Arab": "flexitok/bpe_ltr_arb_Arab_8000_v2", "ces_Latn": "flexitok/bpe_ltr_ces_Latn_8000_v2", "cmn_Hani": "flexitok/bpe_ltr_cmn_Hani_8000_v2", "dan_Latn": "flexitok/bpe_ltr_dan_Latn_8000_v2", "deu_Latn": "flexitok/bpe_ltr_deu_Latn_8000_v2", "ell_Grek": "flexitok/bpe_ltr_ell_Grek_8000_v2", "fas_Arab": "flexitok/bpe_ltr_fas_Arab_8000_v2", "fra_Latn": "flexitok/bpe_ltr_fra_Latn_8000_v2", "fw_edu": "flexitok/bpe_ltr_fw_edu_8000_v2", "hun_Latn": "flexitok/bpe_ltr_hun_Latn_8000_v2", "ind_Latn": "flexitok/bpe_ltr_ind_Latn_8000_v2", "ita_Latn": "flexitok/bpe_ltr_ita_Latn_8000_v2", "jpn_Jpan": "flexitok/bpe_ltr_jpn_Jpan_8000_v2", "nld_Latn": "flexitok/bpe_ltr_nld_Latn_8000_v2", "pol_Latn": "flexitok/bpe_ltr_pol_Latn_8000_v2", "por_Latn": "flexitok/bpe_ltr_por_Latn_8000_v2", "rus_Cyrl": "flexitok/bpe_ltr_rus_Cyrl_8000_v2", "spa_Latn": "flexitok/bpe_ltr_spa_Latn_8000_v2", "swe_Latn": "flexitok/bpe_ltr_swe_Latn_8000_v2", "tur_Latn": "flexitok/bpe_ltr_tur_Latn_8000_v2", "vie_Latn": "flexitok/bpe_ltr_vie_Latn_8000_v2"}, "suitable_tokenizer_probability": 1.0}}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 256, "rng_state": {"bit_generator": "PCG64", "state": {"state": 131915213571014219328634582687476804074, "inc": 72545526324180839152750112646078969085}, "has_uint32": 0, "uinteger": 2783644522}, "batch_size": 4, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.001], "last_epoch": 100000, "verbose": false, "_step_count": 100001, "_get_lr_called_within_step": false, "_last_lr": [0.001], "lr_lambdas": [{}]}}
|
8k_v2_oracle/0000100000/train_state_00007.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"step": 100000, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 288, "it_state": {"it_state": {"root_dir": "/scratch/craffel/lingua/data/flexitok/", "sources": {"fw_edu": 0.4, "dan_Latn": 0.0216582869670702, "swe_Latn": 0.0216359765418466, "vie_Latn": 0.0197485510268674, "hun_Latn": 0.0247194573562308, "fas_Arab": 0.0205634624231076, "tur_Latn": 0.0235455794841729, "ces_Latn": 0.0248024455266208, "arb_Arab": 0.0234323706569333, "ell_Grek": 0.0233670886888026, "ind_Latn": 0.0269322054593488, "nld_Latn": 0.0277796326621489, "pol_Latn": 0.0294120104572311, "por_Latn": 0.0301413168306825, "ita_Latn": 0.0324056371021865, "jpn_Jpan": 0.03553104151369, "fra_Latn": 0.0381835560678536, "spa_Latn": 0.0387222793083669, "deu_Latn": 0.0419925340453022, "cmn_Hani": 0.0454067521384114, "rus_Cyrl": 0.0500198157431261}, "source_to_state": {"fw_edu": {"file_path": "/scratch/craffel/lingua/data/flexitok/fw_edu/fineweb_edu_100bt.chunk.07.jsonl", "position": 25511328859, "block_size": 1, "offset": 0, "current_iter": 0}, "dan_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/dan_Latn/fineweb_2_hq.dan_Latn.chunk.07.jsonl", "position": 1183025862, "block_size": 1, "offset": 0, "current_iter": 0}, "swe_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/swe_Latn/fineweb_2_hq.swe_Latn.chunk.07.jsonl", "position": 1099119305, "block_size": 1, "offset": 0, "current_iter": 0}, "vie_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/vie_Latn/fineweb_2_hq.vie_Latn.chunk.07.jsonl", "position": 1174868185, "block_size": 1, "offset": 0, "current_iter": 0}, "hun_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/hun_Latn/fineweb_2_hq.hun_Latn.chunk.07.jsonl", "position": 1557424601, "block_size": 1, "offset": 0, "current_iter": 0}, "fas_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/fas_Arab/fineweb_2_hq.fas_Arab.chunk.07.jsonl", "position": 1311040648, "block_size": 1, "offset": 0, "current_iter": 0}, "tur_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/tur_Latn/fineweb_2_hq.tur_Latn.chunk.07.jsonl", "position": 1004249119, "block_size": 1, "offset": 0, "current_iter": 0}, "ces_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ces_Latn/fineweb_2_hq.ces_Latn.chunk.07.jsonl", "position": 1525983202, "block_size": 1, "offset": 0, "current_iter": 0}, "arb_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/arb_Arab/fineweb_2_hq.arb_Arab.chunk.07.jsonl", "position": 1720701005, "block_size": 1, "offset": 0, "current_iter": 0}, "ell_Grek": {"file_path": "/scratch/craffel/lingua/data/flexitok/ell_Grek/fineweb_2_hq.ell_Grek.chunk.07.jsonl", "position": 2148808482, "block_size": 1, "offset": 0, "current_iter": 0}, "ind_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ind_Latn/fineweb_2_hq.ind_Latn.chunk.07.jsonl", "position": 1511698839, "block_size": 1, "offset": 0, "current_iter": 0}, "nld_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/nld_Latn/fineweb_2_hq.nld_Latn.chunk.07.jsonl", "position": 1213022621, "block_size": 1, "offset": 0, "current_iter": 0}, "pol_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/pol_Latn/fineweb_2_hq.pol_Latn.chunk.07.jsonl", "position": 1274953159, "block_size": 1, "offset": 0, "current_iter": 0}, "por_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/por_Latn/fineweb_2_hq.por_Latn.chunk.07.jsonl", "position": 1315208274, "block_size": 1, "offset": 0, "current_iter": 0}, "ita_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ita_Latn/fineweb_2_hq.ita_Latn.chunk.07.jsonl", "position": 1471407057, "block_size": 1, "offset": 0, "current_iter": 0}, "jpn_Jpan": {"file_path": "/scratch/craffel/lingua/data/flexitok/jpn_Jpan/fineweb_2_hq.jpn_Jpan.chunk.07.jsonl", "position": 1551626502, "block_size": 1, "offset": 0, "current_iter": 0}, "fra_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/fra_Latn/fineweb_2_hq.fra_Latn.chunk.07.jsonl", "position": 2055718821, "block_size": 1, "offset": 0, "current_iter": 0}, "spa_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/spa_Latn/fineweb_2_hq.spa_Latn.chunk.07.jsonl", "position": 1846805458, "block_size": 1, "offset": 0, "current_iter": 0}, "deu_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/deu_Latn/fineweb_2_hq.deu_Latn.chunk.07.jsonl", "position": 2215729150, "block_size": 1, "offset": 0, "current_iter": 0}, "cmn_Hani": {"file_path": "/scratch/craffel/lingua/data/flexitok/cmn_Hani/fineweb_2_hq.cmn_Hani.chunk.07.jsonl", "position": 2364814861, "block_size": 1, "offset": 0, "current_iter": 0}, "rus_Cyrl": {"file_path": "/scratch/craffel/lingua/data/flexitok/rus_Cyrl/fineweb_2_hq.rus_Cyrl.chunk.07.jsonl", "position": 5510660750, "block_size": 1, "offset": 0, "current_iter": 0}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 160427883640929411836621959922306334019, "inc": 53245743019587277358203950863334653629}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "supertokenizer", "path": "meta-llama/Llama-3.2-1B", "tokenizers": [{"name": "huggingface", "path": "flexitok/bpe_ltr_arb_Arab_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_ces_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_cmn_Hani_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_dan_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_deu_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_ell_Grek_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_fas_Arab_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_fra_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_fw_edu_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_hun_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_ind_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_ita_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_jpn_Jpan_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_nld_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_pol_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_por_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_rus_Cyrl_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_spa_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_swe_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_tur_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_vie_Latn_8000_v2", "load_supermapping": true}], "dropout": 0.0, "rng_state": {"bit_generator": "PCG64", "state": {"state": 121024811975299487002309329125656463381, "inc": 332724090758049132448979897138935081983}, "has_uint32": 0, "uinteger": 0}, "seed": 42, "superset_code_name": "8k_v2", "n_words": 122447, "routing": {"source_to_tokenizer": {"arb_Arab": "flexitok/bpe_ltr_arb_Arab_8000_v2", "ces_Latn": "flexitok/bpe_ltr_ces_Latn_8000_v2", "cmn_Hani": "flexitok/bpe_ltr_cmn_Hani_8000_v2", "dan_Latn": "flexitok/bpe_ltr_dan_Latn_8000_v2", "deu_Latn": "flexitok/bpe_ltr_deu_Latn_8000_v2", "ell_Grek": "flexitok/bpe_ltr_ell_Grek_8000_v2", "fas_Arab": "flexitok/bpe_ltr_fas_Arab_8000_v2", "fra_Latn": "flexitok/bpe_ltr_fra_Latn_8000_v2", "fw_edu": "flexitok/bpe_ltr_fw_edu_8000_v2", "hun_Latn": "flexitok/bpe_ltr_hun_Latn_8000_v2", "ind_Latn": "flexitok/bpe_ltr_ind_Latn_8000_v2", "ita_Latn": "flexitok/bpe_ltr_ita_Latn_8000_v2", "jpn_Jpan": "flexitok/bpe_ltr_jpn_Jpan_8000_v2", "nld_Latn": "flexitok/bpe_ltr_nld_Latn_8000_v2", "pol_Latn": "flexitok/bpe_ltr_pol_Latn_8000_v2", "por_Latn": "flexitok/bpe_ltr_por_Latn_8000_v2", "rus_Cyrl": "flexitok/bpe_ltr_rus_Cyrl_8000_v2", "spa_Latn": "flexitok/bpe_ltr_spa_Latn_8000_v2", "swe_Latn": "flexitok/bpe_ltr_swe_Latn_8000_v2", "tur_Latn": "flexitok/bpe_ltr_tur_Latn_8000_v2", "vie_Latn": "flexitok/bpe_ltr_vie_Latn_8000_v2"}, "suitable_tokenizer_probability": 1.0}}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 256, "rng_state": {"bit_generator": "PCG64", "state": {"state": 239803483826676955776584746976189400951, "inc": 19761753544780285878460645500694854795}, "has_uint32": 1, "uinteger": 513301027}, "batch_size": 4, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.001], "last_epoch": 100000, "verbose": false, "_step_count": 100001, "_get_lr_called_within_step": false, "_last_lr": [0.001], "lr_lambdas": [{}]}}
|