craffel HF Staff commited on
Commit
5df6beb
·
verified ·
1 Parent(s): b98b098

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -103,3 +103,12 @@ llama_43k/0000043432/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
103
  llama_43k/0000043432/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
104
  llama_43k/0000043432/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
105
  flexitok_superset_albert_w_xglm/metrics.jsonl filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
103
  llama_43k/0000043432/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
104
  llama_43k/0000043432/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
105
  flexitok_superset_albert_w_xglm/metrics.jsonl filter=lfs diff=lfs merge=lfs -text
106
+ flexitok_superset_albert_w_xglm/0000100000/.metadata filter=lfs diff=lfs merge=lfs -text
107
+ flexitok_superset_albert_w_xglm/0000100000/__0_0.distcp filter=lfs diff=lfs merge=lfs -text
108
+ flexitok_superset_albert_w_xglm/0000100000/__1_0.distcp filter=lfs diff=lfs merge=lfs -text
109
+ flexitok_superset_albert_w_xglm/0000100000/__2_0.distcp filter=lfs diff=lfs merge=lfs -text
110
+ flexitok_superset_albert_w_xglm/0000100000/__3_0.distcp filter=lfs diff=lfs merge=lfs -text
111
+ flexitok_superset_albert_w_xglm/0000100000/__4_0.distcp filter=lfs diff=lfs merge=lfs -text
112
+ flexitok_superset_albert_w_xglm/0000100000/__5_0.distcp filter=lfs diff=lfs merge=lfs -text
113
+ flexitok_superset_albert_w_xglm/0000100000/__6_0.distcp filter=lfs diff=lfs merge=lfs -text
114
+ flexitok_superset_albert_w_xglm/0000100000/__7_0.distcp filter=lfs diff=lfs merge=lfs -text
flexitok_superset_albert_w_xglm/0000100000/.metadata ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cfa403fc3417917abc28bb84df5d9fa124e202c59eb475db655cd06629d8966
3
+ size 1158312
flexitok_superset_albert_w_xglm/0000100000/__0_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5822f19276c5db547afe4e0e12755cfba6fefecd192d94bcb8d28ad13493611
3
+ size 3238920312
flexitok_superset_albert_w_xglm/0000100000/__1_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe051c31daf40253e64a7a9e251f29ca83bbd002c2da2fc89d9aa4074f05dc25
3
+ size 3238969236
flexitok_superset_albert_w_xglm/0000100000/__2_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b89fffcf759e80e1d32bd098c8af838c294dee9ea0fac420a4601b40355915a9
3
+ size 3238960116
flexitok_superset_albert_w_xglm/0000100000/__3_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8fa9fe6a41400fde85a710973dc2321c1b61454a2bfc1245f1c6002b50862697
3
+ size 3238960116
flexitok_superset_albert_w_xglm/0000100000/__4_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ace22ff4e4d3fa05e987de6d52a9532e837ca83200b550cc0a21057d854da61
3
+ size 3238960116
flexitok_superset_albert_w_xglm/0000100000/__5_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f130c5a145e3e020e3f598254afedd9329f3d3401c214e7ab6141cba7dd86fd
3
+ size 3238960116
flexitok_superset_albert_w_xglm/0000100000/__6_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e6c33f85466e3bd9a32e206f7559bf2b934bc4c5d1773309f1d64d992e8475e
3
+ size 3238960116
flexitok_superset_albert_w_xglm/0000100000/__7_0.distcp ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e7af4b6771859b5d260d59aeab2bb6ee5b01907c927b5693963296f6f0de85e
3
+ size 3238888664
flexitok_superset_albert_w_xglm/0000100000/consolidated/consolidated.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3f0f86039308819fb11405dae1a518dce5d2259c52896abc9514622bae838d9
3
+ size 25905065318
flexitok_superset_albert_w_xglm/0000100000/consolidated/params.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"name": "flexitok_superset_albert_w_xglm", "dump_dir": "/fsx/craffel/lingua_logs/flexitok_superset_albert_w_xglm", "seed": 777, "grad_acc_steps": 16, "gc_collect_freq": 1000, "probe_freq": null, "steps": 100000, "data": {"root_dir": "/scratch/craffel/lingua/data/flexitok/", "sources": {"fw_edu": 0.4, "dan_Latn": 0.0216582869670702, "swe_Latn": 0.0216359765418466, "vie_Latn": 0.0197485510268674, "hun_Latn": 0.0247194573562308, "fas_Arab": 0.0205634624231076, "tur_Latn": 0.0235455794841729, "ces_Latn": 0.0248024455266208, "arb_Arab": 0.0234323706569333, "ell_Grek": 0.0233670886888026, "ind_Latn": 0.0269322054593488, "nld_Latn": 0.0277796326621489, "pol_Latn": 0.0294120104572311, "por_Latn": 0.0301413168306825, "ita_Latn": 0.0324056371021865, "jpn_Jpan": 0.03553104151369, "fra_Latn": 0.0381835560678536, "spa_Latn": 0.0387222793083669, "deu_Latn": 0.0419925340453022, "cmn_Hani": 0.0454067521384114, "rus_Cyrl": 0.0500198157431261}, "batch_size": 2, "seq_len": 4096, "n_views": 2, "seed": 42, "add_bos": true, "add_eos": true, "load_async": true, "prefetch_size": 1024, "tokenizer": {"name": "supertokenizer", "path": "meta-llama/Llama-3.2-1B", "tokenizers": [{"name": "huggingface", "path": "flexitok/reindexed-CohereLabs-aya-expanse-8b", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-bigscience-bloom", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-common-pile-comma-v0.1", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-google-gemma-2-2b", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-gpt2", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-meta-llama-Llama-3.2-1B", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-google-bert-bert-base-multilingual-cased", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-microsoft-Phi-3-mini-4k-instruct", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-Qwen-Qwen3-8B", "load_supermapping": false}, {"name": "huggingface", "path": "facebook/xglm-564M", "load_supermapping": true}, {"name": "huggingface", "path": "google/byt5-small", "load_supermapping": true}, {"name": "tiktoken", "path": "gpt-4o", "load_supermapping": true}, {"name": "tekken", "path": "tekken", "load_supermapping": true}, {"name": "tokenmonster", "path": "englishcode-32000-consistent-v1", "load_supermapping": true}], "load_supermapping": false, "dropout": 0.0, "seed": 42}}, "optim": {"lr": 0.001, "weight_decay": 0.1, "epsilon": 1e-08, "beta1": 0.9, "beta2": 0.95, "clip": 1.0, "scheduler": "cosine", "warmup": 2000, "lr_min_ratio": 1e-06, "cycle_length": 1.0, "cosine_theta": 1.0, "annealing_step": 1000, "decay_fraction": 0.1, "exp_factor": 0.5}, "model": {"dim": 2048, "n_layers": 25, "head_dim": null, "n_heads": 16, "n_kv_heads": null, "ffn_dim_multiplier": null, "multiple_of": 256, "norm_eps": 1e-05, "rope_theta": 10000.0, "init_base_std": null, "init_std_factor": "disabled", "max_seqlen": 4096, "seed": 42, "vocab_size": 851586, "weight_tying": false, "sliding_window": null, "use_factorized_embeddings": true, "factorized_embedding_dim": 512}, "distributed": {"dp_shard": 1, "dp_replicate": 8, "tp_size": 1, "selective_activation_checkpointing": false, "compile": true, "fsdp_type": "full_shard", "model_dtype": "bf16", "float8_recipe": null, "float8_filter": "layers\\.[0-9]+\\.", "matmul_allow_tf32": false, "detect_anomaly": false, "compile_cache_size_limit": 8, "spawn_method": "forkserver"}, "env": {"MKL_SERVICE_FORCE_INTEL": "GNU", "OMP_NUM_THREADS": "1", "MKL_NUM_THREADS": "1", "ENABLE_INTRA_NODE_COMM": "1", "TORCH_NCCL_AVOID_RECORD_STREAMS": "1", "NCCL_IB_TIMEOUT": "22", "NCCL_DEBUG": "INFO", "TORCH_NCCL_ASYNC_ERROR_HANDLING": "1"}, "checkpoint": {"dump": {"every": 10000, "keep": -1}, "eval": {"every": 10000, "keep": -1}, "path": "/fsx/craffel/lingua_logs/flexitok_superset_albert_w_xglm/checkpoints", "init_ckpt_path": null, "load_init_optimizer_state": false, "save_init_ckpt": false}, "profiling": {"run": true, "trace_folder": "profiling", "mem_warmup": 0, "mem_steps": 4, "profile_warmup": 100, "profile_steps": 4}, "logging": {"freq": 1, "acc_freq": null, "wandb": null}, "async_eval_gpus": 8, "eval": {"harness": {"tasks": ["hellaswag", "piqa", "arc_easy", "arc_challenge", "include_base_44_arabic", "include_base_44_chinese", "include_base_44_german", "include_base_44_greek", "include_base_44_persian", "include_base_44_french", "include_base_44_hungarian", "include_base_44_indonesian", "include_base_44_italian", "include_base_44_japanese", "include_base_44_dutch", "include_base_44_polish", "include_base_44_portuguese", "include_base_44_russian", "include_base_44_spanish", "include_base_44_turkish", "include_base_44_vietnamese", "belebele_arb_Arab", "belebele_ces_Latn", "belebele_zho_Hans", "belebele_dan_Latn", "belebele_deu_Latn", "belebele_ell_Grek", "belebele_pes_Arab", "belebele_fra_Latn", "belebele_hun_Latn", "belebele_ind_Latn", "belebele_ita_Latn", "belebele_jpn_Jpan", "belebele_nld_Latn", "belebele_pol_Latn", "belebele_por_Latn", "belebele_rus_Cyrl", "belebele_spa_Latn", "belebele_swe_Latn", "belebele_tur_Latn", "belebele_vie_Latn", "belebele_eng_Latn", "xnli_ar", "xnli_zh", "xnli_de", "xnli_el", "xnli_en", "xnli_es", "xnli_fr", "xnli_hi", "xnli_ru", "xnli_tr", "xnli_vi"]}, "generator": {"max_tokens": 16384, "dtype": "bf16", "add_bos": false}}}
flexitok_superset_albert_w_xglm/0000100000/params.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"name": "flexitok_superset_albert_w_xglm", "dump_dir": "/fsx/craffel/lingua_logs/flexitok_superset_albert_w_xglm", "seed": 777, "grad_acc_steps": 16, "gc_collect_freq": 1000, "probe_freq": null, "steps": 100000, "data": {"root_dir": "/scratch/craffel/lingua/data/flexitok/", "sources": {"fw_edu": 0.4, "dan_Latn": 0.0216582869670702, "swe_Latn": 0.0216359765418466, "vie_Latn": 0.0197485510268674, "hun_Latn": 0.0247194573562308, "fas_Arab": 0.0205634624231076, "tur_Latn": 0.0235455794841729, "ces_Latn": 0.0248024455266208, "arb_Arab": 0.0234323706569333, "ell_Grek": 0.0233670886888026, "ind_Latn": 0.0269322054593488, "nld_Latn": 0.0277796326621489, "pol_Latn": 0.0294120104572311, "por_Latn": 0.0301413168306825, "ita_Latn": 0.0324056371021865, "jpn_Jpan": 0.03553104151369, "fra_Latn": 0.0381835560678536, "spa_Latn": 0.0387222793083669, "deu_Latn": 0.0419925340453022, "cmn_Hani": 0.0454067521384114, "rus_Cyrl": 0.0500198157431261}, "batch_size": 2, "seq_len": 4096, "n_views": 2, "seed": 42, "add_bos": true, "add_eos": true, "load_async": true, "prefetch_size": 1024, "tokenizer": {"name": "supertokenizer", "path": "meta-llama/Llama-3.2-1B", "tokenizers": [{"name": "huggingface", "path": "flexitok/reindexed-CohereLabs-aya-expanse-8b", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-bigscience-bloom", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-common-pile-comma-v0.1", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-google-gemma-2-2b", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-gpt2", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-meta-llama-Llama-3.2-1B", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-google-bert-bert-base-multilingual-cased", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-microsoft-Phi-3-mini-4k-instruct", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-Qwen-Qwen3-8B", "load_supermapping": false}, {"name": "huggingface", "path": "facebook/xglm-564M", "load_supermapping": true}, {"name": "huggingface", "path": "google/byt5-small", "load_supermapping": true}, {"name": "tiktoken", "path": "gpt-4o", "load_supermapping": true}, {"name": "tekken", "path": "tekken", "load_supermapping": true}, {"name": "tokenmonster", "path": "englishcode-32000-consistent-v1", "load_supermapping": true}], "load_supermapping": false, "dropout": 0.0, "seed": 42}}, "optim": {"lr": 0.001, "weight_decay": 0.1, "epsilon": 1e-08, "beta1": 0.9, "beta2": 0.95, "clip": 1.0, "scheduler": "cosine", "warmup": 2000, "lr_min_ratio": 1e-06, "cycle_length": 1.0, "cosine_theta": 1.0, "annealing_step": 1000, "decay_fraction": 0.1, "exp_factor": 0.5}, "model": {"dim": 2048, "n_layers": 25, "head_dim": null, "n_heads": 16, "n_kv_heads": null, "ffn_dim_multiplier": null, "multiple_of": 256, "norm_eps": 1e-05, "rope_theta": 10000.0, "init_base_std": null, "init_std_factor": "disabled", "max_seqlen": 4096, "seed": 42, "vocab_size": 851586, "weight_tying": false, "sliding_window": null, "use_factorized_embeddings": true, "factorized_embedding_dim": 512}, "distributed": {"dp_shard": 1, "dp_replicate": 8, "tp_size": 1, "selective_activation_checkpointing": false, "compile": true, "fsdp_type": "full_shard", "model_dtype": "bf16", "float8_recipe": null, "float8_filter": "layers\\.[0-9]+\\.", "matmul_allow_tf32": false, "detect_anomaly": false, "compile_cache_size_limit": 8, "spawn_method": "forkserver"}, "env": {"MKL_SERVICE_FORCE_INTEL": "GNU", "OMP_NUM_THREADS": "1", "MKL_NUM_THREADS": "1", "ENABLE_INTRA_NODE_COMM": "1", "TORCH_NCCL_AVOID_RECORD_STREAMS": "1", "NCCL_IB_TIMEOUT": "22", "NCCL_DEBUG": "INFO", "TORCH_NCCL_ASYNC_ERROR_HANDLING": "1"}, "checkpoint": {"dump": {"every": 10000, "keep": -1}, "eval": {"every": 10000, "keep": -1}, "path": "/fsx/craffel/lingua_logs/flexitok_superset_albert_w_xglm/checkpoints", "init_ckpt_path": null, "load_init_optimizer_state": false, "save_init_ckpt": false}, "profiling": {"run": true, "trace_folder": "profiling", "mem_warmup": 0, "mem_steps": 4, "profile_warmup": 100, "profile_steps": 4}, "logging": {"freq": 1, "acc_freq": null, "wandb": null}, "async_eval_gpus": 8, "eval": {"harness": {"tasks": ["hellaswag", "piqa", "arc_easy", "arc_challenge", "include_base_44_arabic", "include_base_44_chinese", "include_base_44_german", "include_base_44_greek", "include_base_44_persian", "include_base_44_french", "include_base_44_hungarian", "include_base_44_indonesian", "include_base_44_italian", "include_base_44_japanese", "include_base_44_dutch", "include_base_44_polish", "include_base_44_portuguese", "include_base_44_russian", "include_base_44_spanish", "include_base_44_turkish", "include_base_44_vietnamese", "belebele_arb_Arab", "belebele_ces_Latn", "belebele_zho_Hans", "belebele_dan_Latn", "belebele_deu_Latn", "belebele_ell_Grek", "belebele_pes_Arab", "belebele_fra_Latn", "belebele_hun_Latn", "belebele_ind_Latn", "belebele_ita_Latn", "belebele_jpn_Jpan", "belebele_nld_Latn", "belebele_pol_Latn", "belebele_por_Latn", "belebele_rus_Cyrl", "belebele_spa_Latn", "belebele_swe_Latn", "belebele_tur_Latn", "belebele_vie_Latn", "belebele_eng_Latn", "xnli_ar", "xnli_zh", "xnli_de", "xnli_el", "xnli_en", "xnli_es", "xnli_fr", "xnli_hi", "xnli_ru", "xnli_tr", "xnli_vi"]}, "generator": {"max_tokens": 16384, "dtype": "bf16", "add_bos": false}}}
flexitok_superset_albert_w_xglm/0000100000/train_state_00000.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"step": 100000, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 1563, "it_state": {"it_state": {"root_dir": "/scratch/craffel/lingua/data/flexitok/", "sources": {"fw_edu": 0.4, "dan_Latn": 0.0216582869670702, "swe_Latn": 0.0216359765418466, "vie_Latn": 0.0197485510268674, "hun_Latn": 0.0247194573562308, "fas_Arab": 0.0205634624231076, "tur_Latn": 0.0235455794841729, "ces_Latn": 0.0248024455266208, "arb_Arab": 0.0234323706569333, "ell_Grek": 0.0233670886888026, "ind_Latn": 0.0269322054593488, "nld_Latn": 0.0277796326621489, "pol_Latn": 0.0294120104572311, "por_Latn": 0.0301413168306825, "ita_Latn": 0.0324056371021865, "jpn_Jpan": 0.03553104151369, "fra_Latn": 0.0381835560678536, "spa_Latn": 0.0387222793083669, "deu_Latn": 0.0419925340453022, "cmn_Hani": 0.0454067521384114, "rus_Cyrl": 0.0500198157431261}, "source_to_state": {"fw_edu": {"file_path": "/scratch/craffel/lingua/data/flexitok/fw_edu/fineweb_edu_100bt.chunk.00.jsonl", "position": 18698619854, "block_size": 1, "offset": 0, "current_iter": 0}, "dan_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/dan_Latn/fineweb_2_hq.dan_Latn.chunk.00.jsonl", "position": 857018948, "block_size": 1, "offset": 0, "current_iter": 0}, "swe_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/swe_Latn/fineweb_2_hq.swe_Latn.chunk.00.jsonl", "position": 805063020, "block_size": 1, "offset": 0, "current_iter": 0}, "vie_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/vie_Latn/fineweb_2_hq.vie_Latn.chunk.00.jsonl", "position": 858591567, "block_size": 1, "offset": 0, "current_iter": 0}, "hun_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/hun_Latn/fineweb_2_hq.hun_Latn.chunk.00.jsonl", "position": 1141754913, "block_size": 1, "offset": 0, "current_iter": 0}, "fas_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/fas_Arab/fineweb_2_hq.fas_Arab.chunk.00.jsonl", "position": 964370543, "block_size": 1, "offset": 0, "current_iter": 0}, "tur_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/tur_Latn/fineweb_2_hq.tur_Latn.chunk.00.jsonl", "position": 728951625, "block_size": 1, "offset": 0, "current_iter": 0}, "ces_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ces_Latn/fineweb_2_hq.ces_Latn.chunk.00.jsonl", "position": 1121483471, "block_size": 1, "offset": 0, "current_iter": 0}, "arb_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/arb_Arab/fineweb_2_hq.arb_Arab.chunk.00.jsonl", "position": 1255131326, "block_size": 1, "offset": 0, "current_iter": 0}, "ell_Grek": {"file_path": "/scratch/craffel/lingua/data/flexitok/ell_Grek/fineweb_2_hq.ell_Grek.chunk.00.jsonl", "position": 1567164257, "block_size": 1, "offset": 0, "current_iter": 0}, "ind_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ind_Latn/fineweb_2_hq.ind_Latn.chunk.00.jsonl", "position": 1113293513, "block_size": 1, "offset": 0, "current_iter": 0}, "nld_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/nld_Latn/fineweb_2_hq.nld_Latn.chunk.00.jsonl", "position": 886144231, "block_size": 1, "offset": 0, "current_iter": 0}, "pol_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/pol_Latn/fineweb_2_hq.pol_Latn.chunk.00.jsonl", "position": 935079705, "block_size": 1, "offset": 0, "current_iter": 0}, "por_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/por_Latn/fineweb_2_hq.por_Latn.chunk.00.jsonl", "position": 966147759, "block_size": 1, "offset": 0, "current_iter": 0}, "ita_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ita_Latn/fineweb_2_hq.ita_Latn.chunk.00.jsonl", "position": 1083910192, "block_size": 1, "offset": 0, "current_iter": 0}, "jpn_Jpan": {"file_path": "/scratch/craffel/lingua/data/flexitok/jpn_Jpan/fineweb_2_hq.jpn_Jpan.chunk.00.jsonl", "position": 1133240358, "block_size": 1, "offset": 0, "current_iter": 0}, "fra_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/fra_Latn/fineweb_2_hq.fra_Latn.chunk.00.jsonl", "position": 1496900638, "block_size": 1, "offset": 0, "current_iter": 0}, "spa_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/spa_Latn/fineweb_2_hq.spa_Latn.chunk.00.jsonl", "position": 1345047914, "block_size": 1, "offset": 0, "current_iter": 0}, "deu_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/deu_Latn/fineweb_2_hq.deu_Latn.chunk.00.jsonl", "position": 1638733099, "block_size": 1, "offset": 0, "current_iter": 0}, "cmn_Hani": {"file_path": "/scratch/craffel/lingua/data/flexitok/cmn_Hani/fineweb_2_hq.cmn_Hani.chunk.00.jsonl", "position": 1736662113, "block_size": 1, "offset": 0, "current_iter": 0}, "rus_Cyrl": {"file_path": "/scratch/craffel/lingua/data/flexitok/rus_Cyrl/fineweb_2_hq.rus_Cyrl.chunk.00.jsonl", "position": 4069567402, "block_size": 1, "offset": 0, "current_iter": 0}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 148175808342448346810032439232211321287, "inc": 252101603063402394885084957393789173453}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "supertokenizer", "path": "meta-llama/Llama-3.2-1B", "tokenizers": [{"name": "huggingface", "path": "flexitok/reindexed-CohereLabs-aya-expanse-8b", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-bigscience-bloom", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-common-pile-comma-v0.1", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-google-gemma-2-2b", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-gpt2", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-meta-llama-Llama-3.2-1B", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-google-bert-bert-base-multilingual-cased", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-microsoft-Phi-3-mini-4k-instruct", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-Qwen-Qwen3-8B", "load_supermapping": false}, {"name": "huggingface", "path": "facebook/xglm-564M", "load_supermapping": true}, {"name": "huggingface", "path": "google/byt5-small", "load_supermapping": true}, {"name": "tiktoken", "path": "gpt-4o", "load_supermapping": true}, {"name": "tekken", "path": "tekken", "load_supermapping": true}, {"name": "tokenmonster", "path": "englishcode-32000-consistent-v1", "load_supermapping": true}], "dropout": 0.0, "rng_state": {"bit_generator": "PCG64", "state": {"state": 225834081751462610901494769002933056278, "inc": 332724090758049132448979897138935081983}, "has_uint32": 1, "uinteger": 251590379}, "seed": 42}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 512, "rng_state": {"bit_generator": "PCG64", "state": {"state": 18548463910265668932787503313221070147, "inc": 257317082376085721142933171929815648017}, "has_uint32": 1, "uinteger": 1274552312}, "batch_size": 2, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.001], "last_epoch": 100000, "verbose": false, "_step_count": 100001, "_get_lr_called_within_step": false, "_last_lr": [0.001], "lr_lambdas": [{}]}}
flexitok_superset_albert_w_xglm/0000100000/train_state_00001.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"step": 100000, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 556, "it_state": {"it_state": {"root_dir": "/scratch/craffel/lingua/data/flexitok/", "sources": {"fw_edu": 0.4, "dan_Latn": 0.0216582869670702, "swe_Latn": 0.0216359765418466, "vie_Latn": 0.0197485510268674, "hun_Latn": 0.0247194573562308, "fas_Arab": 0.0205634624231076, "tur_Latn": 0.0235455794841729, "ces_Latn": 0.0248024455266208, "arb_Arab": 0.0234323706569333, "ell_Grek": 0.0233670886888026, "ind_Latn": 0.0269322054593488, "nld_Latn": 0.0277796326621489, "pol_Latn": 0.0294120104572311, "por_Latn": 0.0301413168306825, "ita_Latn": 0.0324056371021865, "jpn_Jpan": 0.03553104151369, "fra_Latn": 0.0381835560678536, "spa_Latn": 0.0387222793083669, "deu_Latn": 0.0419925340453022, "cmn_Hani": 0.0454067521384114, "rus_Cyrl": 0.0500198157431261}, "source_to_state": {"fw_edu": {"file_path": "/scratch/craffel/lingua/data/flexitok/fw_edu/fineweb_edu_100bt.chunk.01.jsonl", "position": 18713826236, "block_size": 1, "offset": 0, "current_iter": 0}, "dan_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/dan_Latn/fineweb_2_hq.dan_Latn.chunk.01.jsonl", "position": 852958914, "block_size": 1, "offset": 0, "current_iter": 0}, "swe_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/swe_Latn/fineweb_2_hq.swe_Latn.chunk.01.jsonl", "position": 814162482, "block_size": 1, "offset": 0, "current_iter": 0}, "vie_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/vie_Latn/fineweb_2_hq.vie_Latn.chunk.01.jsonl", "position": 859451196, "block_size": 1, "offset": 0, "current_iter": 0}, "hun_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/hun_Latn/fineweb_2_hq.hun_Latn.chunk.01.jsonl", "position": 1138805731, "block_size": 1, "offset": 0, "current_iter": 0}, "fas_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/fas_Arab/fineweb_2_hq.fas_Arab.chunk.01.jsonl", "position": 962562973, "block_size": 1, "offset": 0, "current_iter": 0}, "tur_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/tur_Latn/fineweb_2_hq.tur_Latn.chunk.01.jsonl", "position": 724216402, "block_size": 1, "offset": 0, "current_iter": 0}, "ces_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ces_Latn/fineweb_2_hq.ces_Latn.chunk.01.jsonl", "position": 1132186441, "block_size": 1, "offset": 0, "current_iter": 0}, "arb_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/arb_Arab/fineweb_2_hq.arb_Arab.chunk.01.jsonl", "position": 1266845331, "block_size": 1, "offset": 0, "current_iter": 0}, "ell_Grek": {"file_path": "/scratch/craffel/lingua/data/flexitok/ell_Grek/fineweb_2_hq.ell_Grek.chunk.01.jsonl", "position": 1575071120, "block_size": 1, "offset": 0, "current_iter": 0}, "ind_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ind_Latn/fineweb_2_hq.ind_Latn.chunk.01.jsonl", "position": 1110893678, "block_size": 1, "offset": 0, "current_iter": 0}, "nld_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/nld_Latn/fineweb_2_hq.nld_Latn.chunk.01.jsonl", "position": 883518750, "block_size": 1, "offset": 0, "current_iter": 0}, "pol_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/pol_Latn/fineweb_2_hq.pol_Latn.chunk.01.jsonl", "position": 937597523, "block_size": 1, "offset": 0, "current_iter": 0}, "por_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/por_Latn/fineweb_2_hq.por_Latn.chunk.01.jsonl", "position": 971723319, "block_size": 1, "offset": 0, "current_iter": 0}, "ita_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ita_Latn/fineweb_2_hq.ita_Latn.chunk.01.jsonl", "position": 1076785258, "block_size": 1, "offset": 0, "current_iter": 0}, "jpn_Jpan": {"file_path": "/scratch/craffel/lingua/data/flexitok/jpn_Jpan/fineweb_2_hq.jpn_Jpan.chunk.01.jsonl", "position": 1127681920, "block_size": 1, "offset": 0, "current_iter": 0}, "fra_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/fra_Latn/fineweb_2_hq.fra_Latn.chunk.01.jsonl", "position": 1507732422, "block_size": 1, "offset": 0, "current_iter": 0}, "spa_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/spa_Latn/fineweb_2_hq.spa_Latn.chunk.01.jsonl", "position": 1361957857, "block_size": 1, "offset": 0, "current_iter": 0}, "deu_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/deu_Latn/fineweb_2_hq.deu_Latn.chunk.01.jsonl", "position": 1632526672, "block_size": 1, "offset": 0, "current_iter": 0}, "cmn_Hani": {"file_path": "/scratch/craffel/lingua/data/flexitok/cmn_Hani/fineweb_2_hq.cmn_Hani.chunk.01.jsonl", "position": 1737249115, "block_size": 1, "offset": 0, "current_iter": 0}, "rus_Cyrl": {"file_path": "/scratch/craffel/lingua/data/flexitok/rus_Cyrl/fineweb_2_hq.rus_Cyrl.chunk.01.jsonl", "position": 4054190350, "block_size": 1, "offset": 0, "current_iter": 0}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 313209315602092549383179631086255333357, "inc": 246509925186285949978196491240064802315}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "supertokenizer", "path": "meta-llama/Llama-3.2-1B", "tokenizers": [{"name": "huggingface", "path": "flexitok/reindexed-CohereLabs-aya-expanse-8b", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-bigscience-bloom", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-common-pile-comma-v0.1", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-google-gemma-2-2b", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-gpt2", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-meta-llama-Llama-3.2-1B", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-google-bert-bert-base-multilingual-cased", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-microsoft-Phi-3-mini-4k-instruct", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-Qwen-Qwen3-8B", "load_supermapping": false}, {"name": "huggingface", "path": "facebook/xglm-564M", "load_supermapping": true}, {"name": "huggingface", "path": "google/byt5-small", "load_supermapping": true}, {"name": "tiktoken", "path": "gpt-4o", "load_supermapping": true}, {"name": "tekken", "path": "tekken", "load_supermapping": true}, {"name": "tokenmonster", "path": "englishcode-32000-consistent-v1", "load_supermapping": true}], "dropout": 0.0, "rng_state": {"bit_generator": "PCG64", "state": {"state": 324137256090941158469828453310884331875, "inc": 332724090758049132448979897138935081983}, "has_uint32": 1, "uinteger": 773941105}, "seed": 42}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 512, "rng_state": {"bit_generator": "PCG64", "state": {"state": 186345142843187532809048324084302708840, "inc": 173555323965545256606922338259303677603}, "has_uint32": 1, "uinteger": 2837092787}, "batch_size": 2, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.001], "last_epoch": 100000, "verbose": false, "_step_count": 100001, "_get_lr_called_within_step": false, "_last_lr": [0.001], "lr_lambdas": [{}]}}
flexitok_superset_albert_w_xglm/0000100000/train_state_00002.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"step": 100000, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 3529, "it_state": {"it_state": {"root_dir": "/scratch/craffel/lingua/data/flexitok/", "sources": {"fw_edu": 0.4, "dan_Latn": 0.0216582869670702, "swe_Latn": 0.0216359765418466, "vie_Latn": 0.0197485510268674, "hun_Latn": 0.0247194573562308, "fas_Arab": 0.0205634624231076, "tur_Latn": 0.0235455794841729, "ces_Latn": 0.0248024455266208, "arb_Arab": 0.0234323706569333, "ell_Grek": 0.0233670886888026, "ind_Latn": 0.0269322054593488, "nld_Latn": 0.0277796326621489, "pol_Latn": 0.0294120104572311, "por_Latn": 0.0301413168306825, "ita_Latn": 0.0324056371021865, "jpn_Jpan": 0.03553104151369, "fra_Latn": 0.0381835560678536, "spa_Latn": 0.0387222793083669, "deu_Latn": 0.0419925340453022, "cmn_Hani": 0.0454067521384114, "rus_Cyrl": 0.0500198157431261}, "source_to_state": {"fw_edu": {"file_path": "/scratch/craffel/lingua/data/flexitok/fw_edu/fineweb_edu_100bt.chunk.02.jsonl", "position": 18721632226, "block_size": 1, "offset": 0, "current_iter": 0}, "dan_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/dan_Latn/fineweb_2_hq.dan_Latn.chunk.02.jsonl", "position": 860988650, "block_size": 1, "offset": 0, "current_iter": 0}, "swe_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/swe_Latn/fineweb_2_hq.swe_Latn.chunk.02.jsonl", "position": 799442839, "block_size": 1, "offset": 0, "current_iter": 0}, "vie_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/vie_Latn/fineweb_2_hq.vie_Latn.chunk.02.jsonl", "position": 860599687, "block_size": 1, "offset": 0, "current_iter": 0}, "hun_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/hun_Latn/fineweb_2_hq.hun_Latn.chunk.02.jsonl", "position": 1139625324, "block_size": 1, "offset": 0, "current_iter": 0}, "fas_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/fas_Arab/fineweb_2_hq.fas_Arab.chunk.02.jsonl", "position": 958401937, "block_size": 1, "offset": 0, "current_iter": 0}, "tur_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/tur_Latn/fineweb_2_hq.tur_Latn.chunk.02.jsonl", "position": 732613077, "block_size": 1, "offset": 0, "current_iter": 0}, "ces_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ces_Latn/fineweb_2_hq.ces_Latn.chunk.02.jsonl", "position": 1114316432, "block_size": 1, "offset": 0, "current_iter": 0}, "arb_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/arb_Arab/fineweb_2_hq.arb_Arab.chunk.02.jsonl", "position": 1272700174, "block_size": 1, "offset": 0, "current_iter": 0}, "ell_Grek": {"file_path": "/scratch/craffel/lingua/data/flexitok/ell_Grek/fineweb_2_hq.ell_Grek.chunk.02.jsonl", "position": 1561471361, "block_size": 1, "offset": 0, "current_iter": 0}, "ind_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ind_Latn/fineweb_2_hq.ind_Latn.chunk.02.jsonl", "position": 1115396210, "block_size": 1, "offset": 0, "current_iter": 0}, "nld_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/nld_Latn/fineweb_2_hq.nld_Latn.chunk.02.jsonl", "position": 887512032, "block_size": 1, "offset": 0, "current_iter": 0}, "pol_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/pol_Latn/fineweb_2_hq.pol_Latn.chunk.02.jsonl", "position": 927715349, "block_size": 1, "offset": 0, "current_iter": 0}, "por_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/por_Latn/fineweb_2_hq.por_Latn.chunk.02.jsonl", "position": 973526340, "block_size": 1, "offset": 0, "current_iter": 0}, "ita_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ita_Latn/fineweb_2_hq.ita_Latn.chunk.02.jsonl", "position": 1083072365, "block_size": 1, "offset": 0, "current_iter": 0}, "jpn_Jpan": {"file_path": "/scratch/craffel/lingua/data/flexitok/jpn_Jpan/fineweb_2_hq.jpn_Jpan.chunk.02.jsonl", "position": 1137681498, "block_size": 1, "offset": 0, "current_iter": 0}, "fra_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/fra_Latn/fineweb_2_hq.fra_Latn.chunk.02.jsonl", "position": 1499476937, "block_size": 1, "offset": 0, "current_iter": 0}, "spa_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/spa_Latn/fineweb_2_hq.spa_Latn.chunk.02.jsonl", "position": 1349038147, "block_size": 1, "offset": 0, "current_iter": 0}, "deu_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/deu_Latn/fineweb_2_hq.deu_Latn.chunk.02.jsonl", "position": 1643386200, "block_size": 1, "offset": 0, "current_iter": 0}, "cmn_Hani": {"file_path": "/scratch/craffel/lingua/data/flexitok/cmn_Hani/fineweb_2_hq.cmn_Hani.chunk.02.jsonl", "position": 1732534362, "block_size": 1, "offset": 0, "current_iter": 0}, "rus_Cyrl": {"file_path": "/scratch/craffel/lingua/data/flexitok/rus_Cyrl/fineweb_2_hq.rus_Cyrl.chunk.02.jsonl", "position": 4046933220, "block_size": 1, "offset": 0, "current_iter": 0}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 291072927821394655449509042881537487324, "inc": 234358335530849485425064040311006256713}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "supertokenizer", "path": "meta-llama/Llama-3.2-1B", "tokenizers": [{"name": "huggingface", "path": "flexitok/reindexed-CohereLabs-aya-expanse-8b", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-bigscience-bloom", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-common-pile-comma-v0.1", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-google-gemma-2-2b", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-gpt2", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-meta-llama-Llama-3.2-1B", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-google-bert-bert-base-multilingual-cased", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-microsoft-Phi-3-mini-4k-instruct", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-Qwen-Qwen3-8B", "load_supermapping": false}, {"name": "huggingface", "path": "facebook/xglm-564M", "load_supermapping": true}, {"name": "huggingface", "path": "google/byt5-small", "load_supermapping": true}, {"name": "tiktoken", "path": "gpt-4o", "load_supermapping": true}, {"name": "tekken", "path": "tekken", "load_supermapping": true}, {"name": "tokenmonster", "path": "englishcode-32000-consistent-v1", "load_supermapping": true}], "dropout": 0.0, "rng_state": {"bit_generator": "PCG64", "state": {"state": 155969097026270253670911737888206457412, "inc": 332724090758049132448979897138935081983}, "has_uint32": 0, "uinteger": 584777066}, "seed": 42}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 512, "rng_state": {"bit_generator": "PCG64", "state": {"state": 50530194496374023900897257149872255690, "inc": 319170006889470250209362588441616495209}, "has_uint32": 1, "uinteger": 520073158}, "batch_size": 2, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.001], "last_epoch": 100000, "verbose": false, "_step_count": 100001, "_get_lr_called_within_step": false, "_last_lr": [0.001], "lr_lambdas": [{}]}}
flexitok_superset_albert_w_xglm/0000100000/train_state_00003.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"step": 100000, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 2114, "it_state": {"it_state": {"root_dir": "/scratch/craffel/lingua/data/flexitok/", "sources": {"fw_edu": 0.4, "dan_Latn": 0.0216582869670702, "swe_Latn": 0.0216359765418466, "vie_Latn": 0.0197485510268674, "hun_Latn": 0.0247194573562308, "fas_Arab": 0.0205634624231076, "tur_Latn": 0.0235455794841729, "ces_Latn": 0.0248024455266208, "arb_Arab": 0.0234323706569333, "ell_Grek": 0.0233670886888026, "ind_Latn": 0.0269322054593488, "nld_Latn": 0.0277796326621489, "pol_Latn": 0.0294120104572311, "por_Latn": 0.0301413168306825, "ita_Latn": 0.0324056371021865, "jpn_Jpan": 0.03553104151369, "fra_Latn": 0.0381835560678536, "spa_Latn": 0.0387222793083669, "deu_Latn": 0.0419925340453022, "cmn_Hani": 0.0454067521384114, "rus_Cyrl": 0.0500198157431261}, "source_to_state": {"fw_edu": {"file_path": "/scratch/craffel/lingua/data/flexitok/fw_edu/fineweb_edu_100bt.chunk.03.jsonl", "position": 18705722682, "block_size": 1, "offset": 0, "current_iter": 0}, "dan_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/dan_Latn/fineweb_2_hq.dan_Latn.chunk.03.jsonl", "position": 854109436, "block_size": 1, "offset": 0, "current_iter": 0}, "swe_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/swe_Latn/fineweb_2_hq.swe_Latn.chunk.03.jsonl", "position": 801570041, "block_size": 1, "offset": 0, "current_iter": 0}, "vie_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/vie_Latn/fineweb_2_hq.vie_Latn.chunk.03.jsonl", "position": 866097950, "block_size": 1, "offset": 0, "current_iter": 0}, "hun_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/hun_Latn/fineweb_2_hq.hun_Latn.chunk.03.jsonl", "position": 1148542736, "block_size": 1, "offset": 0, "current_iter": 0}, "fas_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/fas_Arab/fineweb_2_hq.fas_Arab.chunk.03.jsonl", "position": 953381954, "block_size": 1, "offset": 0, "current_iter": 0}, "tur_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/tur_Latn/fineweb_2_hq.tur_Latn.chunk.03.jsonl", "position": 736271947, "block_size": 1, "offset": 0, "current_iter": 0}, "ces_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ces_Latn/fineweb_2_hq.ces_Latn.chunk.03.jsonl", "position": 1121727189, "block_size": 1, "offset": 0, "current_iter": 0}, "arb_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/arb_Arab/fineweb_2_hq.arb_Arab.chunk.03.jsonl", "position": 1263899499, "block_size": 1, "offset": 0, "current_iter": 0}, "ell_Grek": {"file_path": "/scratch/craffel/lingua/data/flexitok/ell_Grek/fineweb_2_hq.ell_Grek.chunk.03.jsonl", "position": 1568335705, "block_size": 1, "offset": 0, "current_iter": 0}, "ind_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ind_Latn/fineweb_2_hq.ind_Latn.chunk.03.jsonl", "position": 1111024524, "block_size": 1, "offset": 0, "current_iter": 0}, "nld_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/nld_Latn/fineweb_2_hq.nld_Latn.chunk.03.jsonl", "position": 896469129, "block_size": 1, "offset": 0, "current_iter": 0}, "pol_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/pol_Latn/fineweb_2_hq.pol_Latn.chunk.03.jsonl", "position": 931113442, "block_size": 1, "offset": 0, "current_iter": 0}, "por_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/por_Latn/fineweb_2_hq.por_Latn.chunk.03.jsonl", "position": 964046149, "block_size": 1, "offset": 0, "current_iter": 0}, "ita_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ita_Latn/fineweb_2_hq.ita_Latn.chunk.03.jsonl", "position": 1084443475, "block_size": 1, "offset": 0, "current_iter": 0}, "jpn_Jpan": {"file_path": "/scratch/craffel/lingua/data/flexitok/jpn_Jpan/fineweb_2_hq.jpn_Jpan.chunk.03.jsonl", "position": 1125038418, "block_size": 1, "offset": 0, "current_iter": 0}, "fra_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/fra_Latn/fineweb_2_hq.fra_Latn.chunk.03.jsonl", "position": 1498765096, "block_size": 1, "offset": 0, "current_iter": 0}, "spa_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/spa_Latn/fineweb_2_hq.spa_Latn.chunk.03.jsonl", "position": 1358872320, "block_size": 1, "offset": 0, "current_iter": 0}, "deu_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/deu_Latn/fineweb_2_hq.deu_Latn.chunk.03.jsonl", "position": 1643781899, "block_size": 1, "offset": 0, "current_iter": 0}, "cmn_Hani": {"file_path": "/scratch/craffel/lingua/data/flexitok/cmn_Hani/fineweb_2_hq.cmn_Hani.chunk.03.jsonl", "position": 1731609575, "block_size": 1, "offset": 0, "current_iter": 0}, "rus_Cyrl": {"file_path": "/scratch/craffel/lingua/data/flexitok/rus_Cyrl/fineweb_2_hq.rus_Cyrl.chunk.03.jsonl", "position": 4102118913, "block_size": 1, "offset": 0, "current_iter": 0}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 96679764725759549909806070212961394786, "inc": 148211758571781046255077612135386035203}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "supertokenizer", "path": "meta-llama/Llama-3.2-1B", "tokenizers": [{"name": "huggingface", "path": "flexitok/reindexed-CohereLabs-aya-expanse-8b", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-bigscience-bloom", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-common-pile-comma-v0.1", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-google-gemma-2-2b", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-gpt2", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-meta-llama-Llama-3.2-1B", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-google-bert-bert-base-multilingual-cased", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-microsoft-Phi-3-mini-4k-instruct", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-Qwen-Qwen3-8B", "load_supermapping": false}, {"name": "huggingface", "path": "facebook/xglm-564M", "load_supermapping": true}, {"name": "huggingface", "path": "google/byt5-small", "load_supermapping": true}, {"name": "tiktoken", "path": "gpt-4o", "load_supermapping": true}, {"name": "tekken", "path": "tekken", "load_supermapping": true}, {"name": "tokenmonster", "path": "englishcode-32000-consistent-v1", "load_supermapping": true}], "dropout": 0.0, "rng_state": {"bit_generator": "PCG64", "state": {"state": 238558826228714993424145844441835821534, "inc": 332724090758049132448979897138935081983}, "has_uint32": 0, "uinteger": 1183322385}, "seed": 42}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 512, "rng_state": {"bit_generator": "PCG64", "state": {"state": 299661575736282925866863483046129926629, "inc": 115810872492597857501795428972873905393}, "has_uint32": 1, "uinteger": 586360703}, "batch_size": 2, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.001], "last_epoch": 100000, "verbose": false, "_step_count": 100001, "_get_lr_called_within_step": false, "_last_lr": [0.001], "lr_lambdas": [{}]}}
flexitok_superset_albert_w_xglm/0000100000/train_state_00004.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"step": 100000, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 77017, "it_state": {"it_state": {"root_dir": "/scratch/craffel/lingua/data/flexitok/", "sources": {"fw_edu": 0.4, "dan_Latn": 0.0216582869670702, "swe_Latn": 0.0216359765418466, "vie_Latn": 0.0197485510268674, "hun_Latn": 0.0247194573562308, "fas_Arab": 0.0205634624231076, "tur_Latn": 0.0235455794841729, "ces_Latn": 0.0248024455266208, "arb_Arab": 0.0234323706569333, "ell_Grek": 0.0233670886888026, "ind_Latn": 0.0269322054593488, "nld_Latn": 0.0277796326621489, "pol_Latn": 0.0294120104572311, "por_Latn": 0.0301413168306825, "ita_Latn": 0.0324056371021865, "jpn_Jpan": 0.03553104151369, "fra_Latn": 0.0381835560678536, "spa_Latn": 0.0387222793083669, "deu_Latn": 0.0419925340453022, "cmn_Hani": 0.0454067521384114, "rus_Cyrl": 0.0500198157431261}, "source_to_state": {"fw_edu": {"file_path": "/scratch/craffel/lingua/data/flexitok/fw_edu/fineweb_edu_100bt.chunk.04.jsonl", "position": 18681480625, "block_size": 1, "offset": 0, "current_iter": 0}, "dan_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/dan_Latn/fineweb_2_hq.dan_Latn.chunk.04.jsonl", "position": 865161290, "block_size": 1, "offset": 0, "current_iter": 0}, "swe_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/swe_Latn/fineweb_2_hq.swe_Latn.chunk.04.jsonl", "position": 800522995, "block_size": 1, "offset": 0, "current_iter": 0}, "vie_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/vie_Latn/fineweb_2_hq.vie_Latn.chunk.04.jsonl", "position": 860624114, "block_size": 1, "offset": 0, "current_iter": 0}, "hun_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/hun_Latn/fineweb_2_hq.hun_Latn.chunk.04.jsonl", "position": 1150804611, "block_size": 1, "offset": 0, "current_iter": 0}, "fas_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/fas_Arab/fineweb_2_hq.fas_Arab.chunk.04.jsonl", "position": 952562386, "block_size": 1, "offset": 0, "current_iter": 0}, "tur_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/tur_Latn/fineweb_2_hq.tur_Latn.chunk.04.jsonl", "position": 729927877, "block_size": 1, "offset": 0, "current_iter": 0}, "ces_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ces_Latn/fineweb_2_hq.ces_Latn.chunk.04.jsonl", "position": 1126787989, "block_size": 1, "offset": 0, "current_iter": 0}, "arb_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/arb_Arab/fineweb_2_hq.arb_Arab.chunk.04.jsonl", "position": 1258096307, "block_size": 1, "offset": 0, "current_iter": 0}, "ell_Grek": {"file_path": "/scratch/craffel/lingua/data/flexitok/ell_Grek/fineweb_2_hq.ell_Grek.chunk.04.jsonl", "position": 1582570635, "block_size": 1, "offset": 0, "current_iter": 0}, "ind_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ind_Latn/fineweb_2_hq.ind_Latn.chunk.04.jsonl", "position": 1102526591, "block_size": 1, "offset": 0, "current_iter": 0}, "nld_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/nld_Latn/fineweb_2_hq.nld_Latn.chunk.04.jsonl", "position": 893268956, "block_size": 1, "offset": 0, "current_iter": 0}, "pol_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/pol_Latn/fineweb_2_hq.pol_Latn.chunk.04.jsonl", "position": 934773852, "block_size": 1, "offset": 0, "current_iter": 0}, "por_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/por_Latn/fineweb_2_hq.por_Latn.chunk.04.jsonl", "position": 972196146, "block_size": 1, "offset": 0, "current_iter": 0}, "ita_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ita_Latn/fineweb_2_hq.ita_Latn.chunk.04.jsonl", "position": 1072704537, "block_size": 1, "offset": 0, "current_iter": 0}, "jpn_Jpan": {"file_path": "/scratch/craffel/lingua/data/flexitok/jpn_Jpan/fineweb_2_hq.jpn_Jpan.chunk.04.jsonl", "position": 1140554415, "block_size": 1, "offset": 0, "current_iter": 0}, "fra_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/fra_Latn/fineweb_2_hq.fra_Latn.chunk.04.jsonl", "position": 1511428904, "block_size": 1, "offset": 0, "current_iter": 0}, "spa_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/spa_Latn/fineweb_2_hq.spa_Latn.chunk.04.jsonl", "position": 1341687718, "block_size": 1, "offset": 0, "current_iter": 0}, "deu_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/deu_Latn/fineweb_2_hq.deu_Latn.chunk.04.jsonl", "position": 1626619521, "block_size": 1, "offset": 0, "current_iter": 0}, "cmn_Hani": {"file_path": "/scratch/craffel/lingua/data/flexitok/cmn_Hani/fineweb_2_hq.cmn_Hani.chunk.04.jsonl", "position": 1733913889, "block_size": 1, "offset": 0, "current_iter": 0}, "rus_Cyrl": {"file_path": "/scratch/craffel/lingua/data/flexitok/rus_Cyrl/fineweb_2_hq.rus_Cyrl.chunk.04.jsonl", "position": 4071454941, "block_size": 1, "offset": 0, "current_iter": 0}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 41200690399623350389895004150063081030, "inc": 186633262021180533256729114674950595327}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "supertokenizer", "path": "meta-llama/Llama-3.2-1B", "tokenizers": [{"name": "huggingface", "path": "flexitok/reindexed-CohereLabs-aya-expanse-8b", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-bigscience-bloom", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-common-pile-comma-v0.1", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-google-gemma-2-2b", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-gpt2", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-meta-llama-Llama-3.2-1B", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-google-bert-bert-base-multilingual-cased", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-microsoft-Phi-3-mini-4k-instruct", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-Qwen-Qwen3-8B", "load_supermapping": false}, {"name": "huggingface", "path": "facebook/xglm-564M", "load_supermapping": true}, {"name": "huggingface", "path": "google/byt5-small", "load_supermapping": true}, {"name": "tiktoken", "path": "gpt-4o", "load_supermapping": true}, {"name": "tekken", "path": "tekken", "load_supermapping": true}, {"name": "tokenmonster", "path": "englishcode-32000-consistent-v1", "load_supermapping": true}], "dropout": 0.0, "rng_state": {"bit_generator": "PCG64", "state": {"state": 115400411489221215583568774835398883067, "inc": 332724090758049132448979897138935081983}, "has_uint32": 0, "uinteger": 2250154451}, "seed": 42}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 512, "rng_state": {"bit_generator": "PCG64", "state": {"state": 81625012471786046827079063129240365030, "inc": 303111205818808944921858206842105131807}, "has_uint32": 0, "uinteger": 3442835010}, "batch_size": 2, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.001], "last_epoch": 100000, "verbose": false, "_step_count": 100001, "_get_lr_called_within_step": false, "_last_lr": [0.001], "lr_lambdas": [{}]}}
flexitok_superset_albert_w_xglm/0000100000/train_state_00005.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"step": 100000, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 3818, "it_state": {"it_state": {"root_dir": "/scratch/craffel/lingua/data/flexitok/", "sources": {"fw_edu": 0.4, "dan_Latn": 0.0216582869670702, "swe_Latn": 0.0216359765418466, "vie_Latn": 0.0197485510268674, "hun_Latn": 0.0247194573562308, "fas_Arab": 0.0205634624231076, "tur_Latn": 0.0235455794841729, "ces_Latn": 0.0248024455266208, "arb_Arab": 0.0234323706569333, "ell_Grek": 0.0233670886888026, "ind_Latn": 0.0269322054593488, "nld_Latn": 0.0277796326621489, "pol_Latn": 0.0294120104572311, "por_Latn": 0.0301413168306825, "ita_Latn": 0.0324056371021865, "jpn_Jpan": 0.03553104151369, "fra_Latn": 0.0381835560678536, "spa_Latn": 0.0387222793083669, "deu_Latn": 0.0419925340453022, "cmn_Hani": 0.0454067521384114, "rus_Cyrl": 0.0500198157431261}, "source_to_state": {"fw_edu": {"file_path": "/scratch/craffel/lingua/data/flexitok/fw_edu/fineweb_edu_100bt.chunk.05.jsonl", "position": 18735684884, "block_size": 1, "offset": 0, "current_iter": 0}, "dan_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/dan_Latn/fineweb_2_hq.dan_Latn.chunk.05.jsonl", "position": 855758177, "block_size": 1, "offset": 0, "current_iter": 0}, "swe_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/swe_Latn/fineweb_2_hq.swe_Latn.chunk.05.jsonl", "position": 805230779, "block_size": 1, "offset": 0, "current_iter": 0}, "vie_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/vie_Latn/fineweb_2_hq.vie_Latn.chunk.05.jsonl", "position": 865975940, "block_size": 1, "offset": 0, "current_iter": 0}, "hun_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/hun_Latn/fineweb_2_hq.hun_Latn.chunk.05.jsonl", "position": 1139148560, "block_size": 1, "offset": 0, "current_iter": 0}, "fas_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/fas_Arab/fineweb_2_hq.fas_Arab.chunk.05.jsonl", "position": 954158915, "block_size": 1, "offset": 0, "current_iter": 0}, "tur_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/tur_Latn/fineweb_2_hq.tur_Latn.chunk.05.jsonl", "position": 728028670, "block_size": 1, "offset": 0, "current_iter": 0}, "ces_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ces_Latn/fineweb_2_hq.ces_Latn.chunk.05.jsonl", "position": 1126496064, "block_size": 1, "offset": 0, "current_iter": 0}, "arb_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/arb_Arab/fineweb_2_hq.arb_Arab.chunk.05.jsonl", "position": 1268807034, "block_size": 1, "offset": 0, "current_iter": 0}, "ell_Grek": {"file_path": "/scratch/craffel/lingua/data/flexitok/ell_Grek/fineweb_2_hq.ell_Grek.chunk.05.jsonl", "position": 1583565806, "block_size": 1, "offset": 0, "current_iter": 0}, "ind_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ind_Latn/fineweb_2_hq.ind_Latn.chunk.05.jsonl", "position": 1114126414, "block_size": 1, "offset": 0, "current_iter": 0}, "nld_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/nld_Latn/fineweb_2_hq.nld_Latn.chunk.05.jsonl", "position": 878345774, "block_size": 1, "offset": 0, "current_iter": 0}, "pol_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/pol_Latn/fineweb_2_hq.pol_Latn.chunk.05.jsonl", "position": 942690263, "block_size": 1, "offset": 0, "current_iter": 0}, "por_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/por_Latn/fineweb_2_hq.por_Latn.chunk.05.jsonl", "position": 969223589, "block_size": 1, "offset": 0, "current_iter": 0}, "ita_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ita_Latn/fineweb_2_hq.ita_Latn.chunk.05.jsonl", "position": 1078916648, "block_size": 1, "offset": 0, "current_iter": 0}, "jpn_Jpan": {"file_path": "/scratch/craffel/lingua/data/flexitok/jpn_Jpan/fineweb_2_hq.jpn_Jpan.chunk.05.jsonl", "position": 1132341386, "block_size": 1, "offset": 0, "current_iter": 0}, "fra_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/fra_Latn/fineweb_2_hq.fra_Latn.chunk.05.jsonl", "position": 1504422561, "block_size": 1, "offset": 0, "current_iter": 0}, "spa_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/spa_Latn/fineweb_2_hq.spa_Latn.chunk.05.jsonl", "position": 1357914214, "block_size": 1, "offset": 0, "current_iter": 0}, "deu_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/deu_Latn/fineweb_2_hq.deu_Latn.chunk.05.jsonl", "position": 1632469661, "block_size": 1, "offset": 0, "current_iter": 0}, "cmn_Hani": {"file_path": "/scratch/craffel/lingua/data/flexitok/cmn_Hani/fineweb_2_hq.cmn_Hani.chunk.05.jsonl", "position": 1731104702, "block_size": 1, "offset": 0, "current_iter": 0}, "rus_Cyrl": {"file_path": "/scratch/craffel/lingua/data/flexitok/rus_Cyrl/fineweb_2_hq.rus_Cyrl.chunk.05.jsonl", "position": 4052404023, "block_size": 1, "offset": 0, "current_iter": 0}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 323925957378615211990604149014042344515, "inc": 329233669073478483697346584247981015037}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "supertokenizer", "path": "meta-llama/Llama-3.2-1B", "tokenizers": [{"name": "huggingface", "path": "flexitok/reindexed-CohereLabs-aya-expanse-8b", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-bigscience-bloom", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-common-pile-comma-v0.1", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-google-gemma-2-2b", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-gpt2", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-meta-llama-Llama-3.2-1B", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-google-bert-bert-base-multilingual-cased", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-microsoft-Phi-3-mini-4k-instruct", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-Qwen-Qwen3-8B", "load_supermapping": false}, {"name": "huggingface", "path": "facebook/xglm-564M", "load_supermapping": true}, {"name": "huggingface", "path": "google/byt5-small", "load_supermapping": true}, {"name": "tiktoken", "path": "gpt-4o", "load_supermapping": true}, {"name": "tekken", "path": "tekken", "load_supermapping": true}, {"name": "tokenmonster", "path": "englishcode-32000-consistent-v1", "load_supermapping": true}], "dropout": 0.0, "rng_state": {"bit_generator": "PCG64", "state": {"state": 99276483625251153229459379272765619712, "inc": 332724090758049132448979897138935081983}, "has_uint32": 1, "uinteger": 3090549865}, "seed": 42}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 512, "rng_state": {"bit_generator": "PCG64", "state": {"state": 82877314001698292157004203937872167477, "inc": 47382953940698287647753879262736142901}, "has_uint32": 0, "uinteger": 1448169871}, "batch_size": 2, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.001], "last_epoch": 100000, "verbose": false, "_step_count": 100001, "_get_lr_called_within_step": false, "_last_lr": [0.001], "lr_lambdas": [{}]}}
flexitok_superset_albert_w_xglm/0000100000/train_state_00006.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"step": 100000, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 9173, "it_state": {"it_state": {"root_dir": "/scratch/craffel/lingua/data/flexitok/", "sources": {"fw_edu": 0.4, "dan_Latn": 0.0216582869670702, "swe_Latn": 0.0216359765418466, "vie_Latn": 0.0197485510268674, "hun_Latn": 0.0247194573562308, "fas_Arab": 0.0205634624231076, "tur_Latn": 0.0235455794841729, "ces_Latn": 0.0248024455266208, "arb_Arab": 0.0234323706569333, "ell_Grek": 0.0233670886888026, "ind_Latn": 0.0269322054593488, "nld_Latn": 0.0277796326621489, "pol_Latn": 0.0294120104572311, "por_Latn": 0.0301413168306825, "ita_Latn": 0.0324056371021865, "jpn_Jpan": 0.03553104151369, "fra_Latn": 0.0381835560678536, "spa_Latn": 0.0387222793083669, "deu_Latn": 0.0419925340453022, "cmn_Hani": 0.0454067521384114, "rus_Cyrl": 0.0500198157431261}, "source_to_state": {"fw_edu": {"file_path": "/scratch/craffel/lingua/data/flexitok/fw_edu/fineweb_edu_100bt.chunk.06.jsonl", "position": 18717989411, "block_size": 1, "offset": 0, "current_iter": 0}, "dan_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/dan_Latn/fineweb_2_hq.dan_Latn.chunk.06.jsonl", "position": 864621600, "block_size": 1, "offset": 0, "current_iter": 0}, "swe_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/swe_Latn/fineweb_2_hq.swe_Latn.chunk.06.jsonl", "position": 809781894, "block_size": 1, "offset": 0, "current_iter": 0}, "vie_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/vie_Latn/fineweb_2_hq.vie_Latn.chunk.06.jsonl", "position": 859956896, "block_size": 1, "offset": 0, "current_iter": 0}, "hun_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/hun_Latn/fineweb_2_hq.hun_Latn.chunk.06.jsonl", "position": 1139881758, "block_size": 1, "offset": 0, "current_iter": 0}, "fas_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/fas_Arab/fineweb_2_hq.fas_Arab.chunk.06.jsonl", "position": 956783652, "block_size": 1, "offset": 0, "current_iter": 0}, "tur_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/tur_Latn/fineweb_2_hq.tur_Latn.chunk.06.jsonl", "position": 734206700, "block_size": 1, "offset": 0, "current_iter": 0}, "ces_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ces_Latn/fineweb_2_hq.ces_Latn.chunk.06.jsonl", "position": 1127190328, "block_size": 1, "offset": 0, "current_iter": 0}, "arb_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/arb_Arab/fineweb_2_hq.arb_Arab.chunk.06.jsonl", "position": 1263165305, "block_size": 1, "offset": 0, "current_iter": 0}, "ell_Grek": {"file_path": "/scratch/craffel/lingua/data/flexitok/ell_Grek/fineweb_2_hq.ell_Grek.chunk.06.jsonl", "position": 1565317964, "block_size": 1, "offset": 0, "current_iter": 0}, "ind_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ind_Latn/fineweb_2_hq.ind_Latn.chunk.06.jsonl", "position": 1119560346, "block_size": 1, "offset": 0, "current_iter": 0}, "nld_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/nld_Latn/fineweb_2_hq.nld_Latn.chunk.06.jsonl", "position": 882858501, "block_size": 1, "offset": 0, "current_iter": 0}, "pol_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/pol_Latn/fineweb_2_hq.pol_Latn.chunk.06.jsonl", "position": 931576602, "block_size": 1, "offset": 0, "current_iter": 0}, "por_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/por_Latn/fineweb_2_hq.por_Latn.chunk.06.jsonl", "position": 968262481, "block_size": 1, "offset": 0, "current_iter": 0}, "ita_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ita_Latn/fineweb_2_hq.ita_Latn.chunk.06.jsonl", "position": 1078946266, "block_size": 1, "offset": 0, "current_iter": 0}, "jpn_Jpan": {"file_path": "/scratch/craffel/lingua/data/flexitok/jpn_Jpan/fineweb_2_hq.jpn_Jpan.chunk.06.jsonl", "position": 1122764819, "block_size": 1, "offset": 0, "current_iter": 0}, "fra_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/fra_Latn/fineweb_2_hq.fra_Latn.chunk.06.jsonl", "position": 1502532247, "block_size": 1, "offset": 0, "current_iter": 0}, "spa_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/spa_Latn/fineweb_2_hq.spa_Latn.chunk.06.jsonl", "position": 1346671714, "block_size": 1, "offset": 0, "current_iter": 0}, "deu_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/deu_Latn/fineweb_2_hq.deu_Latn.chunk.06.jsonl", "position": 1639367760, "block_size": 1, "offset": 0, "current_iter": 0}, "cmn_Hani": {"file_path": "/scratch/craffel/lingua/data/flexitok/cmn_Hani/fineweb_2_hq.cmn_Hani.chunk.06.jsonl", "position": 1736379989, "block_size": 1, "offset": 0, "current_iter": 0}, "rus_Cyrl": {"file_path": "/scratch/craffel/lingua/data/flexitok/rus_Cyrl/fineweb_2_hq.rus_Cyrl.chunk.06.jsonl", "position": 4044531393, "block_size": 1, "offset": 0, "current_iter": 0}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 85481619834468246915071693351973944896, "inc": 95963489890761403814531195999220475639}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "supertokenizer", "path": "meta-llama/Llama-3.2-1B", "tokenizers": [{"name": "huggingface", "path": "flexitok/reindexed-CohereLabs-aya-expanse-8b", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-bigscience-bloom", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-common-pile-comma-v0.1", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-google-gemma-2-2b", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-gpt2", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-meta-llama-Llama-3.2-1B", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-google-bert-bert-base-multilingual-cased", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-microsoft-Phi-3-mini-4k-instruct", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-Qwen-Qwen3-8B", "load_supermapping": false}, {"name": "huggingface", "path": "facebook/xglm-564M", "load_supermapping": true}, {"name": "huggingface", "path": "google/byt5-small", "load_supermapping": true}, {"name": "tiktoken", "path": "gpt-4o", "load_supermapping": true}, {"name": "tekken", "path": "tekken", "load_supermapping": true}, {"name": "tokenmonster", "path": "englishcode-32000-consistent-v1", "load_supermapping": true}], "dropout": 0.0, "rng_state": {"bit_generator": "PCG64", "state": {"state": 87363988073630502478330763910195893790, "inc": 332724090758049132448979897138935081983}, "has_uint32": 1, "uinteger": 3276602516}, "seed": 42}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 512, "rng_state": {"bit_generator": "PCG64", "state": {"state": 333370399872005440711554408543586094564, "inc": 72545526324180839152750112646078969085}, "has_uint32": 0, "uinteger": 338707008}, "batch_size": 2, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.001], "last_epoch": 100000, "verbose": false, "_step_count": 100001, "_get_lr_called_within_step": false, "_last_lr": [0.001], "lr_lambdas": [{}]}}
flexitok_superset_albert_w_xglm/0000100000/train_state_00007.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"step": 100000, "acc_step": 0, "data_loader_state": {"it_state": {"start_token": 2769, "it_state": {"it_state": {"root_dir": "/scratch/craffel/lingua/data/flexitok/", "sources": {"fw_edu": 0.4, "dan_Latn": 0.0216582869670702, "swe_Latn": 0.0216359765418466, "vie_Latn": 0.0197485510268674, "hun_Latn": 0.0247194573562308, "fas_Arab": 0.0205634624231076, "tur_Latn": 0.0235455794841729, "ces_Latn": 0.0248024455266208, "arb_Arab": 0.0234323706569333, "ell_Grek": 0.0233670886888026, "ind_Latn": 0.0269322054593488, "nld_Latn": 0.0277796326621489, "pol_Latn": 0.0294120104572311, "por_Latn": 0.0301413168306825, "ita_Latn": 0.0324056371021865, "jpn_Jpan": 0.03553104151369, "fra_Latn": 0.0381835560678536, "spa_Latn": 0.0387222793083669, "deu_Latn": 0.0419925340453022, "cmn_Hani": 0.0454067521384114, "rus_Cyrl": 0.0500198157431261}, "source_to_state": {"fw_edu": {"file_path": "/scratch/craffel/lingua/data/flexitok/fw_edu/fineweb_edu_100bt.chunk.07.jsonl", "position": 18701030539, "block_size": 1, "offset": 0, "current_iter": 0}, "dan_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/dan_Latn/fineweb_2_hq.dan_Latn.chunk.07.jsonl", "position": 862317455, "block_size": 1, "offset": 0, "current_iter": 0}, "swe_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/swe_Latn/fineweb_2_hq.swe_Latn.chunk.07.jsonl", "position": 808705353, "block_size": 1, "offset": 0, "current_iter": 0}, "vie_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/vie_Latn/fineweb_2_hq.vie_Latn.chunk.07.jsonl", "position": 862602964, "block_size": 1, "offset": 0, "current_iter": 0}, "hun_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/hun_Latn/fineweb_2_hq.hun_Latn.chunk.07.jsonl", "position": 1139176722, "block_size": 1, "offset": 0, "current_iter": 0}, "fas_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/fas_Arab/fineweb_2_hq.fas_Arab.chunk.07.jsonl", "position": 964383454, "block_size": 1, "offset": 0, "current_iter": 0}, "tur_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/tur_Latn/fineweb_2_hq.tur_Latn.chunk.07.jsonl", "position": 737389058, "block_size": 1, "offset": 0, "current_iter": 0}, "ces_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ces_Latn/fineweb_2_hq.ces_Latn.chunk.07.jsonl", "position": 1117779893, "block_size": 1, "offset": 0, "current_iter": 0}, "arb_Arab": {"file_path": "/scratch/craffel/lingua/data/flexitok/arb_Arab/fineweb_2_hq.arb_Arab.chunk.07.jsonl", "position": 1268795173, "block_size": 1, "offset": 0, "current_iter": 0}, "ell_Grek": {"file_path": "/scratch/craffel/lingua/data/flexitok/ell_Grek/fineweb_2_hq.ell_Grek.chunk.07.jsonl", "position": 1582407418, "block_size": 1, "offset": 0, "current_iter": 0}, "ind_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ind_Latn/fineweb_2_hq.ind_Latn.chunk.07.jsonl", "position": 1107790464, "block_size": 1, "offset": 0, "current_iter": 0}, "nld_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/nld_Latn/fineweb_2_hq.nld_Latn.chunk.07.jsonl", "position": 888299623, "block_size": 1, "offset": 0, "current_iter": 0}, "pol_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/pol_Latn/fineweb_2_hq.pol_Latn.chunk.07.jsonl", "position": 936764740, "block_size": 1, "offset": 0, "current_iter": 0}, "por_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/por_Latn/fineweb_2_hq.por_Latn.chunk.07.jsonl", "position": 961682100, "block_size": 1, "offset": 0, "current_iter": 0}, "ita_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/ita_Latn/fineweb_2_hq.ita_Latn.chunk.07.jsonl", "position": 1079238792, "block_size": 1, "offset": 0, "current_iter": 0}, "jpn_Jpan": {"file_path": "/scratch/craffel/lingua/data/flexitok/jpn_Jpan/fineweb_2_hq.jpn_Jpan.chunk.07.jsonl", "position": 1138697496, "block_size": 1, "offset": 0, "current_iter": 0}, "fra_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/fra_Latn/fineweb_2_hq.fra_Latn.chunk.07.jsonl", "position": 1508184932, "block_size": 1, "offset": 0, "current_iter": 0}, "spa_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/spa_Latn/fineweb_2_hq.spa_Latn.chunk.07.jsonl", "position": 1357616042, "block_size": 1, "offset": 0, "current_iter": 0}, "deu_Latn": {"file_path": "/scratch/craffel/lingua/data/flexitok/deu_Latn/fineweb_2_hq.deu_Latn.chunk.07.jsonl", "position": 1626467934, "block_size": 1, "offset": 0, "current_iter": 0}, "cmn_Hani": {"file_path": "/scratch/craffel/lingua/data/flexitok/cmn_Hani/fineweb_2_hq.cmn_Hani.chunk.07.jsonl", "position": 1737624127, "block_size": 1, "offset": 0, "current_iter": 0}, "rus_Cyrl": {"file_path": "/scratch/craffel/lingua/data/flexitok/rus_Cyrl/fineweb_2_hq.rus_Cyrl.chunk.07.jsonl", "position": 4030189856, "block_size": 1, "offset": 0, "current_iter": 0}}, "rng_state": {"bit_generator": "PCG64", "state": {"state": 276744278343203400108210441230982120882, "inc": 53245743019587277358203950863334653629}, "has_uint32": 0, "uinteger": 0}}, "add_bos": true, "add_eos": true, "name": "supertokenizer", "path": "meta-llama/Llama-3.2-1B", "tokenizers": [{"name": "huggingface", "path": "flexitok/reindexed-CohereLabs-aya-expanse-8b", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-bigscience-bloom", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-common-pile-comma-v0.1", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-google-gemma-2-2b", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-gpt2", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-meta-llama-Llama-3.2-1B", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-google-bert-bert-base-multilingual-cased", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-microsoft-Phi-3-mini-4k-instruct", "load_supermapping": false}, {"name": "huggingface", "path": "flexitok/reindexed-Qwen-Qwen3-8B", "load_supermapping": false}, {"name": "huggingface", "path": "facebook/xglm-564M", "load_supermapping": true}, {"name": "huggingface", "path": "google/byt5-small", "load_supermapping": true}, {"name": "tiktoken", "path": "gpt-4o", "load_supermapping": true}, {"name": "tekken", "path": "tekken", "load_supermapping": true}, {"name": "tokenmonster", "path": "englishcode-32000-consistent-v1", "load_supermapping": true}], "dropout": 0.0, "rng_state": {"bit_generator": "PCG64", "state": {"state": 206193056749194630121881000864981655595, "inc": 332724090758049132448979897138935081983}, "has_uint32": 0, "uinteger": 4253535371}, "seed": 42}, "output_seq_len": 4096, "n_views": 2}, "seq_idx": 512, "rng_state": {"bit_generator": "PCG64", "state": {"state": 225591492178849012690493284775251617576, "inc": 19761753544780285878460645500694854795}, "has_uint32": 1, "uinteger": 416179249}, "batch_size": 2, "prefetch_size": 1024}, "scheduler": {"base_lrs": [0.001], "last_epoch": 100000, "verbose": false, "_step_count": 100001, "_get_lr_called_within_step": false, "_last_lr": [0.001], "lr_lambdas": [{}]}}