{"name": "script_1", "dump_dir": "/fsx/craffel/lingua_logs/script_1", "seed": 777, "grad_acc_steps": 8, "gc_collect_freq": 1000, "probe_freq": null, "steps": 100000, "data": {"root_dir": "/scratch/craffel/lingua/data/flexitok/", "sources": {"fw_edu": 0.4, "dan_Latn": 0.0216582869670702, "swe_Latn": 0.0216359765418466, "vie_Latn": 0.0197485510268674, "hun_Latn": 0.0247194573562308, "fas_Arab": 0.0205634624231076, "tur_Latn": 0.0235455794841729, "ces_Latn": 0.0248024455266208, "arb_Arab": 0.0234323706569333, "ell_Grek": 0.0233670886888026, "ind_Latn": 0.0269322054593488, "nld_Latn": 0.0277796326621489, "pol_Latn": 0.0294120104572311, "por_Latn": 0.0301413168306825, "ita_Latn": 0.0324056371021865, "jpn_Jpan": 0.03553104151369, "fra_Latn": 0.0381835560678536, "spa_Latn": 0.0387222793083669, "deu_Latn": 0.0419925340453022, "cmn_Hani": 0.0454067521384114, "rus_Cyrl": 0.0500198157431261}, "batch_size": 4, "seq_len": 4096, "n_views": 2, "seed": 42, "add_bos": true, "add_eos": true, "load_async": true, "prefetch_size": 1024, "tokenizer": {"name": "supertokenizer", "path": "meta-llama/Llama-3.2-1B", "tokenizers": [{"name": "huggingface", "path": "flexitok/bpe_script_Arab_16000", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_script_CmJp_16000", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_ell_Grek_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_fw_edu_32000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_hun_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_rus_Cyrl_16000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_ltr_tur_Latn_8000_v2", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_script_Germ_32000", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_script_Roma_32000", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_script_SEAS_16000", "load_supermapping": true}, {"name": "huggingface", "path": "flexitok/bpe_script_Slav_16000", "load_supermapping": true}], "load_supermapping": false, "dropout": 0.0, "seed": 42, "superset_code_name": "script_1", "n_words": 165022}, "routing": {"source_to_tokenizer": {"arb_Arab": "flexitok/bpe_script_Arab_16000", "fas_Arab": "flexitok/bpe_script_Arab_16000", "cmn_Hani": "flexitok/bpe_script_CmJp_16000", "jpn_Jpan": "flexitok/bpe_script_CmJp_16000", "ell_Grek": "flexitok/bpe_ltr_ell_Grek_8000_v2", "fw_edu": "flexitok/bpe_ltr_fw_edu_32000_v2", "hun_Latn": "flexitok/bpe_ltr_hun_Latn_8000_v2", "rus_Cyrl": "flexitok/bpe_ltr_rus_Cyrl_16000_v2", "tur_Latn": "flexitok/bpe_ltr_tur_Latn_8000_v2", "dan_Latn": "flexitok/bpe_script_Germ_32000", "deu_Latn": "flexitok/bpe_script_Germ_32000", "nld_Latn": "flexitok/bpe_script_Germ_32000", "swe_Latn": "flexitok/bpe_script_Germ_32000", "fra_Latn": "flexitok/bpe_script_Roma_32000", "ita_Latn": "flexitok/bpe_script_Roma_32000", "por_Latn": "flexitok/bpe_script_Roma_32000", "spa_Latn": "flexitok/bpe_script_Roma_32000", "ind_Latn": "flexitok/bpe_script_SEAS_16000", "vie_Latn": "flexitok/bpe_script_SEAS_16000", "ces_Latn": "flexitok/bpe_script_Slav_16000", "pol_Latn": "flexitok/bpe_script_Slav_16000"}, "task_to_tokenizer": {}, "suitable_tokenizer_probability": 0.9}}, "optim": {"lr": 0.001, "weight_decay": 0.1, "epsilon": 1e-08, "beta1": 0.9, "beta2": 0.95, "clip": 1.0, "scheduler": "cosine", "warmup": 2000, "lr_min_ratio": 1e-06, "cycle_length": 1.0, "cosine_theta": 1.0, "annealing_step": 1000, "decay_fraction": 0.1, "exp_factor": 0.5}, "model": {"dim": 2048, "n_layers": 25, "head_dim": null, "n_heads": 16, "n_kv_heads": null, "ffn_dim_multiplier": null, "multiple_of": 256, "norm_eps": 1e-05, "rope_theta": 10000.0, "init_base_std": null, "init_std_factor": "disabled", "max_seqlen": 4096, "seed": 42, "vocab_size": 165022, "weight_tying": false, "sliding_window": null, "use_factorized_embeddings": false, "factorized_embedding_dim": 0}, "distributed": {"dp_shard": 1, "dp_replicate": 8, "tp_size": 1, "selective_activation_checkpointing": false, "compile": true, "fsdp_type": "full_shard", "model_dtype": "bf16", "float8_recipe": null, "float8_filter": "layers\\.[0-9]+\\.", "matmul_allow_tf32": false, "detect_anomaly": false, "compile_cache_size_limit": 8, "spawn_method": "forkserver"}, "env": {"MKL_SERVICE_FORCE_INTEL": "GNU", "OMP_NUM_THREADS": "1", "MKL_NUM_THREADS": "1", "ENABLE_INTRA_NODE_COMM": "1", "TORCH_NCCL_AVOID_RECORD_STREAMS": "1", "NCCL_IB_TIMEOUT": "22", "NCCL_DEBUG": "INFO", "TORCH_NCCL_ASYNC_ERROR_HANDLING": "1"}, "checkpoint": {"dump": {"every": 10000, "keep": -1}, "eval": {"every": 10000, "keep": -1}, "path": "/fsx/craffel/lingua_logs/checkpoints/script_1", "init_ckpt_path": null, "load_init_optimizer_state": false, "save_init_ckpt": false}, "profiling": {"run": true, "trace_folder": "profiling", "mem_warmup": 0, "mem_steps": 4, "profile_warmup": 100, "profile_steps": 4}, "logging": {"freq": 1, "acc_freq": null, "wandb": null}, "async_eval_gpus": 8, "eval": {"harness": {"tasks": ["hellaswag", "piqa", "arc_easy", "arc_challenge", "include_base_44_arabic", "include_base_44_chinese", "include_base_44_german", "include_base_44_greek", "include_base_44_persian", "include_base_44_french", "include_base_44_hungarian", "include_base_44_indonesian", "include_base_44_italian", "include_base_44_japanese", "include_base_44_dutch", "include_base_44_polish", "include_base_44_portuguese", "include_base_44_russian", "include_base_44_spanish", "include_base_44_turkish", "include_base_44_vietnamese", "belebele_arb_Arab", "belebele_ces_Latn", "belebele_zho_Hans", "belebele_dan_Latn", "belebele_deu_Latn", "belebele_ell_Grek", "belebele_pes_Arab", "belebele_fra_Latn", "belebele_hun_Latn", "belebele_ind_Latn", "belebele_ita_Latn", "belebele_jpn_Jpan", "belebele_nld_Latn", "belebele_pol_Latn", "belebele_por_Latn", "belebele_rus_Cyrl", "belebele_spa_Latn", "belebele_swe_Latn", "belebele_tur_Latn", "belebele_vie_Latn", "belebele_eng_Latn", "xnli_ar", "xnli_zh", "xnli_de", "xnli_el", "xnli_en", "xnli_es", "xnli_fr", "xnli_ru", "xnli_tr", "xnli_vi"]}, "generator": {"max_tokens": 16384, "dtype": "bf16", "add_bos": false}}}