Buckets:
| dump_dir: /fsx/craffel/lingua_logs/script_1 | |
| name: script_1 | |
| steps: 100000 | |
| probe_freq: null | |
| seed: 777 | |
| grad_acc_steps: 8 | |
| optim: | |
| lr: 0.001 | |
| weight_decay: 0.1 | |
| epsilon: 1.0e-08 | |
| scheduler: cosine | |
| warmup: 2000 | |
| lr_min_ratio: 1.0e-06 | |
| clip: 1.0 | |
| distributed: | |
| fsdp_type: full_shard | |
| model_dtype: bf16 | |
| matmul_allow_tf32: false | |
| selective_activation_checkpointing: false | |
| tp_size: 1 | |
| compile: true | |
| model: | |
| dim: 2048 | |
| n_layers: 25 | |
| n_heads: 16 | |
| weight_tying: false | |
| vocab_size: 165022 | |
| data: | |
| root_dir: /scratch/craffel/lingua/data/flexitok/ | |
| sources: | |
| fw_edu: 0.4 | |
| dan_Latn: 0.0216582869670702 | |
| swe_Latn: 0.0216359765418466 | |
| vie_Latn: 0.0197485510268674 | |
| hun_Latn: 0.0247194573562308 | |
| fas_Arab: 0.0205634624231076 | |
| tur_Latn: 0.0235455794841729 | |
| ces_Latn: 0.0248024455266208 | |
| arb_Arab: 0.0234323706569333 | |
| ell_Grek: 0.0233670886888026 | |
| ind_Latn: 0.0269322054593488 | |
| nld_Latn: 0.0277796326621489 | |
| pol_Latn: 0.0294120104572311 | |
| por_Latn: 0.0301413168306825 | |
| ita_Latn: 0.0324056371021865 | |
| jpn_Jpan: 0.03553104151369 | |
| fra_Latn: 0.0381835560678536 | |
| spa_Latn: 0.0387222793083669 | |
| deu_Latn: 0.0419925340453022 | |
| cmn_Hani: 0.0454067521384114 | |
| rus_Cyrl: 0.0500198157431261 | |
| batch_size: 4 | |
| prefetch_size: 1024 | |
| seq_len: 4096 | |
| n_views: 2 | |
| load_async: true | |
| add_eos: true | |
| tokenizer: | |
| name: supertokenizer | |
| path: meta-llama/Llama-3.2-1B | |
| seed: 42 | |
| superset_code_name: script_1 | |
| n_words: 165022 | |
| tokenizers: | |
| - name: huggingface | |
| path: flexitok/bpe_script_Arab_16000 | |
| load_supermapping: true | |
| - name: huggingface | |
| path: flexitok/bpe_script_CmJp_16000 | |
| load_supermapping: true | |
| - name: huggingface | |
| path: flexitok/bpe_ltr_ell_Grek_8000_v2 | |
| load_supermapping: true | |
| - name: huggingface | |
| path: flexitok/bpe_ltr_fw_edu_32000_v2 | |
| load_supermapping: true | |
| - name: huggingface | |
| path: flexitok/bpe_ltr_hun_Latn_8000_v2 | |
| load_supermapping: true | |
| - name: huggingface | |
| path: flexitok/bpe_ltr_rus_Cyrl_16000_v2 | |
| load_supermapping: true | |
| - name: huggingface | |
| path: flexitok/bpe_ltr_tur_Latn_8000_v2 | |
| load_supermapping: true | |
| - name: huggingface | |
| path: flexitok/bpe_script_Germ_32000 | |
| load_supermapping: true | |
| - name: huggingface | |
| path: flexitok/bpe_script_Roma_32000 | |
| load_supermapping: true | |
| - name: huggingface | |
| path: flexitok/bpe_script_SEAS_16000 | |
| load_supermapping: true | |
| - name: huggingface | |
| path: flexitok/bpe_script_Slav_16000 | |
| load_supermapping: true | |
| routing: | |
| suitable_tokenizer_probability: 0.9 | |
| source_to_tokenizer: | |
| arb_Arab: flexitok/bpe_script_Arab_16000 | |
| fas_Arab: flexitok/bpe_script_Arab_16000 | |
| cmn_Hani: flexitok/bpe_script_CmJp_16000 | |
| jpn_Jpan: flexitok/bpe_script_CmJp_16000 | |
| ell_Grek: flexitok/bpe_ltr_ell_Grek_8000_v2 | |
| fw_edu: flexitok/bpe_ltr_fw_edu_32000_v2 | |
| hun_Latn: flexitok/bpe_ltr_hun_Latn_8000_v2 | |
| rus_Cyrl: flexitok/bpe_ltr_rus_Cyrl_16000_v2 | |
| tur_Latn: flexitok/bpe_ltr_tur_Latn_8000_v2 | |
| dan_Latn: flexitok/bpe_script_Germ_32000 | |
| deu_Latn: flexitok/bpe_script_Germ_32000 | |
| nld_Latn: flexitok/bpe_script_Germ_32000 | |
| swe_Latn: flexitok/bpe_script_Germ_32000 | |
| fra_Latn: flexitok/bpe_script_Roma_32000 | |
| ita_Latn: flexitok/bpe_script_Roma_32000 | |
| por_Latn: flexitok/bpe_script_Roma_32000 | |
| spa_Latn: flexitok/bpe_script_Roma_32000 | |
| ind_Latn: flexitok/bpe_script_SEAS_16000 | |
| vie_Latn: flexitok/bpe_script_SEAS_16000 | |
| ces_Latn: flexitok/bpe_script_Slav_16000 | |
| pol_Latn: flexitok/bpe_script_Slav_16000 | |
| profiling: | |
| run: true | |
| mem_warmup: 0 | |
| mem_steps: 4 | |
| profile_warmup: 100 | |
| profile_steps: 4 | |
| checkpoint: | |
| path: /fsx/craffel/lingua_logs/checkpoints/script_1 | |
| init_ckpt_path: null | |
| dump: | |
| every: 10000 | |
| keep: -1 | |
| eval: | |
| every: 10000 | |
| keep: -1 | |
| logging: | |
| freq: 1 | |
| async_eval_gpus: 8 | |
| eval: | |
| harness: | |
| tasks: | |
| - hellaswag | |
| - piqa | |
| - arc_easy | |
| - arc_challenge | |
| - include_base_44_arabic | |
| - include_base_44_chinese | |
| - include_base_44_german | |
| - include_base_44_greek | |
| - include_base_44_persian | |
| - include_base_44_french | |
| - include_base_44_hungarian | |
| - include_base_44_indonesian | |
| - include_base_44_italian | |
| - include_base_44_japanese | |
| - include_base_44_dutch | |
| - include_base_44_portuguese | |
| - include_base_44_russian | |
| - include_base_44_spanish | |
| - include_base_44_turkish | |
| - include_base_44_vietnamese | |
| - belebele_arb_Arab | |
| - belebele_ces_Latn | |
| - belebele_zho_Hans | |
| - belebele_dan_Latn | |
| - belebele_deu_Latn | |
| - belebele_ell_Grek | |
| - belebele_pes_Arab | |
| - belebele_fra_Latn | |
| - belebele_hun_Latn | |
| - belebele_ind_Latn | |
| - belebele_ita_Latn | |
| - belebele_jpn_Jpan | |
| - belebele_nld_Latn | |
| - belebele_pol_Latn | |
| - belebele_por_Latn | |
| - belebele_rus_Cyrl | |
| - belebele_spa_Latn | |
| - belebele_swe_Latn | |
| - belebele_tur_Latn | |
| - belebele_vie_Latn | |
| - belebele_eng_Latn | |
| - xnli_ar | |
| - xnli_zh | |
| - xnli_de | |
| - xnli_el | |
| - xnli_en | |
| - xnli_es | |
| - xnli_fr | |
| - xnli_ru | |
| - xnli_tr | |
| - xnli_vi | |
| generator: | |
| max_tokens: 16384 | |
| dtype: bf16 | |
| add_bos: false | |
Xet Storage Details
- Size:
- 5.24 kB
- Xet hash:
- 987ae7d7883627f1a464bccf8cc0469571b93fb2a16d9910942de4042f073dae
·
Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.