# craffel/moto_checkpoints / script_1 / base_config.yaml
# craffel's picture · download · raw · 5.24 kB
# Run-level settings (top-level scalars).
# Run identifier; it is also the last path component of dump_dir.
name: script_1
dump_dir: /fsx/craffel/lingua_logs/script_1
seed: 777
steps: 100000
grad_acc_steps: 8
# Explicit null: no value is configured for this key.
probe_freq: null
# Optimizer and learning-rate schedule settings.
# The children must be indented under `optim`; written flush-left, `optim`
# parses as null and these keys become unrelated top-level entries.
optim:
  lr: 0.001
  weight_decay: 0.1
  epsilon: 1.0e-08
  scheduler: cosine
  # NOTE(review): presumably counted in optimizer steps — confirm with consumer.
  warmup: 2000
  lr_min_ratio: 1.0e-06
  clip: 1.0
# Distributed-execution settings.
# Children are nested under `distributed`; without indentation the parent
# would be null and these keys would land at the top level.
distributed:
  fsdp_type: full_shard
  model_dtype: bf16
  matmul_allow_tf32: false
  selective_activation_checkpointing: false
  tp_size: 1
  compile: true
# Model architecture settings, nested under `model` (flush-left children
# would leave `model` null).
model:
  dim: 2048
  n_layers: 25
  n_heads: 16
  weight_tying: false
  # Same value as `n_words` in the tokenizer settings of this file (165022).
  vocab_size: 165022
# Data-loading settings: sources and their weights, batching, and the
# tokenizer setup.
# Nesting is restored from key meaning. Written flush-left, every key below
# collapses into one mapping with many duplicates (`name`, `path`,
# `load_supermapping`, and each source name).
data:
  root_dir: /scratch/craffel/lingua/data/flexitok/
  # Source name -> weight. The 21 weights below sum to 1.0
  # (fw_edu: 0.4, the other 20 sources: 0.6 in total).
  sources:
    fw_edu: 0.4
    dan_Latn: 0.0216582869670702
    swe_Latn: 0.0216359765418466
    vie_Latn: 0.0197485510268674
    hun_Latn: 0.0247194573562308
    fas_Arab: 0.0205634624231076
    tur_Latn: 0.0235455794841729
    ces_Latn: 0.0248024455266208
    arb_Arab: 0.0234323706569333
    ell_Grek: 0.0233670886888026
    ind_Latn: 0.0269322054593488
    nld_Latn: 0.0277796326621489
    pol_Latn: 0.0294120104572311
    por_Latn: 0.0301413168306825
    ita_Latn: 0.0324056371021865
    jpn_Jpan: 0.03553104151369
    fra_Latn: 0.0381835560678536
    spa_Latn: 0.0387222793083669
    deu_Latn: 0.0419925340453022
    cmn_Hani: 0.0454067521384114
    rus_Cyrl: 0.0500198157431261
  batch_size: 4
  prefetch_size: 1024
  seq_len: 4096
  n_views: 2
  load_async: true
  add_eos: true
  tokenizer:
    name: supertokenizer
    path: meta-llama/Llama-3.2-1B
    # NOTE(review): `seed` and every key after it are assumed to belong to
    # `tokenizer` (all of them are tokenizer-related); the original nesting
    # was lost — confirm `seed` is not a `data`-level key.
    seed: 42
    superset_code_name: script_1
    # Same value as `vocab_size` in the model settings of this file (165022).
    n_words: 165022
    # The 11 component tokenizers; each path is referenced at least once in
    # `source_to_tokenizer` below.
    tokenizers:
      - name: huggingface
        path: flexitok/bpe_script_Arab_16000
        load_supermapping: true
      - name: huggingface
        path: flexitok/bpe_script_CmJp_16000
        load_supermapping: true
      - name: huggingface
        path: flexitok/bpe_ltr_ell_Grek_8000_v2
        load_supermapping: true
      - name: huggingface
        path: flexitok/bpe_ltr_fw_edu_32000_v2
        load_supermapping: true
      - name: huggingface
        path: flexitok/bpe_ltr_hun_Latn_8000_v2
        load_supermapping: true
      - name: huggingface
        path: flexitok/bpe_ltr_rus_Cyrl_16000_v2
        load_supermapping: true
      - name: huggingface
        path: flexitok/bpe_ltr_tur_Latn_8000_v2
        load_supermapping: true
      - name: huggingface
        path: flexitok/bpe_script_Germ_32000
        load_supermapping: true
      - name: huggingface
        path: flexitok/bpe_script_Roma_32000
        load_supermapping: true
      - name: huggingface
        path: flexitok/bpe_script_SEAS_16000
        load_supermapping: true
      - name: huggingface
        path: flexitok/bpe_script_Slav_16000
        load_supermapping: true
    # NOTE(review): `routing` is assumed to be a child of `tokenizer`;
    # confirm against the consumer's schema.
    routing:
      suitable_tokenizer_probability: 0.9
      # One entry per key of `sources` above (21 entries).
      source_to_tokenizer:
        arb_Arab: flexitok/bpe_script_Arab_16000
        fas_Arab: flexitok/bpe_script_Arab_16000
        cmn_Hani: flexitok/bpe_script_CmJp_16000
        jpn_Jpan: flexitok/bpe_script_CmJp_16000
        ell_Grek: flexitok/bpe_ltr_ell_Grek_8000_v2
        fw_edu: flexitok/bpe_ltr_fw_edu_32000_v2
        hun_Latn: flexitok/bpe_ltr_hun_Latn_8000_v2
        rus_Cyrl: flexitok/bpe_ltr_rus_Cyrl_16000_v2
        tur_Latn: flexitok/bpe_ltr_tur_Latn_8000_v2
        dan_Latn: flexitok/bpe_script_Germ_32000
        deu_Latn: flexitok/bpe_script_Germ_32000
        nld_Latn: flexitok/bpe_script_Germ_32000
        swe_Latn: flexitok/bpe_script_Germ_32000
        fra_Latn: flexitok/bpe_script_Roma_32000
        ita_Latn: flexitok/bpe_script_Roma_32000
        por_Latn: flexitok/bpe_script_Roma_32000
        spa_Latn: flexitok/bpe_script_Roma_32000
        ind_Latn: flexitok/bpe_script_SEAS_16000
        vie_Latn: flexitok/bpe_script_SEAS_16000
        ces_Latn: flexitok/bpe_script_Slav_16000
        pol_Latn: flexitok/bpe_script_Slav_16000
# Profiler settings, nested under `profiling` (flush-left children would
# leave `profiling` null).
profiling:
  run: true
  mem_warmup: 0
  mem_steps: 4
  profile_warmup: 100
  profile_steps: 4
# Checkpoint settings.
# `dump` and `eval` are sub-mappings of `checkpoint`, each with its own
# `every`/`keep` pair. Written flush-left those pairs are duplicate keys, and
# this `eval` would collide with the top-level `eval` section.
checkpoint:
  path: /fsx/craffel/lingua_logs/checkpoints/script_1
  init_ckpt_path: null
  dump:
    every: 10000
    # NOTE(review): -1 presumably means "keep all" — confirm with consumer.
    keep: -1
  eval:
    every: 10000
    keep: -1
# Logging settings; `freq` is nested under `logging` (flush-left it would
# leave `logging` null).
logging:
  freq: 1
# NOTE(review): kept as a top-level key rather than a child of `logging` —
# confirm against the consumer's schema.
async_eval_gpus: 8
# Evaluation settings: the harness task list and the generator options.
# Nesting is restored from key meaning; `tasks` is a sequence under
# `harness`, and `generator` is a sibling of `harness`.
eval:
  harness:
    # 51 tasks: 4 unprefixed, 16 include_base_44_*, 21 belebele_*, 10 xnli_*.
    tasks:
      - hellaswag
      - piqa
      - arc_easy
      - arc_challenge
      - include_base_44_arabic
      - include_base_44_chinese
      - include_base_44_german
      - include_base_44_greek
      - include_base_44_persian
      - include_base_44_french
      - include_base_44_hungarian
      - include_base_44_indonesian
      - include_base_44_italian
      - include_base_44_japanese
      - include_base_44_dutch
      - include_base_44_portuguese
      - include_base_44_russian
      - include_base_44_spanish
      - include_base_44_turkish
      - include_base_44_vietnamese
      - belebele_arb_Arab
      - belebele_ces_Latn
      - belebele_zho_Hans
      - belebele_dan_Latn
      - belebele_deu_Latn
      - belebele_ell_Grek
      - belebele_pes_Arab
      - belebele_fra_Latn
      - belebele_hun_Latn
      - belebele_ind_Latn
      - belebele_ita_Latn
      - belebele_jpn_Jpan
      - belebele_nld_Latn
      - belebele_pol_Latn
      - belebele_por_Latn
      - belebele_rus_Cyrl
      - belebele_spa_Latn
      - belebele_swe_Latn
      - belebele_tur_Latn
      - belebele_vie_Latn
      - belebele_eng_Latn
      - xnli_ar
      - xnli_zh
      - xnli_de
      - xnli_el
      - xnli_en
      - xnli_es
      - xnli_fr
      - xnli_ru
      - xnli_tr
      - xnli_vi
  generator:
    max_tokens: 16384
    dtype: bf16
    # NOTE(review): assumed to be a generator option because it directly
    # follows `dtype`; the original nesting was lost — confirm it is not an
    # `eval`-level key.
    add_bos: false

# Xet Storage Details
# Size: 5.24 kB · Xet hash: 987ae7d7883627f1a464bccf8cc0469571b93fb2a16d9910942de4042f073dae
# Xet efficiently stores files, intelligently splitting them into unique chunks and accelerating uploads and downloads. More info.