# Translation
# Safetensors
# Ukrainian
# English
# Eval Results (legacy)
# radinplaid's picture
# Upload folder using huggingface_hub
# cdb4ffb verified
# Model settings, grouped under the `model` key.
# NOTE(review): the nesting below was restored from a copy whose indentation
# had been stripped; confirm the key set against the consuming trainer's schema.
model:
  d_model: 768
  # Encoder and decoder depths are configured separately (12 vs. 2).
  enc_layers: 12
  dec_layers: 2
  n_heads: 16
  ffn_dim: 4096
  max_len: 256
  # Source and target vocabulary sizes are set independently.
  vocab_size_src: 32000
  vocab_size_tgt: 32000
  dropout: 0.05
  mlp_type: "standard"  # standard or gated
  activation: "gelu"  # gelu or silu
  norm_type: "layernorm"  # layernorm or rmsnorm
  ff_bias: true
  tie_decoder_embeddings: false
  # Written with a decimal point and a signed exponent so that YAML 1.1
  # loaders also resolve it as a float rather than a string.
  layernorm_eps: 1.0e-5
# Data settings, grouped under the `data` key: language pair, dev-set paths,
# batching, tokenizer sampling options and the list of training corpora.
# NOTE(review): the nesting below was restored from a copy whose indentation
# had been stripped; confirm against the consuming trainer's schema.
data:
  src_lang: "uk"
  tgt_lang: "en"
  src_dev_path: "dev.ukr"
  tgt_dev_path: "dev.eng"
  max_tokens_per_batch: 6000
  # NOTE(review): the `*_spm_*` keys presumably configure SentencePiece
  # sampling per side (source: nbest_size -1 / alpha 0.5; target: nbest_size 1
  # / alpha 1.0) — confirm how the loader interprets them.
  src_spm_nbest_size: -1
  src_spm_alpha: 0.5
  tgt_spm_nbest_size: 1
  tgt_spm_alpha: 1.0
  # Each corpus entry pairs a source file with a target file and carries a
  # weight plus a step window (`start_step`, optional `stop_step`).
  corpora:
    # Entry 1: start_step 1000, no stop_step given.
    - src_file: "train.cleaned.filtered.ukr"
      tgt_file: "train.cleaned.filtered.eng"
      weight: 1
      start_step: 1000
    # Entry 2: start_step 0, stop_step 80000.
    - src_file: "finetranslations.ukr_Cyrl-eng_Latn.ukr_Cyrl"
      tgt_file: "finetranslations.ukr_Cyrl-eng_Latn.eng_Latn"
      weight: 1
      start_step: 0
      stop_step: 80000
# Training-run settings, grouped under the `train` key.
# NOTE(review): the nesting below was restored from a copy whose indentation
# had been stripped; confirm against the consuming trainer's schema.
train:
  experiment_name: "uken-base"
  # Decimal point plus signed exponent keeps this a float under YAML 1.1
  # loaders as well as YAML 1.2 ones.
  lr: 2.5e-3
  grad_clip: 0.5
  accum_steps: 20
  max_checkpoints: 10
  precision: "bfloat16"
  # Step-based schedule values: warmup 5000, total 108000, eval every 1000.
  # NOTE(review): units are presumably optimizer steps — confirm.
  warmup_steps: 5000
  max_steps: 108000
  eval_steps: 1000
# Export settings, grouped under the `export` key.
# NOTE(review): `export` is assumed to be a top-level section (a sibling of
# `train`); the original indentation was lost, so confirm the placement.
export:
  # NOTE(review): the meaning of `k` is not shown here (possibly a count of
  # checkpoints used at export time) — confirm with the consumer.
  k: 5