# Model architecture settings.
model:
  name: SOVYN-300M-Cortex
  vocab_size: 32000
  max_seq_len: 512
  n_layers: 21
  # hidden_size / n_heads = 1024 / 16 = 64 (presumably the per-head width;
  # confirm against the model code).
  hidden_size: 1024
  n_heads: 16
  # Half of n_heads. NOTE(review): looks like grouped-query attention with
  # two query heads per KV head - confirm against the model code.
  n_kv_heads: 8
  ffn_size: 3584
  dropout: 0.0
  rope_theta: 10000.0
  tie_embeddings: true
# Tokenizer settings.
tokenizer:
  # Relative path. NOTE(review): presumably resolved against the process
  # working directory - confirm against the loader.
  path: tokenizer_300m/sovyn.model
# Training run settings.
training:
  train_path: data/sovyn_300m_train.jsonl
  output_dir: checkpoints
  checkpoint_prefix: sovyn_300m
  seed: 43
  device: cuda
  dtype: bf16
  max_steps: 8000
  batch_size: 2
  # NOTE(review): presumably batch_size x grad_accum_steps = 2 x 16 = 32
  # samples per optimizer step - confirm against the trainer.
  grad_accum_steps: 16
  # Kept in plain decimal notation on purpose: YAML 1.1 loaders such as
  # PyYAML read exponent forms without a dot (e.g. 5e-5) as strings.
  learning_rate: 0.00005
  weight_decay: 0.1
  # 800 of the 8000 max_steps (10%).
  warmup_steps: 800
  max_grad_norm: 1.0
  log_every: 100
  # NOTE(review): save_every is set while save_step_checkpoints is false -
  # confirm how the trainer combines these two settings.
  save_every: 1000
  save_total_limit: 1
  save_optimizer: false
  save_dtype: bf16
  save_step_checkpoints: false
  delete_before_save: true
# Text generation settings.
generation:
  max_new_tokens: 96
  temperature: 0.75
  top_k: 40