MTP-3 / config.yaml
teszenofficial's picture
Upload 6 files
563bb6a verified
# Dataset section: corpus path, text-length bounds, augmentation switches,
# validation split and record format.
# Child keys are indented two spaces so they load as the value of `data`
# instead of as unrelated top-level keys next to a null `data`.
data:
  # NOTE(review): presumably only consulted when use_augmentation is true;
  # confirm against the data loader.
  augmentation_prob: 0.3
  corpus_path: data.jsonl
  # NOTE(review): the unit of the two length bounds (characters vs. tokens)
  # is not stated in this file; confirm against the data loader.
  max_text_length: 3000
  min_text_length: 30
  use_augmentation: true
  validation_split: 0.15
  format: "instruction-context-response"
# Generation section: default sampling parameters and the prompt template.
# Child keys are indented two spaces so they load as the value of
# `generation` instead of as unrelated top-level keys.
generation:
  default_max_tokens: 200
  default_repetition_penalty: 1.2
  default_temperature: 0.8
  default_top_k: 50
  default_top_p: 0.95
  min_response_length: 30
  # Double-quoted on purpose: the \n escapes are decoded to real newlines only
  # in double-quoted scalars, and the value ends with a newline after the
  # final "### Respuesta:" header.
  # NOTE(review): {instruction} and {context} look like placeholders filled in
  # by the consumer; confirm the substitution mechanism (str.format or other).
  prompt_format: "### Instrucción:\n{instruction}\n\n### Contexto:\n{context}\n\n### Respuesta:\n"
# Model section: architecture hyperparameters.
# Child keys are indented two spaces so they load as the value of `model`
# instead of as unrelated top-level keys.
# Relations visible in the values: d_ff = 4 x d_model, and d_model / n_heads
# = 1024 / 16 = 64 with no remainder.
# NOTE(review): vocab_size presumably has to match the tokenizer shipped with
# the checkpoint; confirm before changing it.
model:
  d_ff: 4096
  d_model: 1024
  dropout: 0.1
  max_seq_len: 2048
  n_heads: 16
  n_layers: 24
  vocab_size: 8000
# Training section: optimizer, schedule, early-stopping and checkpoint knobs.
# Child keys are indented two spaces so they load as the value of `training`
# instead of as unrelated top-level keys.
training:
  # NOTE(review): batch_size x accumulation_steps = 2 x 8 = 16, presumably the
  # effective batch size per optimizer step; confirm against the trainer.
  accumulation_steps: 8
  batch_size: 2
  epochs: 30
  label_smoothing: 0.1
  learning_rate: 0.0003
  max_grad_norm: 1.0
  # NOTE(review): min_delta and patience look like early-stopping settings;
  # confirm which metric they are applied to.
  min_delta: 0.0005
  # Keep the mantissa dot and the signed exponent: YAML 1.1 loaders such as
  # PyYAML read "1e-6" as a string, while "1.0e-06" resolves to a float.
  min_lr: 1.0e-06
  num_threads: 4
  patience: 7
  # NOTE(review): unit (epochs vs. steps) is not stated in this file; confirm.
  save_every: 3
  use_amp: true
  use_lr_scheduler: true
  warmup_steps: 500
  weight_decay: 0.1