---
# Configuration with four sections: data, generation, model, training.
# Restored to block style; the one-line form is not parseable YAML
# (nested "key: value" pairs cannot be chained in a plain scalar).

data:
  augmentation_prob: 0.3
  corpus_path: data.jsonl
  max_text_length: 3000
  min_text_length: 30
  use_augmentation: true
  validation_split: 0.15
  # NOTE(review): in the flattened original, `format` sat between the
  # `data` keys and `generation`; it is assumed to belong to `data`.
  # Confirm against the loader -- it may be a top-level key instead.
  format: "instruction-context-response"

generation:
  default_max_tokens: 200
  default_repetition_penalty: 1.2
  default_temperature: 0.8
  default_top_k: 50
  default_top_p: 0.95
  min_response_length: 30
  # Double quotes are required so the \n escapes decode to newlines.
  # {instruction} and {context} look like placeholders substituted by
  # the consumer -- TODO confirm the templating mechanism.
  prompt_format: "### Instrucción:\n{instruction}\n\n### Contexto:\n{context}\n\n### Respuesta:\n"

model:
  d_ff: 4096  # 4 x d_model
  d_model: 1024
  dropout: 0.1
  max_seq_len: 2048
  n_heads: 16  # d_model / n_heads = 64
  n_layers: 24
  vocab_size: 8000

training:
  # accumulation_steps x batch_size = 16; presumably the effective
  # batch size per optimizer step -- verify against the training loop.
  accumulation_steps: 8
  batch_size: 2
  epochs: 30
  label_smoothing: 0.1
  learning_rate: 0.0003
  max_grad_norm: 1.0
  min_delta: 0.0005
  # Written with a dot and a signed exponent so YAML 1.1 loaders
  # (e.g. PyYAML) resolve it as a float rather than a string.
  min_lr: 1.0e-06
  num_threads: 4
  patience: 7
  save_every: 3
  use_amp: true
  use_lr_scheduler: true
  warmup_steps: 500
  weight_decay: 0.1