---
# Training / inference configuration for an instruction-tuned Spanish LM.
# NOTE(review): the original file was wrapped in markdown-table pipes and had
# lost its indentation; nesting below was reconstructed from key names and the
# per-section alphabetical ordering — confirm against the consuming code.

# Dataset loading and preprocessing.
data:
  augmentation_prob: 0.3        # chance of augmenting a sample (used when use_augmentation is true)
  corpus_path: data.jsonl       # training corpus (JSON Lines)
  max_text_length: 3000         # upper length bound — presumably characters; confirm units
  min_text_length: 30           # lower length bound — samples outside bounds are likely filtered
  use_augmentation: true
  validation_split: 0.15        # fraction of the corpus held out for validation
  # NOTE(review): "format" breaks this section's alphabetical order in the
  # original; it is assumed to belong to `data` — verify against the loader.
  format: "instruction-context-response"

# Default decoding parameters for generation.
generation:
  default_max_tokens: 200
  default_repetition_penalty: 1.2
  default_temperature: 0.8
  default_top_k: 50
  default_top_p: 0.95
  min_response_length: 30
  # Double quotes are required here so the \n escapes expand to real newlines.
  prompt_format: "### Instrucción:\n{instruction}\n\n### Contexto:\n{context}\n\n### Respuesta:\n"

# Transformer architecture hyperparameters.
model:
  d_ff: 4096          # feed-forward inner dimension (4 × d_model)
  d_model: 1024
  dropout: 0.1
  max_seq_len: 2048
  n_heads: 16
  n_layers: 24
  vocab_size: 8000

# Optimization / training-loop settings.
training:
  accumulation_steps: 8     # effective batch = batch_size × accumulation_steps = 16
  batch_size: 2
  epochs: 30
  label_smoothing: 0.1
  learning_rate: 0.0003
  max_grad_norm: 1.0        # gradient-norm clipping threshold
  min_delta: 0.0005         # presumably the early-stopping improvement threshold — confirm
  min_lr: 1.0e-06           # scheduler floor (used when use_lr_scheduler is true)
  num_threads: 4
  patience: 7               # presumably early-stopping patience in epochs — confirm
  save_every: 3             # checkpoint interval — TODO confirm units (epochs vs steps)
  use_amp: true             # automatic mixed precision
  use_lr_scheduler: true
  warmup_steps: 500
  weight_decay: 0.1