{
  "model_dir": "./training_outputs/EvaGPT-German-0.7B_2026_7_2026-01-08_17-33-14",
  "csv_path": "./datasets/output-100-3k.csv",
  "device": "cuda",
  "train_mode": "lora",
  "learning_rate": 0.0002,
  "lr_schedule": "cosine",
  "per_device_train_batch_size": 1,
  "gradient_accumulation_steps": 7,
  "num_train_epochs": 1.0,
  "max_steps": null,
  "chunk_size": 4096,
  "max_seq_length": 4096,
  "template_mode": "dialogplus",
  "column_name": "text",
  "lora_r": 100,
  "lora_alpha": 125,
  "shuffle": true,
  "sort_by_length": true,
  "use_ngrams": false,
  "ngram_max": 12,
  "ngram_top_k": 1500,
  "ngram_min_chars": 16,
  "ngram_min_words": 2,
  "ngram_max_samples": 4000,
  "ngram_budgeted": true,
  "ngram_target_fit": 0.98,
  "ngram_eval_samples": 512,
  "ngram_add_batch": 64,
  "ngram_min_count": 2,
  "ngram_max_token_chars": 384,
  "ngram_max_tokens_per_text": 4096,
  "precision_mode": "bf16",
  "gradient_checkpointing": true,
  "save_dir": "./training_outputs",
  "merge_lora_on_save": true,
  "dataloader_num_workers": 0,
  "max_grad_norm": 1.0,
  "weight_decay": 0.01
}
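
This config describes a LoRA fine-tuning run (bf16, cosine schedule, gradient checkpointing). As a minimal sketch only, assuming the Hugging Face transformers/peft stack, which the field names resemble but which the source does not confirm, the file could be loaded and mapped onto TrainingArguments and LoraConfig roughly like this (the file name "train_config.json" is illustrative):

import json
from transformers import TrainingArguments
from peft import LoraConfig

# Load the training configuration shown above (path is hypothetical).
with open("train_config.json") as f:
    cfg = json.load(f)

# Map the generic config keys onto Hugging Face TrainingArguments.
training_args = TrainingArguments(
    output_dir=cfg["save_dir"],
    learning_rate=cfg["learning_rate"],
    lr_scheduler_type=cfg["lr_schedule"],                      # "cosine"
    per_device_train_batch_size=cfg["per_device_train_batch_size"],
    gradient_accumulation_steps=cfg["gradient_accumulation_steps"],
    num_train_epochs=cfg["num_train_epochs"],
    max_steps=cfg["max_steps"] if cfg["max_steps"] is not None else -1,
    bf16=(cfg["precision_mode"] == "bf16"),
    gradient_checkpointing=cfg["gradient_checkpointing"],
    dataloader_num_workers=cfg["dataloader_num_workers"],
    max_grad_norm=cfg["max_grad_norm"],
    weight_decay=cfg["weight_decay"],
)

# LoRA adapter hyperparameters: rank and scaling factor.
lora_config = LoraConfig(
    r=cfg["lora_r"],
    lora_alpha=cfg["lora_alpha"],
    task_type="CAUSAL_LM",
)

With per_device_train_batch_size = 1 and gradient_accumulation_steps = 7, the effective batch size per device is 7; "max_steps": null means training length is governed by num_train_epochs instead of a fixed step count.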