EvaGPT-German-0.7B / train_config.json
MTSmash's picture
Upload 9 files
1377f79 verified
{
"model_dir": "./training_outputs/EvaGPT-German-0.7B_2026_7_2026-01-08_17-33-14",
"csv_path": "./datasets/output-100-3k.csv",
"device": "cuda",
"train_mode": "lora",
"learning_rate": 0.0002,
"lr_schedule": "cosine",
"per_device_train_batch_size": 1,
"gradient_accumulation_steps": 7,
"num_train_epochs": 1.0,
"max_steps": null,
"chunk_size": 4096,
"max_seq_length": 4096,
"template_mode": "dialogplus",
"column_name": "text",
"lora_r": 100,
"lora_alpha": 125,
"shuffle": true,
"sort_by_length": true,
"use_ngrams": false,
"ngram_max": 12,
"ngram_top_k": 1500,
"ngram_min_chars": 16,
"ngram_min_words": 2,
"ngram_max_samples": 4000,
"ngram_budgeted": true,
"ngram_target_fit": 0.98,
"ngram_eval_samples": 512,
"ngram_add_batch": 64,
"ngram_min_count": 2,
"ngram_max_token_chars": 384,
"ngram_max_tokens_per_text": 4096,
"precision_mode": "bf16",
"gradient_checkpointing": true,
"save_dir": "./training_outputs",
"merge_lora_on_save": true,
"dataloader_num_workers": 0,
"max_grad_norm": 1.0,
"weight_decay": 0.01
}