{
  "model_dir": "./training_outputs/EvaGPT-German-0.7B_2026_7_2026-01-08_17-33-14",
  "csv_path": "./datasets/output-100-3k.csv",
  "device": "cuda",
  "train_mode": "lora",
  "learning_rate": 0.0002,
  "lr_schedule": "cosine",
  "per_device_train_batch_size": 1,
  "gradient_accumulation_steps": 7,
  "num_train_epochs": 1.0,
  "max_steps": null,
  "chunk_size": 4096,
  "max_seq_length": 4096,
  "template_mode": "dialogplus",
  "column_name": "text",
  "lora_r": 100,
  "lora_alpha": 125,
  "shuffle": true,
  "sort_by_length": true,
  "use_ngrams": false,
  "ngram_max": 12,
  "ngram_top_k": 1500,
  "ngram_min_chars": 16,
  "ngram_min_words": 2,
  "ngram_max_samples": 4000,
  "ngram_budgeted": true,
  "ngram_target_fit": 0.98,
  "ngram_eval_samples": 512,
  "ngram_add_batch": 64,
  "ngram_min_count": 2,
  "ngram_max_token_chars": 384,
  "ngram_max_tokens_per_text": 4096,
  "precision_mode": "bf16",
  "gradient_checkpointing": true,
  "save_dir": "./training_outputs",
  "merge_lora_on_save": true,
  "dataloader_num_workers": 0,
  "max_grad_norm": 1.0,
  "weight_decay": 0.01
}