File size: 1,551 Bytes
a61dd32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
{
  "target_model_name": "meta-llama/Llama-3.1-8B-Instruct",
  "load_model_checkpoint": "",
  "train_system": "",
  "train_stimulus_completion": "",
  "train_stimulus": "data/martian_data/pipeline_output/training_files/train_stimulus.json",
  "train_control": "",
  "train_qa": "data/martian_data/pipeline_output/training_files/train_qa.json",
  "train_with_verb_mask": "user",
  "add_thought_tokens": false,
  "nudge_persona": false,
  "modify_chat_template": true,
  "filter": "",
  "train_percent": 1.0,
  "eval_ppl": true,
  "eval_system": "",
  "eval_stimulus_completion": "",
  "eval_stimulus": "data/martian_data/pipeline_output/training_files/eval_stimulus.json",
  "eval_control": "",
  "eval_qa": "data/martian_data/pipeline_output/training_files/eval_qa.json",
  "eval_every_n_steps": 200,
  "output_dir": "out/codereview",
  "save_model": true,
  "save_every_n_steps": 200,
  "use_wandb": true,
  "run_name": "",
  "shift_position_ids": true,
  "min_layer_to_read": 15,
  "max_layer_to_read": 16,
  "layer_to_write": 0,
  "module_setup": "read-vary_write-fixed_n-fixed",
  "num_layers_to_read": 1,
  "num_layers_to_sample": 1,
  "batch_size_training": 1,
  "gradient_accumulation_steps": 8,
  "gradient_clipping": false,
  "gradient_clipping_threshold": 1.0,
  "num_epochs": 5,
  "num_workers_dataloader": 1,
  "lr": 3e-05,
  "ema_decay": 1,
  "warmup_steps": 50,
  "weight_decay": 0.01,
  "gamma": 0.85,
  "seed": 42,
  "peft_method": "lora",
  "use_peft": true,
  "use_fsdp": false,
  "checkpoint_dir": "out/codereview/006/checkpoints"
}