File size: 1,551 Bytes
{
"target_model_name": "meta-llama/Llama-3.1-8B-Instruct",
"load_model_checkpoint": "",
"train_system": "",
"train_stimulus_completion": "",
"train_stimulus": "data/martian_data/pipeline_output/training_files/train_stimulus.json",
"train_control": "",
"train_qa": "data/martian_data/pipeline_output/training_files/train_qa.json",
"train_with_verb_mask": "user",
"add_thought_tokens": false,
"nudge_persona": false,
"modify_chat_template": true,
"filter": "",
"train_percent": 1.0,
"eval_ppl": true,
"eval_system": "",
"eval_stimulus_completion": "",
"eval_stimulus": "data/martian_data/pipeline_output/training_files/eval_stimulus.json",
"eval_control": "",
"eval_qa": "data/martian_data/pipeline_output/training_files/eval_qa.json",
"eval_every_n_steps": 200,
"output_dir": "out/codereview",
"save_model": true,
"save_every_n_steps": 200,
"use_wandb": true,
"run_name": "",
"shift_position_ids": true,
"min_layer_to_read": 15,
"max_layer_to_read": 16,
"layer_to_write": 0,
"module_setup": "read-vary_write-fixed_n-fixed",
"num_layers_to_read": 1,
"num_layers_to_sample": 1,
"batch_size_training": 1,
"gradient_accumulation_steps": 8,
"gradient_clipping": false,
"gradient_clipping_threshold": 1.0,
"num_epochs": 5,
"num_workers_dataloader": 1,
"lr": 3e-05,
"ema_decay": 1,
"warmup_steps": 50,
"weight_decay": 0.01,
"gamma": 0.85,
"seed": 42,
"peft_method": "lora",
"use_peft": true,
"use_fsdp": false,
"checkpoint_dir": "out/codereview/006/checkpoints"
}