Occupying-Mars committed on
Commit
806544d
·
verified ·
1 Parent(s): 386eff8

add command-r 36pct train config

Browse files
command_r_r32_36pct_ood_repair_topk165151_20260609/config.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "args": {
3
+ "model": "/root/models/command-r7b-12-2024",
4
+ "train_jsonl": "data/command_r_ood_repair_mix_10k/train.jsonl",
5
+ "attribution": "runs/non_qwen_command_r7b_r32_q1_collimation_20260608_094142/command_r_relp_full.npz",
6
+ "topk": 165151,
7
+ "out_dir": "runs/non_qwen_command_r7b_r32_36pct_ood_repair_20260609/r32_topk165151_lora_len2048_coherefmt_oodrepair",
8
+ "device": "cuda",
9
+ "device_map": null,
10
+ "max_memory": null,
11
+ "dtype": "bfloat16",
12
+ "seed": 42,
13
+ "max_rows": 9277,
14
+ "max_seq_length": 2048,
15
+ "target_format": "cohere_action",
16
+ "n_calib": 128,
17
+ "epochs": 1.0,
18
+ "max_steps": null,
19
+ "batch_size": 1,
20
+ "grad_accum": 8,
21
+ "lr": 0.0002,
22
+ "weight_decay": 0.0,
23
+ "warmup_ratio": 0.05,
24
+ "max_grad_norm": 1.0,
25
+ "lora_r": 32,
26
+ "lora_alpha": 64,
27
+ "lora_dropout": 0.05,
28
+ "target_modules": "all-linear",
29
+ "use_rslora": true,
30
+ "masked_kl_beta": 1.0,
31
+ "ce_beta": 0.2,
32
+ "unmasked_kl_beta": 0.0,
33
+ "kl_temperature": 1.0,
34
+ "eval_every": 100,
35
+ "save_every": 100,
36
+ "num_workers": 0,
37
+ "save_merged": false
38
+ },
39
+ "n_rows": 9277,
40
+ "n_layers": 32,
41
+ "d_ffn": 14336,
42
+ "mask_kept": 165151,
43
+ "total_steps": 1160,
44
+ "warmup_steps": 58,
45
+ "logs": [],
46
+ "checkpoints": []
47
+ }