add command-r 36pct train config

Files changed (1) hide show

command_r_r32_36pct_ood_repair_topk165151_20260609/config.json ADDED Viewed

+{
+  "args": {
+    "model": "/root/models/command-r7b-12-2024",
+    "train_jsonl": "data/command_r_ood_repair_mix_10k/train.jsonl",
+    "attribution": "runs/non_qwen_command_r7b_r32_q1_collimation_20260608_094142/command_r_relp_full.npz",
+    "topk": 165151,
+    "out_dir": "runs/non_qwen_command_r7b_r32_36pct_ood_repair_20260609/r32_topk165151_lora_len2048_coherefmt_oodrepair",
+    "device": "cuda",
+    "device_map": null,
+    "max_memory": null,
+    "dtype": "bfloat16",
+    "seed": 42,
+    "max_rows": 9277,
+    "max_seq_length": 2048,
+    "target_format": "cohere_action",
+    "n_calib": 128,
+    "epochs": 1.0,
+    "max_steps": null,
+    "batch_size": 1,
+    "grad_accum": 8,
+    "lr": 0.0002,
+    "weight_decay": 0.0,
+    "warmup_ratio": 0.05,
+    "max_grad_norm": 1.0,
+    "lora_r": 32,
+    "lora_alpha": 64,
+    "lora_dropout": 0.05,
+    "target_modules": "all-linear",
+    "use_rslora": true,
+    "masked_kl_beta": 1.0,
+    "ce_beta": 0.2,
+    "unmasked_kl_beta": 0.0,
+    "kl_temperature": 1.0,
+    "eval_every": 100,
+    "save_every": 100,
+    "num_workers": 0,
+    "save_merged": false
+  },
+  "n_rows": 9277,
+  "n_layers": 32,
+  "d_ffn": 14336,
+  "mask_kept": 165151,
+  "total_steps": 1160,
+  "warmup_steps": 58,
+  "logs": [],
+  "checkpoints": []
+}