Upload train_args.json
Browse files
storage/sigma_lora_out/train_args.json
CHANGED
|
@@ -6,7 +6,7 @@
|
|
| 6 |
"torch_dtype": "bf16",
|
| 7 |
"dataset_id": null,
|
| 8 |
"split": "train",
|
| 9 |
-
"data_dir": "/workspace
|
| 10 |
"hf_data_repo": "Veltraxor/Sigma",
|
| 11 |
"hf_data_subdir": "storage/sigma_pickplace",
|
| 12 |
"prefer_hf_shards": false,
|
|
@@ -26,16 +26,16 @@
|
|
| 26 |
"alpha_a": 1.0,
|
| 27 |
"alpha_b": 1.0,
|
| 28 |
"alpha_c": 1.0,
|
| 29 |
-
"lambda_sem": 1.
|
| 30 |
"lambda_intent": 0.8,
|
| 31 |
-
"lambda_tau": 0.
|
| 32 |
"beta_mi": 0.1,
|
| 33 |
-
"eta_var": 0.
|
| 34 |
"hard_mining_ratio": 0.3,
|
| 35 |
"hard_mining_lambda": 1.0,
|
| 36 |
-
"loss_warmup_ratio": 0.
|
| 37 |
-
"lambda_sem_start": 0.
|
| 38 |
-
"lambda_intent_start": 0.
|
| 39 |
"max_grad_norm": 1.0,
|
| 40 |
"lora_r": 16,
|
| 41 |
"lora_alpha": 32,
|
|
|
|
| 6 |
"torch_dtype": "bf16",
|
| 7 |
"dataset_id": null,
|
| 8 |
"split": "train",
|
| 9 |
+
"data_dir": "/workspace/storage/sigma_pickplace",
|
| 10 |
"hf_data_repo": "Veltraxor/Sigma",
|
| 11 |
"hf_data_subdir": "storage/sigma_pickplace",
|
| 12 |
"prefer_hf_shards": false,
|
|
|
|
| 26 |
"alpha_a": 1.0,
|
| 27 |
"alpha_b": 1.0,
|
| 28 |
"alpha_c": 1.0,
|
| 29 |
+
"lambda_sem": 1.0,
|
| 30 |
"lambda_intent": 0.8,
|
| 31 |
+
"lambda_tau": 0.03,
|
| 32 |
"beta_mi": 0.1,
|
| 33 |
+
"eta_var": 0.2,
|
| 34 |
"hard_mining_ratio": 0.3,
|
| 35 |
"hard_mining_lambda": 1.0,
|
| 36 |
+
"loss_warmup_ratio": 0.6,
|
| 37 |
+
"lambda_sem_start": 0.1,
|
| 38 |
+
"lambda_intent_start": 0.1,
|
| 39 |
"max_grad_norm": 1.0,
|
| 40 |
"lora_r": 16,
|
| 41 |
"lora_alpha": 32,
|