Upload adapter_config.json with huggingface_hub
Browse files — adapter_config.json (+34, −0)
adapter_config.json
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
{
  "base_model": "Qwen/Qwen2.5-7B-Instruct",
  "lora_parameters": {
    "rank": 16,
    "scale": 2.0,
    "dropout": 0.02
  },
  "num_layers": 28,
  "fine_tune_type": "lora",
  "training": {
    "stage_a": {
      "iters": 800,
      "data": "554K multi-workflow policy-corrected"
    },
    "stage_b": {
      "iters": 100,
      "data": "20K contrastive alignment"
    },
    "learning_rate": 3e-05,
    "batch_size": 4,
    "max_seq_length": 512,
    "mask_prompt": true,
    "seed": 42
  },
  "performance": {
    "total": "42/76 (55.3%)",
    "next": "10/22 (45%)",
    "retry": "7/12 (58%)",
    "fork": "13/14 (93%)",
    "join": "10/15 (67%)",
    "meta": "2/13 (15%)",
    "beats_gpt41": true
  }
}