Fix/Check RT-Qwen3-30B-AWQ/recipe.yaml
Browse files
- RT-Qwen3-30B-AWQ/recipe.yaml +17 -0
RT-Qwen3-30B-AWQ/recipe.yaml
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Quantization recipe: one stage (`default_stage`) holding one modifier
# (`AWQModifier`). Presumably consumed by llm-compressor -- confirm against
# the tool version that produced this file.
default_stage:
  default_modifiers:
    AWQModifier:
      # Module selection: every `Linear` target except `lm_head`.
      # NOTE(review): only `lm_head` is ignored; if the model contains other
      # Linear modules that must stay unquantized (e.g. MoE router gates),
      # confirm whether they need to be listed here as well.
      targets: [Linear]
      ignore: [lm_head]
      # Scheme name as written by the tool; presumably 4-bit weights,
      # 16-bit activations, asymmetric -- confirm.
      scheme: W4A16_ASYM
      # Each mapping pairs one `smooth_layer` pattern with the
      # `balance_layers` patterns associated with it. The `re:` prefix
      # presumably marks the remainder of the string as a regular expression.
      mappings:
        - smooth_layer: 're:.*input_layernorm'
          balance_layers: ['re:.*q_proj', 're:.*k_proj', 're:.*v_proj']
        - smooth_layer: 're:.*v_proj'
          balance_layers: ['re:.*o_proj']
        - smooth_layer: 're:.*post_attention_layernorm'
          balance_layers: ['re:.*gate_proj', 're:.*up_proj']
        - smooth_layer: 're:.*up_proj'
          balance_layers: ['re:.*down_proj']
      # Modifier options, kept exactly as recorded.
      duo_scaling: true
      n_grid: 20