Spaces:
Sleeping
Sleeping
Commit
·
5bf02e9
1
Parent(s):
cf9ed91
Remove duplicate build_awq_modifier_config - keep existing correct version
Browse files
- Remove duplicate helper function from cell 7
- Keep existing build_awq_modifier_config in cell 9 that uses QuantizationScheme objects
- Existing function correctly returns config_groups and ignore separately
- Quantization function already uses it correctly with AWQModifier
- quantize_to_awq_colab.ipynb +1 -86
quantize_to_awq_colab.ipynb
CHANGED
|
@@ -147,92 +147,7 @@
|
|
| 147 |
"source": [
|
| 148 |
"## 3. Helper Function: Build AWQ Modifier Config\n",
|
| 149 |
"\n",
|
| 150 |
-
"
|
| 151 |
-
" \"\"\"Build proper AWQ quantization config using QuantizationScheme objects.\n",
|
| 152 |
-
" \n",
|
| 153 |
-
" This helper function creates the correct structure that AWQModifier expects,\n",
|
| 154 |
-
" using QuantizationScheme/QuantizationArgs objects instead of plain dicts.\n",
|
| 155 |
-
" \n",
|
| 156 |
-
" Args:\n",
|
| 157 |
-
" num_bits: Number of bits for quantization (default: 4)\n",
|
| 158 |
-
" group_size: Group size for quantization (default: 128)\n",
|
| 159 |
-
" zero_point: Whether to use zero-point quantization (default: True)\n",
|
| 160 |
-
" \n",
|
| 161 |
-
" Returns:\n",
|
| 162 |
-
" quantization_config dict with proper QuantizationScheme structure\n",
|
| 163 |
-
" \"\"\"\n",
|
| 164 |
-
" try:\n",
|
| 165 |
-
" # Try to import QuantizationScheme and related classes\n",
|
| 166 |
-
" from compressed_tensors.quantization import (\n",
|
| 167 |
-
" QuantizationConfig,\n",
|
| 168 |
-
" QuantizationScheme,\n",
|
| 169 |
-
" QuantizationArgs\n",
|
| 170 |
-
" )\n",
|
| 171 |
-
" \n",
|
| 172 |
-
" # Create QuantizationArgs for weights\n",
|
| 173 |
-
" weights_args = QuantizationArgs(\n",
|
| 174 |
-
" num_bits=num_bits,\n",
|
| 175 |
-
" group_size=group_size,\n",
|
| 176 |
-
" zero_point=zero_point,\n",
|
| 177 |
-
" symmetric=False,\n",
|
| 178 |
-
" strategy=\"group\",\n",
|
| 179 |
-
" observer=\"minmax\",\n",
|
| 180 |
-
" type=\"int\",\n",
|
| 181 |
-
" dynamic=False\n",
|
| 182 |
-
" )\n",
|
| 183 |
-
" \n",
|
| 184 |
-
" # Create QuantizationScheme with targets and weights\n",
|
| 185 |
-
" scheme = QuantizationScheme(\n",
|
| 186 |
-
" targets=[\"Linear\"], # Target Linear layers\n",
|
| 187 |
-
" weights=weights_args,\n",
|
| 188 |
-
" input_activations=None,\n",
|
| 189 |
-
" output_activations=None\n",
|
| 190 |
-
" )\n",
|
| 191 |
-
" \n",
|
| 192 |
-
" # Create QuantizationConfig with config_groups\n",
|
| 193 |
-
" quant_config = QuantizationConfig(\n",
|
| 194 |
-
" config_groups={\"group_0\": scheme},\n",
|
| 195 |
-
" ignore=[\"lm_head\"],\n",
|
| 196 |
-
" quant_method=\"compressed-tensors\",\n",
|
| 197 |
-
" quantization_status=\"compressed\",\n",
|
| 198 |
-
" format=\"pack-quantized\"\n",
|
| 199 |
-
" )\n",
|
| 200 |
-
" \n",
|
| 201 |
-
" print(f\"✅ Built AWQ config using QuantizationScheme objects\")\n",
|
| 202 |
-
" return quant_config\n",
|
| 203 |
-
" \n",
|
| 204 |
-
" except ImportError as e:\n",
|
| 205 |
-
" # Fallback: If QuantizationScheme not available, try dict-based approach\n",
|
| 206 |
-
" print(f\"⚠️ QuantizationScheme not available: {e}\")\n",
|
| 207 |
-
" print(f\" → Falling back to dict-based config...\")\n",
|
| 208 |
-
" \n",
|
| 209 |
-
" # Return dict structure (may still work with some versions)\n",
|
| 210 |
-
" return {\n",
|
| 211 |
-
" \"config_groups\": {\n",
|
| 212 |
-
" \"group_0\": {\n",
|
| 213 |
-
" \"targets\": [\"Linear\"],\n",
|
| 214 |
-
" \"weights\": {\n",
|
| 215 |
-
" \"num_bits\": num_bits,\n",
|
| 216 |
-
" \"group_size\": group_size,\n",
|
| 217 |
-
" \"zero_point\": zero_point,\n",
|
| 218 |
-
" \"symmetric\": False,\n",
|
| 219 |
-
" \"strategy\": \"group\",\n",
|
| 220 |
-
" \"observer\": \"minmax\",\n",
|
| 221 |
-
" \"type\": \"int\",\n",
|
| 222 |
-
" \"dynamic\": False\n",
|
| 223 |
-
" },\n",
|
| 224 |
-
" \"input_activations\": None,\n",
|
| 225 |
-
" \"output_activations\": None\n",
|
| 226 |
-
" }\n",
|
| 227 |
-
" },\n",
|
| 228 |
-
" \"ignore\": [\"lm_head\"],\n",
|
| 229 |
-
" \"quant_method\": \"compressed-tensors\",\n",
|
| 230 |
-
" \"quantization_status\": \"compressed\",\n",
|
| 231 |
-
" \"format\": \"pack-quantized\"\n",
|
| 232 |
-
" }\n",
|
| 233 |
-
" except Exception as e:\n",
|
| 234 |
-
" print(f\"❌ Failed to build AWQ config: {e}\")\n",
|
| 235 |
-
" raise\n",
|
| 236 |
"\n"
|
| 237 |
]
|
| 238 |
},
|
|
|
|
| 147 |
"source": [
|
| 148 |
"## 3. Helper Function: Build AWQ Modifier Config\n",
|
| 149 |
"\n",
|
| 150 |
+
"**Note:** The `build_awq_modifier_config` helper function is defined in the next cell (Cell 9) along with the imports. It properly constructs `QuantizationScheme` and `QuantizationArgs` objects as required by `AWQModifier`.\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
"\n"
|
| 152 |
]
|
| 153 |
},
|