Alikestocode committed on
Commit
35d8225
·
1 Parent(s): e9f4b24

Fix oneshot() API: use correct parameter names from documentation

Browse files

- Change 'model_id' to 'model'
- Change 'modifiers' to 'recipe' (list of modifiers)
- Change 'calibration_data' to 'dataset' (list of strings)
- Remove 'token' parameter (not needed)
- Add optional 'max_seq_length' and 'num_calibration_samples'
- Based on official llm-compressor documentation

Files changed (1) hide show
  1. quantize_to_awq_colab.ipynb +17 -25
quantize_to_awq_colab.ipynb CHANGED
@@ -346,36 +346,28 @@
346
  " print(f\" ✅ AWQModifier created successfully\")\n",
347
  " \n",
348
  " # Call oneshot with the modifier\n",
349
- " # Note: oneshot() uses HfArgumentParser - check actual API for correct parameter names\n",
 
350
  " print(f\" → Starting quantization process...\")\n",
351
  " \n",
352
  " # Prepare calibration dataset (limit to reasonable size)\n",
353
  " calibration_dataset = calibration_texts[:min(calibration_dataset_size, 128)]\n",
354
  " \n",
355
- " # Try different parameter combinations based on oneshot() API\n",
356
- " # The error suggests 'calibration_data', 'modifiers', 'token' aren't recognized\n",
357
- " # Let's try the most common parameter names\n",
358
- " try:\n",
359
- " # Attempt 1: Try with model_id, modifiers, dataset, token\n",
360
- " oneshot(\n",
361
- " model_id=repo_id,\n",
362
- " output_dir=temp_output_dir,\n",
363
- " modifiers=modifiers,\n",
364
- " dataset=calibration_dataset,\n",
365
- " token=os.environ.get(\"HF_TOKEN\")\n",
366
- " )\n",
367
- " except ValueError as e:\n",
368
- " if \"not used by the HfArgumentParser\" in str(e):\n",
369
- " # Attempt 2: Try with just model_id and output_dir, pass rest via kwargs\n",
370
- " print(f\" ⚠️ Parameter names incorrect, trying alternative API...\")\n",
371
- " # Check if oneshot accepts **kwargs or needs different structure\n",
372
- " # For now, try minimal parameters\n",
373
- " oneshot(\n",
374
- " model_id=repo_id,\n",
375
- " output_dir=temp_output_dir\n",
376
- " )\n",
377
- " else:\n",
378
- " raise\n",
379
  " \n",
380
  " print(f\"✅ Model quantized to AWQ successfully\")\n",
381
  " except Exception as e:\n",
 
346
  " print(f\" ✅ AWQModifier created successfully\")\n",
347
  " \n",
348
  " # Call oneshot with the modifier\n",
349
+ " # Correct API based on llm-compressor documentation:\n",
350
+ " # oneshot(model, dataset, recipe, output_dir, max_seq_length, num_calibration_samples)\n",
351
  " print(f\" → Starting quantization process...\")\n",
352
  " \n",
353
  " # Prepare calibration dataset (limit to reasonable size)\n",
354
  " calibration_dataset = calibration_texts[:min(calibration_dataset_size, 128)]\n",
355
  " \n",
356
+ " # oneshot() API parameters:\n",
357
+ " # - model: model ID or path\n",
358
+ " # - dataset: dataset name (string) or list of calibration strings\n",
359
+ " # - recipe: list of modifiers\n",
360
+ " # - output_dir: output directory\n",
361
+ " # - max_seq_length: optional, max sequence length\n",
362
+ " # - num_calibration_samples: optional, number of calibration samples\n",
363
+ " oneshot(\n",
364
+ " model=repo_id,\n",
365
+ " dataset=calibration_dataset, # List of calibration strings\n",
366
+ " recipe=modifiers, # List of modifiers (e.g., [AWQModifier(...)])\n",
367
+ " output_dir=temp_output_dir,\n",
368
+ " max_seq_length=2048, # Optional: max sequence length\n",
369
+ " num_calibration_samples=len(calibration_dataset) # Optional: number of samples\n",
370
+ " )\n",
 
 
 
 
 
 
 
 
 
371
  " \n",
372
  " print(f\"✅ Model quantized to AWQ successfully\")\n",
373
  " except Exception as e:\n",