Spaces:

Alovestocode
/

ZeroGPU-LLM-Inference

Sleeping

Alikestocode committed on Nov 10, 2025

Commit

671d7f9

1 Parent(s): 35d8225

Fix oneshot() API: use Recipe and Dataset objects

- Import Recipe from llmcompressor.recipe and Dataset from datasets
- Convert modifiers list to Recipe object using Recipe.from_modifiers()
- Convert calibration_texts to Hugging Face Dataset using Dataset.from_dict()
- Use use_auth_token=True instead of the token parameter
- All kwargs now map to ModelArguments, DatasetArguments, or RecipeArguments
- Fixes HfArgumentParser 'Some keys are not used' error

Files changed (1) hide show

quantize_to_awq_colab.ipynb +31 -14

quantize_to_awq_colab.ipynb CHANGED Viewed

@@ -189,6 +189,8 @@
     "\n",
     "from transformers import AutoTokenizer\n",
     "from huggingface_hub import HfApi, scan_cache_dir, upload_folder\n",
     "import torch\n",
     "import shutil\n",
     "import gc\n",
@@ -346,27 +348,42 @@
     "        print(f\"  ✅ AWQModifier created successfully\")\n",
     "        \n",
     "        # Call oneshot with the modifier\n",
-    "        # Correct API based on llm-compressor documentation:\n",
-    "        # oneshot(model, dataset, recipe, output_dir, max_seq_length, num_calibration_samples)\n",
     "        print(f\"  → Starting quantization process...\")\n",
     "        \n",
     "        # Prepare calibration dataset (limit to reasonable size)\n",
-    "        calibration_dataset = calibration_texts[:min(calibration_dataset_size, 128)]\n",
     "        \n",
-    "        # oneshot() API parameters:\n",
-    "        # - model: model ID or path\n",
-    "        # - dataset: dataset name (string) or list of calibration strings\n",
-    "        # - recipe: list of modifiers\n",
-    "        # - output_dir: output directory\n",
-    "        # - max_seq_length: optional, max sequence length\n",
-    "        # - num_calibration_samples: optional, number of calibration samples\n",
     "        oneshot(\n",
     "            model=repo_id,\n",
-    "            dataset=calibration_dataset,  # List of calibration strings\n",
-    "            recipe=modifiers,  # List of modifiers (e.g., [AWQModifier(...)])\n",
     "            output_dir=temp_output_dir,\n",
-    "            max_seq_length=2048,  # Optional: max sequence length\n",
-    "            num_calibration_samples=len(calibration_dataset)  # Optional: number of samples\n",
     "        )\n",
     "        \n",
     "        print(f\"✅ Model quantized to AWQ successfully\")\n",

     "\n",
     "from transformers import AutoTokenizer\n",
     "from huggingface_hub import HfApi, scan_cache_dir, upload_folder\n",
+    "from datasets import Dataset\n",
+    "from llmcompressor.recipe import Recipe\n",
     "import torch\n",
     "import shutil\n",
     "import gc\n",
     "        print(f\"  ✅ AWQModifier created successfully\")\n",
     "        \n",
     "        # Call oneshot with the modifier\n",
+    "        # oneshot() uses HfArgumentParser which only understands ModelArguments, DatasetArguments, RecipeArguments\n",
+    "        # We need to convert modifiers to Recipe and calibration_texts to Dataset\n",
     "        print(f\"  → Starting quantization process...\")\n",
     "        \n",
     "        # Prepare calibration dataset (limit to reasonable size)\n",
+    "        calibration_texts_limited = calibration_texts[:min(calibration_dataset_size, 128)]\n",
     "        \n",
+    "        # Convert calibration texts to Hugging Face Dataset\n",
+    "        # DatasetArguments expects a Dataset object, not a list\n",
+    "        print(f\"  → Creating Hugging Face Dataset from calibration texts...\")\n",
+    "        calibration_dataset = Dataset.from_dict({\"text\": calibration_texts_limited})\n",
+    "        print(f\"  ✅ Created dataset with {len(calibration_dataset)} samples\")\n",
+    "        \n",
+    "        # Convert modifiers list to Recipe object\n",
+    "        # RecipeArguments expects a Recipe object, not a list of modifiers\n",
+    "        print(f\"  → Converting modifiers to Recipe object...\")\n",
+    "        recipe = Recipe.from_modifiers(modifiers)\n",
+    "        print(f\"  ✅ Recipe created from modifiers\")\n",
+    "        \n",
+    "        # oneshot() API - all kwargs must map to ModelArguments, DatasetArguments, or RecipeArguments\n",
+    "        # - model: ModelArguments.model\n",
+    "        # - output_dir: ModelArguments.output_dir\n",
+    "        # - recipe: RecipeArguments.recipe (Recipe object)\n",
+    "        # - dataset: DatasetArguments.dataset (Dataset object)\n",
+    "        # - num_calibration_samples: DatasetArguments.num_calibration_samples\n",
+    "        # - use_auth_token: ModelArguments.use_auth_token (reads from HF_TOKEN env var)\n",
+    "        # - trust_remote_code_model: ModelArguments.trust_remote_code_model\n",
+    "        print(f\"  → Calling oneshot() with proper argument structure...\")\n",
     "        oneshot(\n",
     "            model=repo_id,\n",
     "            output_dir=temp_output_dir,\n",
+    "            recipe=recipe,\n",
+    "            dataset=calibration_dataset,\n",
+    "            num_calibration_samples=min(calibration_dataset_size, len(calibration_dataset)),\n",
+    "            use_auth_token=True,  # Reads from os.environ[\"HF_TOKEN\"]\n",
+    "            trust_remote_code_model=True\n",
     "        )\n",
     "        \n",
     "        print(f\"✅ Model quantized to AWQ successfully\")\n",