Spaces:
Sleeping
Sleeping
Commit
·
5bf2e9f
1
Parent(s):
5bceece
Fix processor error: pass tokenizer explicitly for text-only models
Browse files
- Load tokenizer using AutoTokenizer.from_pretrained()
- Pass tokenizer parameter to oneshot() to avoid processor initialization errors
- For text-only LLMs, tokenizer serves as the processor
- Fixes RuntimeError about processor initialization for text models
- quantize_to_awq_colab.ipynb +13 -0
quantize_to_awq_colab.ipynb
CHANGED
|
@@ -367,6 +367,17 @@
|
|
| 367 |
" recipe = Recipe.from_modifiers(modifiers)\n",
|
| 368 |
" print(f\" ✅ Recipe created from modifiers\")\n",
|
| 369 |
" \n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 370 |
" # oneshot() API - all kwargs must map to ModelArguments, DatasetArguments, or RecipeArguments\n",
|
| 371 |
" # - model: ModelArguments.model\n",
|
| 372 |
" # - output_dir: ModelArguments.output_dir\n",
|
|
@@ -376,6 +387,7 @@
|
|
| 376 |
" # - use_auth_token: ModelArguments.use_auth_token (reads from HF_TOKEN env var)\n",
|
| 377 |
" # - trust_remote_code_model: ModelArguments.trust_remote_code_model\n",
|
| 378 |
" # - stage: RecipeArguments.stage (default: \"default\")\n",
|
|
|
|
| 379 |
" print(f\" → Calling oneshot() with proper argument structure...\")\n",
|
| 380 |
" oneshot(\n",
|
| 381 |
" model=repo_id,\n",
|
|
@@ -384,6 +396,7 @@
|
|
| 384 |
" stage=\"default\", # Recipe stage\n",
|
| 385 |
" dataset=calibration_dataset,\n",
|
| 386 |
" num_calibration_samples=min(calibration_dataset_size, len(calibration_dataset)),\n",
|
|
|
|
| 387 |
" use_auth_token=True, # Reads from os.environ[\"HF_TOKEN\"]\n",
|
| 388 |
" trust_remote_code_model=True\n",
|
| 389 |
" )\n",
|
|
|
|
| 367 |
" recipe = Recipe.from_modifiers(modifiers)\n",
|
| 368 |
" print(f\" ✅ Recipe created from modifiers\")\n",
|
| 369 |
" \n",
|
| 370 |
+
" # Load tokenizer for text-only models (required as processor)\n",
|
| 371 |
+
" # For text-only LLMs, we need to pass tokenizer explicitly to avoid processor initialization errors\n",
|
| 372 |
+
" print(f\" → Loading tokenizer for text-only model...\")\n",
|
| 373 |
+
" tokenizer = AutoTokenizer.from_pretrained(\n",
|
| 374 |
+
" repo_id,\n",
|
| 375 |
+
" use_fast=True,\n",
|
| 376 |
+
" trust_remote_code=True,\n",
|
| 377 |
+
" token=os.environ.get(\"HF_TOKEN\")\n",
|
| 378 |
+
" )\n",
|
| 379 |
+
" print(f\" ✅ Tokenizer loaded\")\n",
|
| 380 |
+
" \n",
|
| 381 |
" # oneshot() API - all kwargs must map to ModelArguments, DatasetArguments, or RecipeArguments\n",
|
| 382 |
" # - model: ModelArguments.model\n",
|
| 383 |
" # - output_dir: ModelArguments.output_dir\n",
|
|
|
|
| 387 |
" # - use_auth_token: ModelArguments.use_auth_token (reads from HF_TOKEN env var)\n",
|
| 388 |
" # - trust_remote_code_model: ModelArguments.trust_remote_code_model\n",
|
| 389 |
" # - stage: RecipeArguments.stage (default: \"default\")\n",
|
| 390 |
+
" # - tokenizer: ModelArguments.tokenizer (required for text-only models to avoid processor errors)\n",
|
| 391 |
" print(f\" → Calling oneshot() with proper argument structure...\")\n",
|
| 392 |
" oneshot(\n",
|
| 393 |
" model=repo_id,\n",
|
|
|
|
| 396 |
" stage=\"default\", # Recipe stage\n",
|
| 397 |
" dataset=calibration_dataset,\n",
|
| 398 |
" num_calibration_samples=min(calibration_dataset_size, len(calibration_dataset)),\n",
|
| 399 |
+
" tokenizer=tokenizer, # Pass tokenizer explicitly for text-only models\n",
|
| 400 |
" use_auth_token=True, # Reads from os.environ[\"HF_TOKEN\"]\n",
|
| 401 |
" trust_remote_code_model=True\n",
|
| 402 |
" )\n",
|