Alikestocode committed on
Commit
5bf2e9f
·
1 Parent(s): 5bceece

Fix processor error: pass tokenizer explicitly for text-only models

Browse files

- Load tokenizer using AutoTokenizer.from_pretrained()
- Pass tokenizer parameter to oneshot() to avoid processor initialization errors
- For text-only LLMs, the tokenizer serves as the processor
- Fixes RuntimeError about processor initialization for text models

Files changed (1) hide show
  1. quantize_to_awq_colab.ipynb +13 -0
quantize_to_awq_colab.ipynb CHANGED
@@ -367,6 +367,17 @@
367
  " recipe = Recipe.from_modifiers(modifiers)\n",
368
  " print(f\" ✅ Recipe created from modifiers\")\n",
369
  " \n",
 
 
 
 
 
 
 
 
 
 
 
370
  " # oneshot() API - all kwargs must map to ModelArguments, DatasetArguments, or RecipeArguments\n",
371
  " # - model: ModelArguments.model\n",
372
  " # - output_dir: ModelArguments.output_dir\n",
@@ -376,6 +387,7 @@
376
  " # - use_auth_token: ModelArguments.use_auth_token (reads from HF_TOKEN env var)\n",
377
  " # - trust_remote_code_model: ModelArguments.trust_remote_code_model\n",
378
  " # - stage: RecipeArguments.stage (default: \"default\")\n",
 
379
  " print(f\" → Calling oneshot() with proper argument structure...\")\n",
380
  " oneshot(\n",
381
  " model=repo_id,\n",
@@ -384,6 +396,7 @@
384
  " stage=\"default\", # Recipe stage\n",
385
  " dataset=calibration_dataset,\n",
386
  " num_calibration_samples=min(calibration_dataset_size, len(calibration_dataset)),\n",
 
387
  " use_auth_token=True, # Reads from os.environ[\"HF_TOKEN\"]\n",
388
  " trust_remote_code_model=True\n",
389
  " )\n",
 
367
  " recipe = Recipe.from_modifiers(modifiers)\n",
368
  " print(f\" ✅ Recipe created from modifiers\")\n",
369
  " \n",
370
+ " # Load tokenizer for text-only models (required as processor)\n",
371
+ " # For text-only LLMs, we need to pass tokenizer explicitly to avoid processor initialization errors\n",
372
+ " print(f\" → Loading tokenizer for text-only model...\")\n",
373
+ " tokenizer = AutoTokenizer.from_pretrained(\n",
374
+ " repo_id,\n",
375
+ " use_fast=True,\n",
376
+ " trust_remote_code=True,\n",
377
+ " token=os.environ.get(\"HF_TOKEN\")\n",
378
+ " )\n",
379
+ " print(f\" ✅ Tokenizer loaded\")\n",
380
+ " \n",
381
  " # oneshot() API - all kwargs must map to ModelArguments, DatasetArguments, or RecipeArguments\n",
382
  " # - model: ModelArguments.model\n",
383
  " # - output_dir: ModelArguments.output_dir\n",
 
387
  " # - use_auth_token: ModelArguments.use_auth_token (reads from HF_TOKEN env var)\n",
388
  " # - trust_remote_code_model: ModelArguments.trust_remote_code_model\n",
389
  " # - stage: RecipeArguments.stage (default: \"default\")\n",
390
+ " # - tokenizer: ModelArguments.tokenizer (required for text-only models to avoid processor errors)\n",
391
  " print(f\" → Calling oneshot() with proper argument structure...\")\n",
392
  " oneshot(\n",
393
  " model=repo_id,\n",
 
396
  " stage=\"default\", # Recipe stage\n",
397
  " dataset=calibration_dataset,\n",
398
  " num_calibration_samples=min(calibration_dataset_size, len(calibration_dataset)),\n",
399
+ " tokenizer=tokenizer, # Pass tokenizer explicitly for text-only models\n",
400
  " use_auth_token=True, # Reads from os.environ[\"HF_TOKEN\"]\n",
401
  " trust_remote_code_model=True\n",
402
  " )\n",