Sambhavnoobcoder committed on
Commit
fa78a7f
·
1 Parent(s): 4a05067

Deploy Auto-Quantization MVP

Browse files
Files changed (1) hide show
  1. quantizer.py +3 -2
quantizer.py CHANGED
@@ -74,7 +74,7 @@ async def quantize_model(job: Dict) -> Dict:
74
  # Step 2: Load tokenizer
75
  print(f"\n📋 Step 2/5: Loading tokenizer...")
76
  try:
77
- tokenizer = AutoTokenizer.from_pretrained(model_id)
78
  print(f"✓ Tokenizer loaded")
79
  except Exception as e:
80
  raise Exception(f"Failed to load tokenizer: {str(e)}")
@@ -98,7 +98,8 @@ async def quantize_model(job: Dict) -> Dict:
98
  quantization_config=quant_config,
99
  torch_dtype=torch.float16,
100
  low_cpu_mem_usage=True,
101
- trust_remote_code=False # Security: don't trust remote code
 
102
  )
103
  print(f"✓ Model quantized successfully")
104
 
 
74
  # Step 2: Load tokenizer
75
  print(f"\n📋 Step 2/5: Loading tokenizer...")
76
  try:
77
+ tokenizer = AutoTokenizer.from_pretrained(model_id, token=HF_TOKEN)
78
  print(f"✓ Tokenizer loaded")
79
  except Exception as e:
80
  raise Exception(f"Failed to load tokenizer: {str(e)}")
 
98
  quantization_config=quant_config,
99
  torch_dtype=torch.float16,
100
  low_cpu_mem_usage=True,
101
+ trust_remote_code=False, # Security: don't trust remote code
102
+ token=HF_TOKEN
103
  )
104
  print(f"✓ Model quantized successfully")
105