BoostedJonP committed on
Commit
185a134
·
1 Parent(s): 30494ce

revert quantization removal breaking tensor shape

Browse files
Files changed (1) hide show
  1. app.py +2 -10
app.py CHANGED
@@ -44,24 +44,16 @@ def load_model():
44
  )
45
  else:
46
  logger.info("CUDA not available, loading with CPU optimizations")
47
- if hasattr(config, "quantization_config"):
48
- logger.info("Removing quantization configuration for CPU execution")
49
- try:
50
- delattr(config, "quantization_config")
51
- except AttributeError:
52
- config.quantization_config = None
53
  model = AutoModelForCausalLM.from_pretrained(
54
  MODEL_NAME,
55
  config=config,
56
  trust_remote_code=True,
57
- torch_dtype=torch.float32,
58
  attn_implementation="eager",
59
  cache_dir="/tmp/model_cache",
60
- low_cpu_mem_usage=True,
61
- quantization_config=None,
62
  )
63
  # Move model to CPU manually
64
- model = model.to("cpu")
65
 
66
  logger.info("Model loaded successfully!")
67
  logger.info(f"Model device: {next(model.parameters()).device}")
 
44
  )
45
  else:
46
  logger.info("CUDA not available, loading with CPU optimizations")
 
 
 
 
 
 
47
  model = AutoModelForCausalLM.from_pretrained(
48
  MODEL_NAME,
49
  config=config,
50
  trust_remote_code=True,
51
+ torch_dtype=torch.float16,
52
  attn_implementation="eager",
53
  cache_dir="/tmp/model_cache",
 
 
54
  )
55
  # Move model to CPU manually
56
+ model = model.to(device="cpu", dtype=torch.float32)
57
 
58
  logger.info("Model loaded successfully!")
59
  logger.info(f"Model device: {next(model.parameters()).device}")