Spaces:
Sleeping
Sleeping
Commit ·
185a134
1
Parent(s): 30494ce
Revert the quantization-config removal that was breaking tensor shapes
Browse files
app.py
CHANGED
|
@@ -44,24 +44,16 @@ def load_model():
|
|
| 44 |
)
|
| 45 |
else:
|
| 46 |
logger.info("CUDA not available, loading with CPU optimizations")
|
| 47 |
-
if hasattr(config, "quantization_config"):
|
| 48 |
-
logger.info("Removing quantization configuration for CPU execution")
|
| 49 |
-
try:
|
| 50 |
-
delattr(config, "quantization_config")
|
| 51 |
-
except AttributeError:
|
| 52 |
-
config.quantization_config = None
|
| 53 |
model = AutoModelForCausalLM.from_pretrained(
|
| 54 |
MODEL_NAME,
|
| 55 |
config=config,
|
| 56 |
trust_remote_code=True,
|
| 57 |
-
torch_dtype=torch.
|
| 58 |
attn_implementation="eager",
|
| 59 |
cache_dir="/tmp/model_cache",
|
| 60 |
-
low_cpu_mem_usage=True,
|
| 61 |
-
quantization_config=None,
|
| 62 |
)
|
| 63 |
# Move model to CPU manually
|
| 64 |
-
model = model.to("cpu")
|
| 65 |
|
| 66 |
logger.info("Model loaded successfully!")
|
| 67 |
logger.info(f"Model device: {next(model.parameters()).device}")
|
|
|
|
| 44 |
)
|
| 45 |
else:
|
| 46 |
logger.info("CUDA not available, loading with CPU optimizations")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
model = AutoModelForCausalLM.from_pretrained(
|
| 48 |
MODEL_NAME,
|
| 49 |
config=config,
|
| 50 |
trust_remote_code=True,
|
| 51 |
+
torch_dtype=torch.float16,
|
| 52 |
attn_implementation="eager",
|
| 53 |
cache_dir="/tmp/model_cache",
|
|
|
|
|
|
|
| 54 |
)
|
| 55 |
# Move model to CPU manually
|
| 56 |
+
model = model.to(device="cpu", dtype=torch.float32)
|
| 57 |
|
| 58 |
logger.info("Model loaded successfully!")
|
| 59 |
logger.info(f"Model device: {next(model.parameters()).device}")
|