Upload app.py with huggingface_hub
Browse files
app.py
CHANGED
|
@@ -28,11 +28,12 @@ def load_model():
|
|
| 28 |
"""Load model once and cache it"""
|
| 29 |
print("Loading model...")
|
| 30 |
|
| 31 |
-
# Prepare model loading kwargs
|
| 32 |
model_kwargs = {
|
| 33 |
"device_map": "auto",
|
| 34 |
"trust_remote_code": True,
|
| 35 |
"low_cpu_mem_usage": True,
|
|
|
|
| 36 |
}
|
| 37 |
|
| 38 |
# Use 8-bit quantization if enabled (saves memory)
|
|
|
|
| 28 |
"""Load model once and cache it"""
|
| 29 |
print("Loading model...")
|
| 30 |
|
| 31 |
+
# Prepare model loading kwargs with disk offloading for limited memory
|
| 32 |
model_kwargs = {
|
| 33 |
"device_map": "auto",
|
| 34 |
"trust_remote_code": True,
|
| 35 |
"low_cpu_mem_usage": True,
|
| 36 |
+
"offload_folder": "offload", # Enable disk offloading for HF Space
|
| 37 |
}
|
| 38 |
|
| 39 |
# Use 8-bit quantization if enabled (saves memory)
|