Spaces:

saakshigupta
/

deepfake-explainer-app

Paused

saakshigupta committed on Apr 2

Commit

aa3f85c

verified ·

1 Parent(s): 954f59a

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -45,14 +45,15 @@ def load_model():
         # Load processor
         processor = AutoProcessor.from_pretrained(base_model_id)
-        # Configure quantization settings
         quantization_config = BitsAndBytesConfig(
             load_in_4bit=True,
             bnb_4bit_compute_dtype=torch.float16,
             bnb_4bit_use_double_quant=True,
             bnb_4bit_quant_type="nf4",
             bnb_4bit_quant_storage=torch.float16,
-            llm_int8_skip_modules=["lm_head"]
         )
         # Load the pre-quantized model with unsloth settings
@@ -62,7 +63,9 @@ def load_model():
             quantization_config=quantization_config,
             torch_dtype=torch.float16,
             trust_remote_code=True,
-            low_cpu_mem_usage=True
         )
         # Load adapter

         # Load processor
         processor = AutoProcessor.from_pretrained(base_model_id)
+        # Configure quantization settings for unsloth model
         quantization_config = BitsAndBytesConfig(
             load_in_4bit=True,
             bnb_4bit_compute_dtype=torch.float16,
             bnb_4bit_use_double_quant=True,
             bnb_4bit_quant_type="nf4",
             bnb_4bit_quant_storage=torch.float16,
+            llm_int8_skip_modules=["lm_head"],
+            llm_int8_enable_fp32_cpu_offload=True
         )
         # Load the pre-quantized model with unsloth settings
             quantization_config=quantization_config,
             torch_dtype=torch.float16,
             trust_remote_code=True,
+            low_cpu_mem_usage=True,
+            use_cache=True,
+            offload_folder="offload"  # Enable disk offloading
         )
         # Load adapter