Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
# Model setup for the Space's app.py.
# Authenticates against the Hugging Face Hub, loads Mistral-7B-Instruct —
# 4-bit quantized on GPU, full precision on CPU — and wraps the preloaded
# model + tokenizer in a text-generation pipeline exposed as `analyzer`.

load_dotenv()
login(token=os.getenv("HF_TOKEN"))

# Single source of truth for the checkpoint name (was repeated three times).
MODEL_ID = "mistralai/Mistral-7B-Instruct-v0.3"

# Quantization config (only used if CUDA is available).
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
)

# Decide once whether a GPU is present; this drives both the dtype and the
# device placement below.
cuda_available = torch.cuda.is_available()

# Load tokenizer and model.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

if cuda_available:
    # 4-bit NF4 quantization keeps the 7B model within a single GPU's memory;
    # accelerate ("device_map='auto'") handles the actual placement.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        device_map="auto",
        quantization_config=quant_config,
        torch_dtype=torch.float16,
    )
else:
    # Fall back to CPU without quantization. Use float32 here: most CPU
    # kernels have no float16 implementation, so loading in fp16 on CPU
    # either fails at inference time or is drastically slower.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        device_map="cpu",  # Explicitly set to CPU
        torch_dtype=torch.float32,
    )

# Initialize pipeline with the preloaded model and tokenizer. Do NOT pass
# device_map/torch_dtype again: the model object is already placed and typed,
# and re-specifying device_map makes transformers attempt to move an
# accelerate-dispatched model, which raises warnings/errors.
analyzer = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)