Mangesh223 committed on
Commit
7c4e758
·
verified ·
1 Parent(s): ea59144

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -9
app.py CHANGED
# --- Environment & authentication -----------------------------------------
# Load .env so HF_TOKEN is available, then log in to the Hugging Face Hub
# (required for the gated Mistral checkpoint).
load_dotenv()
login(token=os.getenv("HF_TOKEN"))

# 4-bit NF4 quantization config: weights stored in 4-bit, compute in fp16.
# NOTE(review): bitsandbytes 4-bit requires a CUDA GPU — confirm the host has one.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
)

# Load tokenizer and quantized model; device_map="auto" lets accelerate
# place the shards across available devices.
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3")
model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.3",
    device_map="auto",
    quantization_config=quant_config,
    torch_dtype=torch.float16,
)

# Initialize pipeline with the preloaded model and tokenizer.
# Do NOT pass device_map/torch_dtype here: the model instance is already
# loaded and placed; transformers raises/warns when device arguments are
# combined with a preloaded model object.
analyzer = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)
# --- Environment & authentication -----------------------------------------
# Load .env so HF_TOKEN is available, then log in to the Hugging Face Hub
# (required for the gated Mistral checkpoint).
load_dotenv()
login(token=os.getenv("HF_TOKEN"))

# 4-bit NF4 quantization config: weights stored in 4-bit, compute in fp16.
# Only used on the CUDA path below — bitsandbytes 4-bit requires a GPU.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
)

# Decide the loading strategy once, up front.
cuda_available = torch.cuda.is_available()

# Load tokenizer (device-independent).
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.3")

if cuda_available:
    # GPU path: 4-bit quantized weights, fp16 compute, accelerate placement.
    model = AutoModelForCausalLM.from_pretrained(
        "mistralai/Mistral-7B-Instruct-v0.3",
        device_map="auto",
        quantization_config=quant_config,
        torch_dtype=torch.float16,
    )
else:
    # CPU fallback: no quantization. Use float32 — float16 kernels are
    # unsupported or extremely slow for many ops on CPU in PyTorch, so
    # loading in fp16 here would break or crawl at inference time.
    model = AutoModelForCausalLM.from_pretrained(
        "mistralai/Mistral-7B-Instruct-v0.3",
        device_map="cpu",  # Explicitly keep everything on CPU
        torch_dtype=torch.float32,
    )

# Initialize pipeline with the preloaded model and tokenizer.
# Do NOT pass device_map/torch_dtype here: the model is already loaded and
# placed on its device(s); transformers raises/warns when device arguments
# are combined with a preloaded model object.
analyzer = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)
53