Slaiwala committed on
Commit
10c4116
·
verified ·
1 Parent(s): ba598c0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -4
app.py CHANGED
@@ -6,11 +6,23 @@ from typing import List, Dict, Any, Optional
6
  from functools import lru_cache
7
  from xml.etree import ElementTree as ET
8
  from transformers import AutoTokenizer, AutoModelForCausalLM
 
9
  try:
10
- from transformers import BitsAndBytesConfig
11
- except ImportError:
12
  BitsAndBytesConfig = None
13
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
  import numpy as np
16
  import requests
@@ -205,7 +217,9 @@ if ADAPTER_REPO:
205
  dlog("LLM", f"Loading base model: {BASE_MODEL}")
206
  tokenizer_lm = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=False)
207
 
208
- if QUANTIZE in {"8bit", "4bit"} and BitsAndBytesConfig is not None:
 
 
209
  bnb_config = BitsAndBytesConfig(
210
  load_in_8bit=(QUANTIZE == "8bit"),
211
  load_in_4bit=(QUANTIZE == "4bit"),
@@ -219,7 +233,7 @@ if QUANTIZE in {"8bit", "4bit"} and BitsAndBytesConfig is not None:
219
  quantization_config=bnb_config,
220
  )
221
  else:
222
- # Fallback / default: fp16 (no bitsandbytes required)
223
  base_model = AutoModelForCausalLM.from_pretrained(
224
  BASE_MODEL,
225
  torch_dtype=dtype,
@@ -228,6 +242,7 @@ else:
228
 
229
 
230
 
 
231
  dlog("LLM", f"Loading LoRA adapter from: {ADAPTER_PATH}")
232
  model_lm = PeftModel.from_pretrained(base_model, ADAPTER_PATH)
233
  model_lm.eval()
 
6
  from functools import lru_cache
7
  from xml.etree import ElementTree as ET
8
  from transformers import AutoTokenizer, AutoModelForCausalLM
9
+ from transformers import AutoTokenizer, AutoModelForCausalLM
10
  try:
11
+ from transformers import BitsAndBytesConfig # exists even if bitsandbytes isn't installed
12
+ except Exception:
13
  BitsAndBytesConfig = None
14
 
15
+ # Normalize QUANTIZE env
16
+ QUANTIZE = os.environ.get("QUANTIZE", "none").strip().lower()
17
+
18
+ # Detect bitsandbytes presence
19
+ try:
20
+ import bitsandbytes as _bnb # noqa: F401
21
+ _BNB_AVAILABLE = True
22
+ except Exception:
23
+ _BNB_AVAILABLE = False
24
+
25
+
26
 
27
  import numpy as np
28
  import requests
 
217
  dlog("LLM", f"Loading base model: {BASE_MODEL}")
218
  tokenizer_lm = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=False)
219
 
220
+ use_bnb = QUANTIZE in {"8bit", "4bit"} and BitsAndBytesConfig is not None and _BNB_AVAILABLE
221
+
222
+ if use_bnb:
223
  bnb_config = BitsAndBytesConfig(
224
  load_in_8bit=(QUANTIZE == "8bit"),
225
  load_in_4bit=(QUANTIZE == "4bit"),
 
233
  quantization_config=bnb_config,
234
  )
235
  else:
236
+ # Default / fallback: fp16 (no bitsandbytes required)
237
  base_model = AutoModelForCausalLM.from_pretrained(
238
  BASE_MODEL,
239
  torch_dtype=dtype,
 
242
 
243
 
244
 
245
+
246
  dlog("LLM", f"Loading LoRA adapter from: {ADAPTER_PATH}")
247
  model_lm = PeftModel.from_pretrained(base_model, ADAPTER_PATH)
248
  model_lm.eval()