Slaiwala committed on
Commit
1fbb928
·
verified ·
1 Parent(s): 63e871e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -2
app.py CHANGED
@@ -5,7 +5,12 @@ import os, re, json, time, sys, csv, uuid, datetime
5
  from typing import List, Dict, Any, Optional
6
  from functools import lru_cache
7
  from xml.etree import ElementTree as ET
8
- from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
 
 
 
 
 
9
 
10
  import numpy as np
11
  import requests
@@ -200,7 +205,7 @@ if ADAPTER_REPO:
200
  dlog("LLM", f"Loading base model: {BASE_MODEL}")
201
  tokenizer_lm = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=False)
202
 
203
- if QUANTIZE in {"8bit", "4bit"}:
204
  bnb_config = BitsAndBytesConfig(
205
  load_in_8bit=(QUANTIZE == "8bit"),
206
  load_in_4bit=(QUANTIZE == "4bit"),
@@ -214,6 +219,7 @@ if QUANTIZE in {"8bit", "4bit"}:
214
  quantization_config=bnb_config,
215
  )
216
  else:
 
217
  base_model = AutoModelForCausalLM.from_pretrained(
218
  BASE_MODEL,
219
  torch_dtype=dtype,
@@ -221,6 +227,7 @@ else:
221
  )
222
 
223
 
 
224
  dlog("LLM", f"Loading LoRA adapter from: {ADAPTER_PATH}")
225
  model_lm = PeftModel.from_pretrained(base_model, ADAPTER_PATH)
226
  model_lm.eval()
 
5
  from typing import List, Dict, Any, Optional
6
  from functools import lru_cache
7
  from xml.etree import ElementTree as ET
8
+ from transformers import AutoTokenizer, AutoModelForCausalLM
9
+ try:
10
+ from transformers import BitsAndBytesConfig
11
+ except ImportError:
12
+ BitsAndBytesConfig = None
13
+
14
 
15
  import numpy as np
16
  import requests
 
205
  dlog("LLM", f"Loading base model: {BASE_MODEL}")
206
  tokenizer_lm = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=False)
207
 
208
+ if QUANTIZE in {"8bit", "4bit"} and BitsAndBytesConfig is not None:
209
  bnb_config = BitsAndBytesConfig(
210
  load_in_8bit=(QUANTIZE == "8bit"),
211
  load_in_4bit=(QUANTIZE == "4bit"),
 
219
  quantization_config=bnb_config,
220
  )
221
  else:
222
+ # Fallback: run in fp16 without bitsandbytes
223
  base_model = AutoModelForCausalLM.from_pretrained(
224
  BASE_MODEL,
225
  torch_dtype=dtype,
 
227
  )
228
 
229
 
230
+
231
  dlog("LLM", f"Loading LoRA adapter from: {ADAPTER_PATH}")
232
  model_lm = PeftModel.from_pretrained(base_model, ADAPTER_PATH)
233
  model_lm.eval()