Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,7 +5,12 @@ import os, re, json, time, sys, csv, uuid, datetime
|
|
| 5 |
from typing import List, Dict, Any, Optional
|
| 6 |
from functools import lru_cache
|
| 7 |
from xml.etree import ElementTree as ET
|
| 8 |
-
from transformers import AutoTokenizer, AutoModelForCausalLM
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
|
| 10 |
import numpy as np
|
| 11 |
import requests
|
|
@@ -200,7 +205,7 @@ if ADAPTER_REPO:
|
|
| 200 |
dlog("LLM", f"Loading base model: {BASE_MODEL}")
|
| 201 |
tokenizer_lm = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=False)
|
| 202 |
|
| 203 |
-
if QUANTIZE in {"8bit", "4bit"}:
|
| 204 |
bnb_config = BitsAndBytesConfig(
|
| 205 |
load_in_8bit=(QUANTIZE == "8bit"),
|
| 206 |
load_in_4bit=(QUANTIZE == "4bit"),
|
|
@@ -214,6 +219,7 @@ if QUANTIZE in {"8bit", "4bit"}:
|
|
| 214 |
quantization_config=bnb_config,
|
| 215 |
)
|
| 216 |
else:
|
|
|
|
| 217 |
base_model = AutoModelForCausalLM.from_pretrained(
|
| 218 |
BASE_MODEL,
|
| 219 |
torch_dtype=dtype,
|
|
@@ -221,6 +227,7 @@ else:
|
|
| 221 |
)
|
| 222 |
|
| 223 |
|
|
|
|
| 224 |
dlog("LLM", f"Loading LoRA adapter from: {ADAPTER_PATH}")
|
| 225 |
model_lm = PeftModel.from_pretrained(base_model, ADAPTER_PATH)
|
| 226 |
model_lm.eval()
|
|
|
|
| 5 |
from typing import List, Dict, Any, Optional
|
| 6 |
from functools import lru_cache
|
| 7 |
from xml.etree import ElementTree as ET
|
| 8 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 9 |
+
try:
|
| 10 |
+
from transformers import BitsAndBytesConfig
|
| 11 |
+
except ImportError:
|
| 12 |
+
BitsAndBytesConfig = None
|
| 13 |
+
|
| 14 |
|
| 15 |
import numpy as np
|
| 16 |
import requests
|
|
|
|
| 205 |
dlog("LLM", f"Loading base model: {BASE_MODEL}")
|
| 206 |
tokenizer_lm = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=False)
|
| 207 |
|
| 208 |
+
if QUANTIZE in {"8bit", "4bit"} and BitsAndBytesConfig is not None:
|
| 209 |
bnb_config = BitsAndBytesConfig(
|
| 210 |
load_in_8bit=(QUANTIZE == "8bit"),
|
| 211 |
load_in_4bit=(QUANTIZE == "4bit"),
|
|
|
|
| 219 |
quantization_config=bnb_config,
|
| 220 |
)
|
| 221 |
else:
|
| 222 |
+
# No quantization requested, or bitsandbytes is unavailable (BitsAndBytesConfig is None): load the base model unquantized using `dtype`
|
| 223 |
base_model = AutoModelForCausalLM.from_pretrained(
|
| 224 |
BASE_MODEL,
|
| 225 |
torch_dtype=dtype,
|
|
|
|
| 227 |
)
|
| 228 |
|
| 229 |
|
| 230 |
+
|
| 231 |
dlog("LLM", f"Loading LoRA adapter from: {ADAPTER_PATH}")
|
| 232 |
model_lm = PeftModel.from_pretrained(base_model, ADAPTER_PATH)
|
| 233 |
model_lm.eval()
|