Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -11,7 +11,6 @@ import faiss
|
|
| 11 |
from transformers import (
|
| 12 |
AutoTokenizer,
|
| 13 |
AutoModelForCausalLM,
|
| 14 |
-
BitsAndBytesConfig,
|
| 15 |
AutoModel,
|
| 16 |
TextIteratorStreamer
|
| 17 |
)
|
|
@@ -384,13 +383,13 @@ else:
|
|
| 384 |
index = faiss.read_index('./storage/faiss_index.faiss')
|
| 385 |
|
| 386 |
# Load the model
|
| 387 |
-
nf4_config = BitsAndBytesConfig(
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
|
| 391 |
|
| 392 |
tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it", token=HF_TOKEN)
|
| 393 |
-
model = AutoModelForCausalLM.from_pretrained("google/gemma-2b-it",
|
| 394 |
|
| 395 |
|
| 396 |
def make_inference(query, hist):
|
|
|
|
| 11 |
from transformers import (
|
| 12 |
AutoTokenizer,
|
| 13 |
AutoModelForCausalLM,
|
|
|
|
| 14 |
AutoModel,
|
| 15 |
TextIteratorStreamer
|
| 16 |
)
|
|
|
|
| 383 |
index = faiss.read_index('./storage/faiss_index.faiss')
|
| 384 |
|
| 385 |
# Load the model
|
| 386 |
+
# nf4_config = BitsAndBytesConfig(
|
| 387 |
+
# load_in_4bit=True,
|
| 388 |
+
# bnb_4bit_quant_type="nf4",
|
| 389 |
+
# )quantization_config = nf4_config,
|
| 390 |
|
| 391 |
tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it", token=HF_TOKEN)
|
| 392 |
+
model = AutoModelForCausalLM.from_pretrained("google/gemma-2b-it", token=HF_TOKEN)
|
| 393 |
|
| 394 |
|
| 395 |
def make_inference(query, hist):
|