Spaces:
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,7 +5,7 @@ from typing import Iterator
|
|
| 5 |
from mongoengine import connect, Document, StringField, SequenceField
|
| 6 |
import gradio as gr
|
| 7 |
import spaces
|
| 8 |
-
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
|
| 9 |
from peft import PeftModel
|
| 10 |
|
| 11 |
# Constants
|
|
@@ -29,7 +29,7 @@ if not torch.cuda.is_available():
|
|
| 29 |
DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
|
| 30 |
|
| 31 |
# Model and Tokenizer Configuration
|
| 32 |
-
model_id = "meta-llama/Llama-2-7b-
|
| 33 |
bnb_config = BitsAndBytesConfig(
|
| 34 |
load_in_4bit=True,
|
| 35 |
bnb_4bit_use_double_quant=False,
|
|
|
|
| 5 |
from mongoengine import connect, Document, StringField, SequenceField
|
| 6 |
import gradio as gr
|
| 7 |
import spaces
|
| 8 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TextIteratorStreamer
|
| 9 |
from peft import PeftModel
|
| 10 |
|
| 11 |
# Constants
|
|
|
|
| 29 |
DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
|
| 30 |
|
| 31 |
# Model and Tokenizer Configuration
|
| 32 |
+
model_id = "meta-llama/Llama-2-7b-hf"
|
| 33 |
bnb_config = BitsAndBytesConfig(
|
| 34 |
load_in_4bit=True,
|
| 35 |
bnb_4bit_use_double_quant=False,
|