webpluging

Paused

ranamhamoud committed on Apr 18, 2024

Commit

04c002a

verified ·

1 Parent(s): c11dcf8

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -11,7 +11,7 @@ from peft import PeftModel
 # Constants
 MAX_MAX_NEW_TOKENS = 2048
-DEFAULT_MAX_NEW_TOKENS = 900
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 # Description and License Texts
@@ -30,7 +30,7 @@ if not torch.cuda.is_available():
     DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
 # Model and Tokenizer Configuration
-model_id = "meta-llama/Llama-2-7b-chat"
 bnb_config = BitsAndBytesConfig(
     load_in_4bit=True,
     bnb_4bit_use_double_quant=False,

 # Constants
 MAX_MAX_NEW_TOKENS = 2048
+DEFAULT_MAX_NEW_TOKENS = 950
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 # Description and License Texts
     DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
 # Model and Tokenizer Configuration
+model_id = "meta-llama/Llama-2-7b-hf"
 bnb_config = BitsAndBytesConfig(
     load_in_4bit=True,
     bnb_4bit_use_double_quant=False,