Spaces:
Configuration error
Configuration error
Update app.py
Browse files
app.py
CHANGED
|
@@ -7,8 +7,8 @@ import spaces
|
|
| 7 |
import torch
|
| 8 |
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
|
| 9 |
|
| 10 |
-
MAX_MAX_NEW_TOKENS =
|
| 11 |
-
DEFAULT_MAX_NEW_TOKENS =
|
| 12 |
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
|
| 13 |
|
| 14 |
DESCRIPTION = """\
|
|
@@ -31,16 +31,16 @@ if torch.cuda.is_available():
|
|
| 31 |
tokenizer.use_default_system_prompt = False
|
| 32 |
|
| 33 |
|
| 34 |
-
@spaces.GPU
|
| 35 |
def generate(
|
| 36 |
message: str,
|
| 37 |
chat_history: list[tuple[str, str]],
|
| 38 |
system_prompt: str,
|
| 39 |
max_new_tokens: int = 1024,
|
| 40 |
-
temperature: float = 0.
|
| 41 |
-
top_p: float = 0.
|
| 42 |
-
top_k: int = 50,
|
| 43 |
-
repetition_penalty: float = 1.2,
|
| 44 |
) -> Iterator[str]:
|
| 45 |
conversation = []
|
| 46 |
if system_prompt:
|
|
@@ -227,28 +227,14 @@ chat_interface = gr.ChatInterface(
|
|
| 227 |
minimum=0.1,
|
| 228 |
maximum=4.0,
|
| 229 |
step=0.1,
|
| 230 |
-
value=0.
|
| 231 |
),
|
| 232 |
gr.Slider(
|
| 233 |
label="Top-p (nucleus sampling)",
|
| 234 |
minimum=0.05,
|
| 235 |
maximum=1.0,
|
| 236 |
step=0.05,
|
| 237 |
-
value=0.
|
| 238 |
-
),
|
| 239 |
-
gr.Slider(
|
| 240 |
-
label="Top-k",
|
| 241 |
-
minimum=1,
|
| 242 |
-
maximum=1000,
|
| 243 |
-
step=1,
|
| 244 |
-
value=50,
|
| 245 |
-
),
|
| 246 |
-
gr.Slider(
|
| 247 |
-
label="Repetition penalty",
|
| 248 |
-
minimum=1.0,
|
| 249 |
-
maximum=2.0,
|
| 250 |
-
step=0.05,
|
| 251 |
-
value=1.2,
|
| 252 |
),
|
| 253 |
],
|
| 254 |
stop_btn=None,
|
|
|
|
| 7 |
import torch
|
| 8 |
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
|
| 9 |
|
| 10 |
+
MAX_MAX_NEW_TOKENS = 1024
|
| 11 |
+
DEFAULT_MAX_NEW_TOKENS = 512
|
| 12 |
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
|
| 13 |
|
| 14 |
DESCRIPTION = """\
|
|
|
|
| 31 |
tokenizer.use_default_system_prompt = False
|
| 32 |
|
| 33 |
|
| 34 |
+
@spaces.GPU(duration=120)
|
| 35 |
def generate(
|
| 36 |
message: str,
|
| 37 |
chat_history: list[tuple[str, str]],
|
| 38 |
system_prompt: str,
|
| 39 |
max_new_tokens: int = 1024,
|
| 40 |
+
temperature: float = 0.2,
|
| 41 |
+
top_p: float = 0.95,
|
| 42 |
+
# top_k: int = 50,
|
| 43 |
+
# repetition_penalty: float = 1.2,
|
| 44 |
) -> Iterator[str]:
|
| 45 |
conversation = []
|
| 46 |
if system_prompt:
|
|
|
|
| 227 |
minimum=0.1,
|
| 228 |
maximum=4.0,
|
| 229 |
step=0.1,
|
| 230 |
+
value=0.2,
|
| 231 |
),
|
| 232 |
gr.Slider(
|
| 233 |
label="Top-p (nucleus sampling)",
|
| 234 |
minimum=0.05,
|
| 235 |
maximum=1.0,
|
| 236 |
step=0.05,
|
| 237 |
+
value=0.95,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 238 |
),
|
| 239 |
],
|
| 240 |
stop_btn=None,
|