Spaces:
Running on Zero
Running on Zero
Commit · 6246295
1
Parent(s): 2709f63
Restore max_new_tokens to 512 (4-bit gen is fast: ~25 tok/s on GPU)
Browse files
- generate.py +1 -1
generate.py
CHANGED
|
@@ -71,7 +71,7 @@ def _render(messages, tok) -> str:
|
|
| 71 |
|
| 72 |
|
| 73 |
@GPU(duration=180)
|
| 74 |
-
def generate(messages: list[dict], max_new_tokens: int =
|
| 75 |
temperature: float = 0.2) -> str:
|
| 76 |
"""Generate an assistant reply for chat-format ``messages``."""
|
| 77 |
if STUB:
|
|
|
|
| 71 |
|
| 72 |
|
| 73 |
@GPU(duration=180)
|
| 74 |
+
def generate(messages: list[dict], max_new_tokens: int = 512,
|
| 75 |
temperature: float = 0.2) -> str:
|
| 76 |
"""Generate an assistant reply for chat-format ``messages``."""
|
| 77 |
if STUB:
|