vivekchakraverty committed on
Commit
5fa56c1
·
1 Parent(s): 743e3d3

ZeroGPU: raise GPU budget 120->180s, cap max_new_tokens 512->256

Browse files
Files changed (1) hide show
  1. generate.py +2 -2
generate.py CHANGED
@@ -80,8 +80,8 @@ def _render(messages, tok) -> str:
80
  messages, tokenize=False, add_generation_prompt=True)
81
 
82
 
83
- @GPU(duration=120)
84
- def generate(messages: list[dict], max_new_tokens: int = 512,
85
  temperature: float = 0.2) -> str:
86
  """Generate an assistant reply for chat-format ``messages``."""
87
  if STUB:
 
80
  messages, tokenize=False, add_generation_prompt=True)
81
 
82
 
83
+ @GPU(duration=180)
84
+ def generate(messages: list[dict], max_new_tokens: int = 256,
85
  temperature: float = 0.2) -> str:
86
  """Generate an assistant reply for chat-format ``messages``."""
87
  if STUB: