Ilke Ileri committed on
Commit
d54cae5
·
1 Parent(s): 0ccd1fa

Reduce max_new_tokens to 100 for faster response to prevent Vapi timeout

Browse files
Files changed (1) hide show
  1. app.py +1 -1
app.py CHANGED
@@ -143,7 +143,7 @@ def chat_completions():
143
  print(f"Generating response...")
144
  outputs = model.generate(
145
  **inputs,
146
- max_new_tokens=150, # Reduced from 256 for faster response
147
  temperature=0.7,
148
  do_sample=True,
149
  top_p=0.9,
 
143
  print(f"Generating response...")
144
  outputs = model.generate(
145
  **inputs,
146
+ max_new_tokens=100, # Further reduced for faster response (was 150)
147
  temperature=0.7,
148
  do_sample=True,
149
  top_p=0.9,