Spaces:
Sleeping
Sleeping
Ilke Ileri
committed on
Commit
·
d54cae5
1
Parent(s):
0ccd1fa
Reduce max_tokens to 100 for faster response to prevent Vapi timeout
Browse files
app.py
CHANGED
|
@@ -143,7 +143,7 @@ def chat_completions():
|
|
| 143 |
print(f"Generating response...")
|
| 144 |
outputs = model.generate(
|
| 145 |
**inputs,
|
| 146 |
-
max_new_tokens=150,
|
| 147 |
temperature=0.7,
|
| 148 |
do_sample=True,
|
| 149 |
top_p=0.9,
|
|
|
|
| 143 |
print(f"Generating response...")
|
| 144 |
outputs = model.generate(
|
| 145 |
**inputs,
|
| 146 |
+
max_new_tokens=100, # Further reduced for faster response (was 150)
|
| 147 |
temperature=0.7,
|
| 148 |
do_sample=True,
|
| 149 |
top_p=0.9,
|