Spaces:
Running
Reduce max_new_tokens to 64 for faster replies
Browse files
app.py
CHANGED
@@ -20,7 +20,7 @@ def respond(message: str, history: list[tuple[str, str]]):
| 20 |
| 21 | out = llm(
| 22 | prompt,
| 23 | - max_new_tokens=
| 24 | temperature=0.7,
| 25 | top_p=0.9,
| 26 | )

| 20 |
| 21 | out = llm(
| 22 | prompt,
| 23 | + max_new_tokens=64,
| 24 | temperature=0.7,
| 25 | top_p=0.9,
| 26 | )