Spaces:
Sleeping
Sleeping
Increase max_new_tokens to 8192 to allow much longer responses (still capped at 8192 new tokens)
Browse files
- gradio_app.py (+15 -15)
gradio_app.py
CHANGED
|
@@ -222,21 +222,21 @@ def chat_with_model(message, history, temperature, json_mode=False, json_templat
|
|
| 222 |
inputs = {k: v.to(model_device) for k, v in inputs.items()}
|
| 223 |
|
| 224 |
with torch.no_grad():
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
|
| 241 |
# Decode response
|
| 242 |
generated_text = model_manager.tokenizer.decode(outputs[0], skip_special_tokens=True)
|
|
|
|
| 222 |
inputs = {k: v.to(model_device) for k, v in inputs.items()}
|
| 223 |
|
| 224 |
with torch.no_grad():
|
| 225 |
+
outputs = model_manager.model.generate(
|
| 226 |
+
**inputs,
|
| 227 |
+
max_new_tokens=8192, # Much higher limit for complete responses
|
| 228 |
+
temperature=temperature,
|
| 229 |
+
top_p=0.95,
|
| 230 |
+
do_sample=True,
|
| 231 |
+
num_beams=1,
|
| 232 |
+
pad_token_id=model_manager.tokenizer.eos_token_id,
|
| 233 |
+
eos_token_id=model_manager.tokenizer.eos_token_id,
|
| 234 |
+
early_stopping=False, # Disable early stopping
|
| 235 |
+
repetition_penalty=1.05, # Lighter repetition penalty
|
| 236 |
+
no_repeat_ngram_size=0, # Disable n-gram repetition blocking
|
| 237 |
+
length_penalty=1.0, # Neutral length penalty
|
| 238 |
+
min_new_tokens=50 # Ensure minimum response length
|
| 239 |
+
)
|
| 240 |
|
| 241 |
# Decode response
|
| 242 |
generated_text = model_manager.tokenizer.decode(outputs[0], skip_special_tokens=True)
|