Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -888,7 +888,7 @@ class ZeroEngine:
|
|
| 888 |
logger.info(f"[TOKEN] User batch size: {user_batch_size}")
|
| 889 |
|
| 890 |
# CPU can handle larger batches with quantized models
|
| 891 |
-
optimal_batch = max(
|
| 892 |
|
| 893 |
# Context size
|
| 894 |
optimal_ctx = quant_config["ctx_size"]
|
|
|
|
| 888 |
logger.info(f"[TOKEN] User batch size: {user_batch_size}")
|
| 889 |
|
| 890 |
# CPU can handle larger batches with quantized models
|
| 891 |
+
optimal_batch = max(256, min(512, optimal_batch)) # 256-512 range for CPU (much more conservative)
|
| 892 |
|
| 893 |
# Context size
|
| 894 |
optimal_ctx = quant_config["ctx_size"]
|