turtle170 committed on
Commit
7e42957
·
verified ·
1 Parent(s): 4cf69fa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -1
app.py CHANGED
@@ -888,7 +888,7 @@ class ZeroEngine:
888
  logger.info(f"[TOKEN] User batch size: {user_batch_size}")
889
 
890
  # CPU can handle larger batches with quantized models
891
- optimal_batch = max(1024, min(8192, optimal_batch)) # 1024-8192 range for CPU
892
 
893
  # Context size
894
  optimal_ctx = quant_config["ctx_size"]
 
888
  logger.info(f"[TOKEN] User batch size: {user_batch_size}")
889
 
890
  # CPU can handle larger batches with quantized models
891
+ optimal_batch = max(256, min(512, optimal_batch)) # 256-512 range for CPU (much more conservative)
892
 
893
  # Context size
894
  optimal_ctx = quant_config["ctx_size"]