turtle170 committed on
Commit
e64b130
·
verified ·
1 Parent(s): 91ebf27

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -9
app.py CHANGED
@@ -1234,11 +1234,11 @@ class ZeroEngine:
1234
  logger.warning(f"[BOOT] Cleanup warning: {e}")
1235
 
1236
  # Calculate optimal parameters with token purchases
1237
- # HARD-CODE: Force 16GB total RAM for Hugging Face Spaces reliability
1238
  import psutil
1239
  ram = psutil.virtual_memory()
1240
- total_ram_gb = 16.0 # HARD-CODED: 16GB total for container
1241
- available_ram_gb = 6.0 # HARD-CODED: 6GB available for model
1242
 
1243
  logger.info(f"[RAM] HARD-CODED: Total: {total_ram_gb:.1f}GB, Available: {available_ram_gb:.1f}GB")
1244
  logger.info(f"[RAM] (Ignoring host system {ram.total/(1024**3):.1f}GB)")
@@ -1257,23 +1257,23 @@ class ZeroEngine:
1257
  # IGNORE user batch size - use conservative settings for reliability
1258
  logger.info(f"[TOKEN] User batch size: {user_batch_size} (OVERRIDDEN for reliability)")
1259
 
1260
- # ULTRA-CONSERVATIVE SETTINGS FOR 6GB AVAILABLE RAM
1261
- optimal_batch = 128 # FIXED: Very conservative batch size
1262
- optimal_ctx = 256 # FIXED: Very conservative context size
1263
  optimal_threads = 2 # FIXED: 2 threads for 2 vCPU
1264
 
1265
- logger.info(f"[RAM] ULTRA-CONSERVATIVE: batch={optimal_batch}, ctx={optimal_ctx}")
1266
 
1267
  # Reduce context for Gemma models (they have 131K n_ctx_train)
1268
  if model_format == "gemma":
1269
- optimal_ctx = 128 # EXTREMELY conservative for Gemma
1270
  logger.info(f"[FORMAT] Gemma detected: reducing context to {optimal_ctx}")
1271
 
1272
  # Apply CPU optimizations before model loading
1273
  optimize_cpu_performance()
1274
  boost_cpu_frequency()
1275
 
1276
- logger.info(f"[CPU] ULTRA-OPTIMIZED: {optimal_threads} threads for 2 vCPU + 16GB RAM")
1277
 
1278
  try:
1279
  logger.info(f"[BOOT] Initializing {model_format.upper()} {quant_config['type']}: threads={optimal_threads}, batch={optimal_batch}, ctx={optimal_ctx}")
 
1234
  logger.warning(f"[BOOT] Cleanup warning: {e}")
1235
 
1236
  # Calculate optimal parameters with token purchases
1237
+ # HARD-CODE: Force 18GB total RAM for Hugging Face Spaces reliability
1238
  import psutil
1239
  ram = psutil.virtual_memory()
1240
+ total_ram_gb = 18.0 # HARD-CODED: 18GB total for container
1241
+ available_ram_gb = 16.0 # HARD-CODED: 16GB usable for model (2GB reserved)
1242
 
1243
  logger.info(f"[RAM] HARD-CODED: Total: {total_ram_gb:.1f}GB, Available: {available_ram_gb:.1f}GB")
1244
  logger.info(f"[RAM] (Ignoring host system {ram.total/(1024**3):.1f}GB)")
 
1257
  # IGNORE user batch size - use conservative settings for reliability
1258
  logger.info(f"[TOKEN] User batch size: {user_batch_size} (OVERRIDDEN for reliability)")
1259
 
1260
+ # OPTIMIZED SETTINGS FOR 16GB USABLE RAM
1261
+ optimal_batch = 512 # OPTIMIZED: Good batch size
1262
+ optimal_ctx = 1024 # OPTIMIZED: Good context size
1263
  optimal_threads = 2 # FIXED: 2 threads for 2 vCPU
1264
 
1265
+ logger.info(f"[RAM] OPTIMIZED: batch={optimal_batch}, ctx={optimal_ctx}")
1266
 
1267
  # Reduce context for Gemma models (they have 131K n_ctx_train)
1268
  if model_format == "gemma":
1269
+ optimal_ctx = 512 # OPTIMIZED for Gemma
1270
  logger.info(f"[FORMAT] Gemma detected: reducing context to {optimal_ctx}")
1271
 
1272
  # Apply CPU optimizations before model loading
1273
  optimize_cpu_performance()
1274
  boost_cpu_frequency()
1275
 
1276
+ logger.info(f"[CPU] ULTRA-OPTIMIZED: {optimal_threads} threads for 2 vCPU + 18GB RAM")
1277
 
1278
  try:
1279
  logger.info(f"[BOOT] Initializing {model_format.upper()} {quant_config['type']}: threads={optimal_threads}, batch={optimal_batch}, ctx={optimal_ctx}")