Spaces:

turtle170
/

ZeroEngine

Running

App Files Files Community

turtle170 committed on Feb 1

Commit

e64b130

verified ·

1 Parent(s): 91ebf27

Update app.py

Browse files

Files changed (1) hide show

app.py +9 -9

app.py CHANGED Viewed

@@ -1234,11 +1234,11 @@ class ZeroEngine:
                         logger.warning(f"[BOOT] Cleanup warning: {e}")
                 # Calculate optimal parameters with token purchases
-                # HARD-CODE: Force 16GB total RAM for Hugging Face Spaces reliability
                 import psutil
                 ram = psutil.virtual_memory()
-                total_ram_gb = 16.0  # HARD-CODED: 16GB total for container
-                available_ram_gb = 6.0  # HARD-CODED: 6GB available for model
                 logger.info(f"[RAM] HARD-CODED: Total: {total_ram_gb:.1f}GB, Available: {available_ram_gb:.1f}GB")
                 logger.info(f"[RAM] (Ignoring host system {ram.total/(1024**3):.1f}GB)")
@@ -1257,23 +1257,23 @@ class ZeroEngine:
                     # IGNORE user batch size - use conservative settings for reliability
                     logger.info(f"[TOKEN] User batch size: {user_batch_size} (OVERRIDDEN for reliability)")
-                # ULTRA-CONSERVATIVE SETTINGS FOR 6GB AVAILABLE RAM
-                optimal_batch = 128  # FIXED: Very conservative batch size
-                optimal_ctx = 256    # FIXED: Very conservative context size
                 optimal_threads = 2   # FIXED: 2 threads for 2 vCPU
-                logger.info(f"[RAM] ULTRA-CONSERVATIVE: batch={optimal_batch}, ctx={optimal_ctx}")
                 # Reduce context for Gemma models (they have 131K n_ctx_train)
                 if model_format == "gemma":
-                    optimal_ctx = 128  # EXTREMELY conservative for Gemma
                     logger.info(f"[FORMAT] Gemma detected: reducing context to {optimal_ctx}")
                 # Apply CPU optimizations before model loading
                 optimize_cpu_performance()
                 boost_cpu_frequency()
-                logger.info(f"[CPU] ULTRA-OPTIMIZED: {optimal_threads} threads for 2 vCPU + 16GB RAM")
                 try:
                     logger.info(f"[BOOT] Initializing {model_format.upper()} {quant_config['type']}: threads={optimal_threads}, batch={optimal_batch}, ctx={optimal_ctx}")

                         logger.warning(f"[BOOT] Cleanup warning: {e}")
                 # Calculate optimal parameters with token purchases
+                # HARD-CODE: Force 18GB total RAM for Hugging Face Spaces reliability
                 import psutil
                 ram = psutil.virtual_memory()
+                total_ram_gb = 18.0  # HARD-CODED: 18GB total for container
+                available_ram_gb = 16.0  # HARD-CODED: 16GB usable for model (2GB reserved)
                 logger.info(f"[RAM] HARD-CODED: Total: {total_ram_gb:.1f}GB, Available: {available_ram_gb:.1f}GB")
                 logger.info(f"[RAM] (Ignoring host system {ram.total/(1024**3):.1f}GB)")
                     # IGNORE user batch size - use conservative settings for reliability
                     logger.info(f"[TOKEN] User batch size: {user_batch_size} (OVERRIDDEN for reliability)")
+                # OPTIMIZED SETTINGS FOR 16GB USABLE RAM
+                optimal_batch = 512  # OPTIMIZED: Good batch size
+                optimal_ctx = 1024   # OPTIMIZED: Good context size
                 optimal_threads = 2   # FIXED: 2 threads for 2 vCPU
+                logger.info(f"[RAM] OPTIMIZED: batch={optimal_batch}, ctx={optimal_ctx}")
                 # Reduce context for Gemma models (they have 131K n_ctx_train)
                 if model_format == "gemma":
+                    optimal_ctx = 512  # OPTIMIZED for Gemma
                     logger.info(f"[FORMAT] Gemma detected: reducing context to {optimal_ctx}")
                 # Apply CPU optimizations before model loading
                 optimize_cpu_performance()
                 boost_cpu_frequency()
+                logger.info(f"[CPU] ULTRA-OPTIMIZED: {optimal_threads} threads for 2 vCPU + 18GB RAM")
                 try:
                     logger.info(f"[BOOT] Initializing {model_format.upper()} {quant_config['type']}: threads={optimal_threads}, batch={optimal_batch}, ctx={optimal_ctx}")