Spaces:

turtle170
/

ZeroEngine

Running

turtle170 committed on Feb 1

Commit

3057246

verified ·

1 Parent(s): b91cca5

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1286,7 +1286,7 @@ class ZeroEngine:
                     if is_cached:
                         model_cache.preload_cache(path)
-                    # SIMPLIFIED CPU-ONLY INITIALIZATION FOR RELIABILITY
                     init_params = {
                         "model_path": path,
                         "n_ctx": optimal_ctx,
@@ -1298,6 +1298,14 @@ class ZeroEngine:
                         "n_gpu_layers": 0,
                         "verbose": False,
                         "seed": -1,
                     }
                     # Remove None values to avoid llama.cpp errors

                     if is_cached:
                         model_cache.preload_cache(path)
+                    # ENHANCED CPU-ONLY INITIALIZATION WITH SPEED OPTIMIZATIONS
                     init_params = {
                         "model_path": path,
                         "n_ctx": optimal_ctx,
                         "n_gpu_layers": 0,
                         "verbose": False,
                         "seed": -1,
+                        # SPEED OPTIMIZATIONS
+                        "f16_kv": True,                              # Faster KV cache
+                        "type_k": 2 if model_format != "gemma" else None,  # KV quantization
+                        "type_v": 2 if model_format != "gemma" else None,  # KV quantization
+                        "use_scratch": True,                         # Scratch buffer
+                        "cache_prompt": True,                        # Prompt caching
+                        "cache_prompt_tokens": 512,                  # Larger prompt cache
+                        "numa": True,                                # NUMA optimization
                     }
                     # Remove None values to avoid llama.cpp errors