turtle170 committed on
Commit
3057246
·
verified ·
1 Parent(s): b91cca5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -1
app.py CHANGED
@@ -1286,7 +1286,7 @@ class ZeroEngine:
1286
  if is_cached:
1287
  model_cache.preload_cache(path)
1288
 
1289
- # SIMPLIFIED CPU-ONLY INITIALIZATION FOR RELIABILITY
1290
  init_params = {
1291
  "model_path": path,
1292
  "n_ctx": optimal_ctx,
@@ -1298,6 +1298,14 @@ class ZeroEngine:
1298
  "n_gpu_layers": 0,
1299
  "verbose": False,
1300
  "seed": -1,
 
 
 
 
 
 
 
 
1301
  }
1302
 
1303
  # Remove None values to avoid llama.cpp errors
 
1286
  if is_cached:
1287
  model_cache.preload_cache(path)
1288
 
1289
+ # ENHANCED CPU-ONLY INITIALIZATION WITH SPEED OPTIMIZATIONS
1290
  init_params = {
1291
  "model_path": path,
1292
  "n_ctx": optimal_ctx,
 
1298
  "n_gpu_layers": 0,
1299
  "verbose": False,
1300
  "seed": -1,
1301
+ # SPEED OPTIMIZATIONS
1302
+ "f16_kv": True, # Faster KV cache
1303
+ "type_k": 2 if model_format != "gemma" else None, # KV quantization
1304
+ "type_v": 2 if model_format != "gemma" else None, # KV quantization
1305
+ "use_scratch": True, # Scratch buffer
1306
+ "cache_prompt": True, # Prompt caching
1307
+ "cache_prompt_tokens": 512, # Larger prompt cache
1308
+ "numa": True, # NUMA optimization
1309
  }
1310
 
1311
  # Remove None values to avoid llama.cpp errors