turtle170 committed on
Commit
30555ab
·
verified ·
1 Parent(s): 3057246

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -9
app.py CHANGED
@@ -1286,7 +1286,7 @@ class ZeroEngine:
1286
  if is_cached:
1287
  model_cache.preload_cache(path)
1288
 
1289
- # ENHANCED CPU-ONLY INITIALIZATION WITH SPEED OPTIMIZATIONS
1290
  init_params = {
1291
  "model_path": path,
1292
  "n_ctx": optimal_ctx,
@@ -1298,14 +1298,6 @@ class ZeroEngine:
1298
  "n_gpu_layers": 0,
1299
  "verbose": False,
1300
  "seed": -1,
1301
- # SPEED OPTIMIZATIONS
1302
- "f16_kv": True, # Faster KV cache
1303
- "type_k": 2 if model_format != "gemma" else None, # KV quantization
1304
- "type_v": 2 if model_format != "gemma" else None, # KV quantization
1305
- "use_scratch": True, # Scratch buffer
1306
- "cache_prompt": True, # Prompt caching
1307
- "cache_prompt_tokens": 512, # Larger prompt cache
1308
- "numa": True, # NUMA optimization
1309
  }
1310
 
1311
  # Remove None values to avoid llama.cpp errors
 
1286
  if is_cached:
1287
  model_cache.preload_cache(path)
1288
 
1289
+ # SIMPLE CPU-ONLY INITIALIZATION - NO CONSTRAINTS
1290
  init_params = {
1291
  "model_path": path,
1292
  "n_ctx": optimal_ctx,
 
1298
  "n_gpu_layers": 0,
1299
  "verbose": False,
1300
  "seed": -1,
 
 
 
 
 
 
 
 
1301
  }
1302
 
1303
  # Remove None values to avoid llama.cpp errors