Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1286,7 +1286,7 @@ class ZeroEngine:
|
|
| 1286 |
if is_cached:
|
| 1287 |
model_cache.preload_cache(path)
|
| 1288 |
|
| 1289 |
-
#
|
| 1290 |
init_params = {
|
| 1291 |
"model_path": path,
|
| 1292 |
"n_ctx": optimal_ctx,
|
|
@@ -1298,14 +1298,6 @@ class ZeroEngine:
|
|
| 1298 |
"n_gpu_layers": 0,
|
| 1299 |
"verbose": False,
|
| 1300 |
"seed": -1,
|
| 1301 |
-
# SPEED OPTIMIZATIONS
|
| 1302 |
-
"f16_kv": True, # Faster KV cache
|
| 1303 |
-
"type_k": 2 if model_format != "gemma" else None, # KV quantization
|
| 1304 |
-
"type_v": 2 if model_format != "gemma" else None, # KV quantization
|
| 1305 |
-
"use_scratch": True, # Scratch buffer
|
| 1306 |
-
"cache_prompt": True, # Prompt caching
|
| 1307 |
-
"cache_prompt_tokens": 512, # Larger prompt cache
|
| 1308 |
-
"numa": True, # NUMA optimization
|
| 1309 |
}
|
| 1310 |
|
| 1311 |
# Remove None values to avoid llama.cpp errors
|
|
|
|
| 1286 |
if is_cached:
|
| 1287 |
model_cache.preload_cache(path)
|
| 1288 |
|
| 1289 |
+
# SIMPLE CPU-ONLY INITIALIZATION - NO CONSTRAINTS
|
| 1290 |
init_params = {
|
| 1291 |
"model_path": path,
|
| 1292 |
"n_ctx": optimal_ctx,
|
|
|
|
| 1298 |
"n_gpu_layers": 0,
|
| 1299 |
"verbose": False,
|
| 1300 |
"seed": -1,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1301 |
}
|
| 1302 |
|
| 1303 |
# Remove None values to avoid llama.cpp errors
|