Spaces:

viskav
/

format

Sleeping

viskav committed on Dec 9, 2025

Commit

095c18a

verified ·

1 Parent(s): 9efa685

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -9,7 +9,7 @@ from contextlib import asynccontextmanager
 # Your model repository on Hugging Face
 MODEL_REPO = "bartowski/Phi-3.1-mini-4k-instruct-GGUF"
 # The specific GGUF file name that is around 3.1GB
-MODEL_FILE = "Phi-3.1-mini-4k-instruct-Q6_K_L.gguf"
 # n_gpu_layers=-1 tells llama.cpp to offload all layers to the GPU if one is available
 N_GPU_LAYERS = -1
 # Maximum tokens the model can generate in a single response

 # Your model repository on Hugging Face
 MODEL_REPO = "bartowski/Phi-3.1-mini-4k-instruct-GGUF"
 # The specific GGUF file name that is around 3.1GB
+MODEL_FILE = "Phi-3.1-mini-4k-instruct-Q5_K_M.gguf"
 # n_gpu_layers=-1 tells llama.cpp to offload all layers to the GPU if one is available
 N_GPU_LAYERS = -1
 # Maximum tokens the model can generate in a single response