Update app.py
Browse files
app.py
CHANGED
|
@@ -9,7 +9,7 @@ from contextlib import asynccontextmanager
|
|
| 9 |
# Your model repository on Hugging Face
|
| 10 |
MODEL_REPO = "bartowski/Phi-3.1-mini-4k-instruct-GGUF"
|
| 11 |
# Name of the specific GGUF file, which is around 3.1GB
|
| 12 |
-
MODEL_FILE = "Phi-3.1-mini-4k-instruct-
|
| 13 |
# n_gpu_layers=-1 tells llama.cpp to offload all layers to the GPU if one is available
|
| 14 |
N_GPU_LAYERS = -1
|
| 15 |
# Maximum tokens the model can generate in a single response
|
|
|
|
| 9 |
# Your model repository on Hugging Face
|
| 10 |
MODEL_REPO = "bartowski/Phi-3.1-mini-4k-instruct-GGUF"
|
| 11 |
# Name of the specific GGUF file, which is around 3.1GB
|
| 12 |
+
MODEL_FILE = "Phi-3.1-mini-4k-instruct-Q5_K_M.gguf"
|
| 13 |
# n_gpu_layers=-1 tells llama.cpp to offload all layers to the GPU if one is available
|
| 14 |
N_GPU_LAYERS = -1
|
| 15 |
# Maximum tokens the model can generate in a single response
|