viskav committed on
Commit
92f4f79
·
verified ·
1 Parent(s): 095c18a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -1
app.py CHANGED
@@ -9,7 +9,7 @@ from contextlib import asynccontextmanager
9
  # Your model repository on Hugging Face
10
  MODEL_REPO = "bartowski/Phi-3.1-mini-4k-instruct-GGUF"
11
  # The specific GGUF file name that is around 3.1GB
12
- MODEL_FILE = "Phi-3.1-mini-4k-instruct-Q5_K_M.gguf"
13
  # n_gpu_layers=-1 tells llama.cpp to offload all layers to the GPU if one is available
14
  N_GPU_LAYERS = -1
15
  # Maximum tokens the model can generate in a single response
 
9
  # Your model repository on Hugging Face
10
  MODEL_REPO = "bartowski/Phi-3.1-mini-4k-instruct-GGUF"
11
  # The specific GGUF file name that is around 3.1GB
12
+ MODEL_FILE = "Phi-3.1-mini-4k-instruct-Q4_K_M.gguf"
13
  # n_gpu_layers=-1 tells llama.cpp to offload all layers to the GPU if one is available
14
  N_GPU_LAYERS = -1
15
  # Maximum tokens the model can generate in a single response