satyaki-mitra committed on
Commit
ef7c05f
·
1 Parent(s): 2473068

Fix: Handle None LLAMA_CPP_MODEL_PATH and pre-download model

Browse files
Files changed (2) hide show
  1. Dockerfile +4 -4
  2. config/settings.py +1 -1
Dockerfile CHANGED
@@ -5,7 +5,7 @@ ENV PIP_NO_CACHE_DIR=1
5
  ENV DOCKER_CONTAINER=true
6
  ENV SPACE_APP_DATA=/data
7
  ENV HF_HOME=/data/huggingface
8
- ENV LLAMA_CPP_MODEL_PATH=/data/models/Hermes-2-Pro-Llama-3-8B-GGUF.Q4_K_M.gguf
9
 
10
  # Optimize llama-cpp-python build for CPU only
11
  ENV CMAKE_ARGS="-DLLAMA_BLAS=0 -DLLAMA_CUBLAS=0"
@@ -38,15 +38,15 @@ RUN python -m spacy download en_core_web_sm
38
  # Create directories that your app expects
39
  RUN mkdir -p /data/models /data/uploads /data/cache /data/logs /data/huggingface
40
 
41
- # Download GGUF model during build (BEFORE copying app code)
42
  RUN python -c "from huggingface_hub import hf_hub_download; \
43
  import shutil; \
44
  downloaded = hf_hub_download( \
45
  repo_id='NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF', \
46
- filename='Hermes-2-Pro-Llama-3-8B-GGUF.Q4_K_M.gguf', \
47
  cache_dir='/data/huggingface' \
48
  ); \
49
- shutil.copy(downloaded, '/data/models/Hermes-2-Pro-Llama-3-8B-GGUF.Q4_K_M.gguf')" && \
50
  echo "Model downloaded to /data/models/"
51
 
52
  # Copy app code
 
5
  ENV DOCKER_CONTAINER=true
6
  ENV SPACE_APP_DATA=/data
7
  ENV HF_HOME=/data/huggingface
8
+ ENV LLAMA_CPP_MODEL_PATH=/data/models/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
9
 
10
  # Optimize llama-cpp-python build for CPU only
11
  ENV CMAKE_ARGS="-DLLAMA_BLAS=0 -DLLAMA_CUBLAS=0"
 
38
  # Create directories that your app expects
39
  RUN mkdir -p /data/models /data/uploads /data/cache /data/logs /data/huggingface
40
 
41
+ # Download GGUF model during build
42
  RUN python -c "from huggingface_hub import hf_hub_download; \
43
  import shutil; \
44
  downloaded = hf_hub_download( \
45
  repo_id='NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF', \
46
+ filename='Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf', \
47
  cache_dir='/data/huggingface' \
48
  ); \
49
+ shutil.copy(downloaded, '/data/models/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf')" && \
50
  echo "Model downloaded to /data/models/"
51
 
52
  # Copy app code
config/settings.py CHANGED
@@ -48,7 +48,7 @@ class Settings(BaseSettings):
48
  LLAMA_CPP_ENABLED : bool = False # Auto-enabled in HF Spaces
49
  LLAMA_CPP_MODEL_PATH : Optional[Path] = None # Local path to GGUF model
50
  LLAMA_CPP_MODEL_REPO : str = "NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF"
51
- LLAMA_CPP_MODEL_FILE : str = "Hermes-2-Pro-Llama-3-8B-GGUF.Q4_K_M.gguf"
52
  LLAMA_CPP_N_CTX : int = 4096 # Context window
53
  LLAMA_CPP_N_GPU_LAYERS : int = -1 # -1 = all layers on GPU
54
  LLAMA_CPP_N_BATCH : int = 512 # Batch size for prompt processing
 
48
  LLAMA_CPP_ENABLED : bool = False # Auto-enabled in HF Spaces
49
  LLAMA_CPP_MODEL_PATH : Optional[Path] = None # Local path to GGUF model
50
  LLAMA_CPP_MODEL_REPO : str = "NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF"
51
+ LLAMA_CPP_MODEL_FILE : str = "Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf"
52
  LLAMA_CPP_N_CTX : int = 4096 # Context window
53
  LLAMA_CPP_N_GPU_LAYERS : int = -1 # -1 = all layers on GPU
54
  LLAMA_CPP_N_BATCH : int = 512 # Batch size for prompt processing