Spaces:
Sleeping
Sleeping
Commit ·
ef7c05f
1
Parent(s): 2473068
Fix: Handle None LLAMA_CPP_MODEL_PATH and pre-download model
Browse files
- Dockerfile +4 -4
- config/settings.py +1 -1
Dockerfile
CHANGED
|
@@ -5,7 +5,7 @@ ENV PIP_NO_CACHE_DIR=1
|
|
| 5 |
ENV DOCKER_CONTAINER=true
|
| 6 |
ENV SPACE_APP_DATA=/data
|
| 7 |
ENV HF_HOME=/data/huggingface
|
| 8 |
-
ENV LLAMA_CPP_MODEL_PATH=/data/models/Hermes-2-Pro-Llama-3-8B-
|
| 9 |
|
| 10 |
# Optimize llama-cpp-python build for CPU only
|
| 11 |
ENV CMAKE_ARGS="-DLLAMA_BLAS=0 -DLLAMA_CUBLAS=0"
|
|
@@ -38,15 +38,15 @@ RUN python -m spacy download en_core_web_sm
|
|
| 38 |
# Create directories that your app expects
|
| 39 |
RUN mkdir -p /data/models /data/uploads /data/cache /data/logs /data/huggingface
|
| 40 |
|
| 41 |
-
# Download GGUF model during build
|
| 42 |
RUN python -c "from huggingface_hub import hf_hub_download; \
|
| 43 |
import shutil; \
|
| 44 |
downloaded = hf_hub_download( \
|
| 45 |
repo_id='NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF', \
|
| 46 |
-
filename='Hermes-2-Pro-Llama-3-8B-
|
| 47 |
cache_dir='/data/huggingface' \
|
| 48 |
); \
|
| 49 |
-
shutil.copy(downloaded, '/data/models/Hermes-2-Pro-Llama-3-8B-
|
| 50 |
echo "Model downloaded to /data/models/"
|
| 51 |
|
| 52 |
# Copy app code
|
|
|
|
| 5 |
ENV DOCKER_CONTAINER=true
|
| 6 |
ENV SPACE_APP_DATA=/data
|
| 7 |
ENV HF_HOME=/data/huggingface
|
| 8 |
+
ENV LLAMA_CPP_MODEL_PATH=/data/models/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf
|
| 9 |
|
| 10 |
# Optimize llama-cpp-python build for CPU only
|
| 11 |
ENV CMAKE_ARGS="-DLLAMA_BLAS=0 -DLLAMA_CUBLAS=0"
|
|
|
|
| 38 |
# Create directories that your app expects
|
| 39 |
RUN mkdir -p /data/models /data/uploads /data/cache /data/logs /data/huggingface
|
| 40 |
|
| 41 |
+
# Download GGUF model during build
|
| 42 |
RUN python -c "from huggingface_hub import hf_hub_download; \
|
| 43 |
import shutil; \
|
| 44 |
downloaded = hf_hub_download( \
|
| 45 |
repo_id='NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF', \
|
| 46 |
+
filename='Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf', \
|
| 47 |
cache_dir='/data/huggingface' \
|
| 48 |
); \
|
| 49 |
+
shutil.copy(downloaded, '/data/models/Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf')" && \
|
| 50 |
echo "Model downloaded to /data/models/"
|
| 51 |
|
| 52 |
# Copy app code
|
config/settings.py
CHANGED
|
@@ -48,7 +48,7 @@ class Settings(BaseSettings):
|
|
| 48 |
LLAMA_CPP_ENABLED : bool = False # Auto-enabled in HF Spaces
|
| 49 |
LLAMA_CPP_MODEL_PATH : Optional[Path] = None # Local path to GGUF model
|
| 50 |
LLAMA_CPP_MODEL_REPO : str = "NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF"
|
| 51 |
-
LLAMA_CPP_MODEL_FILE : str = "Hermes-2-Pro-Llama-3-8B-
|
| 52 |
LLAMA_CPP_N_CTX : int = 4096 # Context window
|
| 53 |
LLAMA_CPP_N_GPU_LAYERS : int = -1 # -1 = all layers on GPU
|
| 54 |
LLAMA_CPP_N_BATCH : int = 512 # Batch size for prompt processing
|
|
|
|
| 48 |
LLAMA_CPP_ENABLED : bool = False # Auto-enabled in HF Spaces
|
| 49 |
LLAMA_CPP_MODEL_PATH : Optional[Path] = None # Local path to GGUF model
|
| 50 |
LLAMA_CPP_MODEL_REPO : str = "NousResearch/Hermes-2-Pro-Llama-3-8B-GGUF"
|
| 51 |
+
LLAMA_CPP_MODEL_FILE : str = "Hermes-2-Pro-Llama-3-8B-Q4_K_M.gguf"
|
| 52 |
LLAMA_CPP_N_CTX : int = 4096 # Context window
|
| 53 |
LLAMA_CPP_N_GPU_LAYERS : int = -1 # -1 = all layers on GPU
|
| 54 |
LLAMA_CPP_N_BATCH : int = 512 # Batch size for prompt processing
|