Spaces (status: Sleeping)
Commit: Update Dockerfile
Files changed: Dockerfile (+16 lines, −10 lines)
|
@@ -31,7 +31,7 @@ COPY . /BitNet
|
|
| 31 |
# Create Python virtual environment
|
| 32 |
RUN python -m venv $VENV_PATH
|
| 33 |
|
| 34 |
-
# Upgrade pip and install
|
| 35 |
RUN pip install --upgrade pip
|
| 36 |
RUN pip install -r requirements.txt
|
| 37 |
RUN pip install huggingface_hub
|
|
@@ -40,15 +40,21 @@ RUN pip install huggingface_hub
|
|
| 40 |
RUN huggingface-cli download HF1BitLLM/Llama3-8B-1.58-100B-tokens \
|
| 41 |
--local-dir models/Llama3-8B-1.58-100B-tokens
|
| 42 |
|
| 43 |
-
# Setup environment / quantization
|
| 44 |
-
WORKDIR /BitNet
|
| 45 |
RUN python setup_env.py -md ./models/Llama3-8B-1.58-100B-tokens -q i2_s
|
| 46 |
-
WORKDIR /BitNet/models/Llama3-8B-1.58-100B-tokens
|
| 47 |
-
# Optional: list files to confirm
|
| 48 |
-
RUN ls -lah
|
| 49 |
|
| 50 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
WORKDIR /BitNet
|
| 52 |
-
|
| 53 |
-
#
|
| 54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
# Create Python virtual environment
RUN python -m venv $VENV_PATH

# Upgrade pip and install dependencies.
# Combined into a single layer with --no-cache-dir so pip's wheel cache is
# never baked into an image layer (hadolint DL3042) and one logical step
# stays one cacheable layer.
# NOTE(review): requirements.txt contents and huggingface_hub are unpinned —
# pin versions for reproducible builds (hadolint DL3013).
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r requirements.txt \
    && pip install --no-cache-dir huggingface_hub

# Download the pre-trained 1.58-bit Llama3 model weights at build time.
# NOTE(review): this bakes ~multi-GB weights into an image layer; consider a
# runtime volume/cache mount if image size matters.
RUN huggingface-cli download HF1BitLLM/Llama3-8B-1.58-100B-tokens \
    --local-dir models/Llama3-8B-1.58-100B-tokens

# Set up the environment and quantize the model (i2_s quantization type)
RUN python setup_env.py -md ./models/Llama3-8B-1.58-100B-tokens -q i2_s

# Confirm the downloaded/quantized files exist (build-time sanity check only;
# output is visible in the build log)
WORKDIR /BitNet/models/Llama3-8B-1.58-100B-tokens
RUN ls -lah

# Reset working directory for the runtime command
WORKDIR /BitNet

# Expose port for the inference server (documentation only — publish with
# `docker run -p 7860:7860`)
EXPOSE 7860

# NOTE(review): no USER directive is visible in this chunk — if none exists
# earlier in the file, the server runs as root; add a non-root user.

# Default command to run the inference server directly. Exec (JSON-array)
# form keeps python as PID 1 so it receives SIGTERM from `docker stop`.
CMD ["python", "run_inference_server.py", \
    "-m", "./models/Llama3-8B-1.58-100B-tokens/ggml-model-i2_s.gguf", \
    "--host", "0.0.0.0", \
    "--port", "7860"]