Xenobd committed on
Commit
d7cbddd
·
verified ·
1 Parent(s): 38c9ea7

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +16 -10
Dockerfile CHANGED
@@ -31,7 +31,7 @@ COPY . /BitNet
31
  # Create Python virtual environment
32
  RUN python -m venv $VENV_PATH
33
 
34
- # Upgrade pip and install Python dependencies
35
  RUN pip install --upgrade pip
36
  RUN pip install -r requirements.txt
37
  RUN pip install huggingface_hub
@@ -40,15 +40,21 @@ RUN pip install huggingface_hub
40
  RUN huggingface-cli download HF1BitLLM/Llama3-8B-1.58-100B-tokens \
41
  --local-dir models/Llama3-8B-1.58-100B-tokens
42
 
43
- # Setup environment / quantization in one go
44
- WORKDIR /BitNet
45
  RUN python setup_env.py -md ./models/Llama3-8B-1.58-100B-tokens -q i2_s
46
- WORKDIR /BitNet/models/Llama3-8B-1.58-100B-tokens
47
- # Optional: list files to confirm
48
- RUN ls -lah
49
 
50
- # Reset working directory for app
 
 
 
 
51
  WORKDIR /BitNet
52
- RUN pwd
53
- # Default command
54
- CMD ["python", "app.py"]
 
 
 
 
 
 
 
31
  # Create Python virtual environment
32
  RUN python -m venv $VENV_PATH
33
 
34
+ # Upgrade pip and install dependencies
35
  RUN pip install --upgrade pip
36
  RUN pip install -r requirements.txt
37
  RUN pip install huggingface_hub
 
40
  RUN huggingface-cli download HF1BitLLM/Llama3-8B-1.58-100B-tokens \
41
  --local-dir models/Llama3-8B-1.58-100B-tokens
42
 
43
+ # Setup environment / quantization
 
44
  RUN python setup_env.py -md ./models/Llama3-8B-1.58-100B-tokens -q i2_s
 
 
 
45
 
46
+ # Confirm files exist
47
+ WORKDIR /BitNet/models/Llama3-8B-1.58-100B-tokens
48
+ RUN ls -lah
49
+
50
+ # Reset working directory
51
  WORKDIR /BitNet
52
+
53
+ # Expose port for inference server
54
+ EXPOSE 7860
55
+
56
+ # Default command to run inference server directly
57
+ CMD ["python", "run_inference_server.py", \
58
+ "-m", "./models/Llama3-8B-1.58-100B-tokens/ggml-model-i2_s.gguf", \
59
+ "--host", "0.0.0.0", \
60
+ "--port", "7860"]