Xenobd committed on
Commit
d7cbddd
·
verified ·
1 Parent(s): 38c9ea7

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +16 -10
Dockerfile CHANGED
@@ -31,7 +31,7 @@ COPY . /BitNet
31
  # Create Python virtual environment
32
  RUN python -m venv $VENV_PATH
33
 
34
- # Upgrade pip and install Python dependencies
35
  RUN pip install --upgrade pip
36
  RUN pip install -r requirements.txt
37
  RUN pip install huggingface_hub
@@ -40,15 +40,21 @@ RUN pip install huggingface_hub
40
  RUN huggingface-cli download HF1BitLLM/Llama3-8B-1.58-100B-tokens \
41
  --local-dir models/Llama3-8B-1.58-100B-tokens
42
 
43
- # Setup environment / quantization in one go
44
- WORKDIR /BitNet
45
  RUN python setup_env.py -md ./models/Llama3-8B-1.58-100B-tokens -q i2_s
46
- WORKDIR /BitNet/models/Llama3-8B-1.58-100B-tokens
47
- # Optional: list files to confirm
48
- RUN ls -lah
49
 
50
- # Reset working directory for app
 
 
 
 
51
  WORKDIR /BitNet
52
- RUN pwd
53
- # Default command
54
- CMD ["python", "app.py"]
 
 
 
 
 
 
 
31
  # Create Python virtual environment
32
  RUN python -m venv $VENV_PATH
33
 
34
+ # Upgrade pip and install dependencies
35
  RUN pip install --upgrade pip
36
  RUN pip install -r requirements.txt
37
  RUN pip install huggingface_hub
 
40
  RUN huggingface-cli download HF1BitLLM/Llama3-8B-1.58-100B-tokens \
41
  --local-dir models/Llama3-8B-1.58-100B-tokens
42
 
43
+ # Setup environment / quantization
 
44
  RUN python setup_env.py -md ./models/Llama3-8B-1.58-100B-tokens -q i2_s
 
 
 
45
 
46
+ # Confirm files exist
47
+ WORKDIR /BitNet/models/Llama3-8B-1.58-100B-tokens
48
+ RUN ls -lah
49
+
50
+ # Reset working directory
51
  WORKDIR /BitNet
52
+
53
+ # Expose port for inference server
54
+ EXPOSE 7860
55
+
56
+ # Default command to run inference server directly
57
+ CMD ["python", "run_inference_server.py", \
58
+ "-m", "./models/Llama3-8B-1.58-100B-tokens/ggml-model-i2_s.gguf", \
59
+ "--host", "0.0.0.0", \
60
+ "--port", "7860"]