Spaces:
Runtime error
Upload Dockerfile with huggingface_hub
Browse files
- Dockerfile +3 -3
Dockerfile
CHANGED
|
@@ -5,15 +5,15 @@ FROM vllm/vllm-openai:v0.6.6.post1
|
|
| 5 |
# Set environment variables
|
| 6 |
ENV MODEL_NAME="TheBloke/Llama-2-7B-Chat-AWQ"
|
| 7 |
ENV HOST="0.0.0.0"
|
| 8 |
-
ENV PORT="
|
| 9 |
|
| 10 |
# Expose the API port
|
| 11 |
-
EXPOSE
|
| 12 |
|
| 13 |
# Run vLLM OpenAI-compatible server
|
| 14 |
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
|
| 15 |
CMD ["--model", "TheBloke/Llama-2-7B-Chat-AWQ", \
|
| 16 |
"--quantization", "awq", \
|
| 17 |
"--host", "0.0.0.0", \
|
| 18 |
-
"--port", "
|
| 19 |
"--max-model-len", "4096"]
|
|
|
|
| 5 |
# Set environment variables
|
| 6 |
ENV MODEL_NAME="TheBloke/Llama-2-7B-Chat-AWQ"
|
| 7 |
ENV HOST="0.0.0.0"
|
| 8 |
+
ENV PORT="7860"
|
| 9 |
|
| 10 |
# Expose the API port
|
| 11 |
+
EXPOSE 7860
|
| 12 |
|
| 13 |
# Run vLLM OpenAI-compatible server
|
| 14 |
ENTRYPOINT ["python3", "-m", "vllm.entrypoints.openai.api_server"]
|
| 15 |
CMD ["--model", "TheBloke/Llama-2-7B-Chat-AWQ", \
|
| 16 |
"--quantization", "awq", \
|
| 17 |
"--host", "0.0.0.0", \
|
| 18 |
+
"--port", "7860", \
|
| 19 |
"--max-model-len", "4096"]
|