# Image for a FastAPI application (main:app) served by uvicorn, with
# llama-cpp-python installed and a GGUF model file fetched at build time.
FROM python:3.10-slim

# PYTHONUNBUFFERED: keep stdout/stderr unbuffered so logs show up immediately.
# PORT: exported to the process environment only; the CMD at the bottom passes
#       the uvicorn port explicitly and does not read this variable.
# OMP_NUM_THREADS: limits OpenMP to 2 threads.
ENV PYTHONUNBUFFERED=1 PORT=7860 OMP_NUM_THREADS=2

WORKDIR /code

# Dependencies are installed (as root, into the system site-packages) before
# any application code is copied, so these layers stay cached when only the
# source tree changes.
# llama-cpp-python is taken from a prebuilt release wheel pinned to v0.3.21.
RUN pip install --no-cache-dir \
    "https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.21/llama_cpp_python-0.3.21-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl"
# NOTE(review): these three packages are unpinned, so a rebuild may resolve to
# different versions; pin them once known-good versions have been chosen.
RUN pip install --no-cache-dir fastapi uvicorn huggingface_hub

# Unprivileged account that owns the application tree and runs the service,
# so the container no longer executes as root. Everything that needs root
# (the pip installs above) happens before the USER switch below.
# NOTE(review): uid 1000 is an assumption about the target runtime (some
# hosted platforms run containers as uid 1000) -- confirm for your deployment.
RUN useradd --create-home --uid 1000 --user-group app \
    && mkdir -p /code/models \
    && chown -R app:app /code

USER app

# Model revision (branch, tag or commit hash) to download. The default "main"
# matches what hf_hub_download uses when no revision is given; build with
#   --build-arg MODEL_REVISION=<commit-sha>
# to make the downloaded artifact reproducible.
ARG MODEL_REVISION=main

# Fetch the model into /code/models at build time, as the unprivileged user so
# the resulting files are owned by the account that runs the service.
# ARG values are visible to RUN as environment variables, hence os.environ.
RUN python3 -c "import os; from huggingface_hub import hf_hub_download; \
    hf_hub_download(repo_id='unsloth/gemma-4-E4B-it-GGUF', \
    filename='gemma-4-E4B-it-Q4_K_M.gguf', local_dir='/code/models', \
    revision=os.environ['MODEL_REVISION'])"

# Application source is copied last (it changes most often) and is owned by
# the runtime user.
COPY --chown=app:app . .

# Documentation only: the port uvicorn listens on (see --port below).
EXPOSE 7860
CMD ["python3", "-m", "uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1", "--timeout-keep-alive", "30"]