# Serving image: runs the FastAPI/ASGI app `app.server:app` under uvicorn on CPU.
FROM python:3.10-slim

# Runtime configuration (persisted into the running container):
#   TF_CPP_MIN_LOG_LEVEL=3   - suppress TensorFlow's native (C++) log output
#   CUDA_VISIBLE_DEVICES=-1  - hide any GPUs so inference stays on CPU
#   PYTHONUNBUFFERED=1       - flush stdout/stderr immediately so logs appear live
#   PORT                     - default listen port; may be overridden at `docker run`
#   HF_HOME                  - Hugging Face cache under /tmp, writable by any user
#   TRANSFORMERS_CACHE       - same location under the older variable name
#                              (presumably kept for older `transformers` releases)
ENV TF_CPP_MIN_LOG_LEVEL=3 \
    CUDA_VISIBLE_DEVICES=-1 \
    PYTHONUNBUFFERED=1 \
    PORT=7860 \
    HF_HOME=/tmp/hf \
    TRANSFORMERS_CACHE=/tmp/hf

WORKDIR /app

# Unprivileged account for the server process. /app is handed over to it so
# any files the app writes next to its code keep working without root.
# NOTE(review): port 7860 + HF_HOME suggest a Hugging Face Space, which runs
# containers as UID 1000 - confirm the UID matches the target platform.
RUN useradd --create-home --uid 1000 appuser \
    && chown appuser:appuser /app

# Slim, serving-only dependencies (CPU TensorFlow, no training/CUDA packages).
# Installed as root into the system site-packages, before the application code
# is copied, so this layer stays cached until requirements-serve.txt changes.
COPY requirements-serve.txt .
RUN pip install --no-cache-dir -r requirements-serve.txt

# Application code. --chown keeps the files readable by the unprivileged user
# regardless of the permission bits they carry in the build context.
COPY --chown=appuser:appuser app/ ./app/
COPY --chown=appuser:appuser transformer_model/ ./transformer_model/
COPY --chown=appuser:appuser index.html .

# Only the artifacts the server needs (not the training checkpoints).
COPY --chown=appuser:appuser saved_models/tinystories_tokenizer.json ./saved_models/
COPY --chown=appuser:appuser saved_models/tinystories_model.weights.h5 ./saved_models/

# Drop root for everything that runs from here on (i.e. the server itself).
USER appuser

# Documentation only; matches the default PORT above.
EXPOSE 7860

# A shell is needed to expand ${PORT}; `exec` then replaces that shell with
# uvicorn so the server is PID 1 and receives SIGTERM from `docker stop`.
CMD ["sh", "-c", "exec uvicorn app.server:app --host 0.0.0.0 --port ${PORT:-7860}"]