Spaces:
Paused
Paused
srayuth Claude Opus 4.6 committed on
Commit ·
7bebf4c
1
Parent(s): ee885a3
CUDA base image, pre-cache models, fix double loading
Browse files- Switch to nvidia/cuda:12.1.1-cudnn8-runtime-ubuntu22.04 for GPU support
- Pre-download BiRefNet and ISNet models in Docker build
- Use --noreload to prevent Django autoreload double model loading
- Fix apps.py preload condition for --noreload mode
- Update hardware options table with current HF pricing
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
- DEPLOY.md +11 -5
- Dockerfile +30 -32
- transparent/apps.py +8 -3
DEPLOY.md
CHANGED
|
@@ -48,11 +48,17 @@ git push
|
|
| 48 |
|
| 49 |
## Hardware Options
|
| 50 |
|
| 51 |
-
| Hardware | VRAM | Speed | Cost |
|
| 52 |
-
|----------|------|-------|------|
|
| 53 |
-
| CPU
|
| 54 |
-
|
|
| 55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
|
| 57 |
Enable GPU: Settings → Hardware → Select GPU → Save
|
| 58 |
|
|
|
|
| 48 |
|
| 49 |
## Hardware Options
|
| 50 |
|
| 51 |
+
| Hardware | Specs | VRAM | Speed | Cost |
|
| 52 |
+
|----------|-------|------|-------|------|
|
| 53 |
+
| CPU basic | 2 vCPU · 16 GB RAM | - | ~30-60s | Free |
|
| 54 |
+
| CPU upgrade | 8 vCPU · 32 GB RAM | - | ~15-30s | $0.03/hr |
|
| 55 |
+
| T4 small | 4 vCPU · 15 GB RAM | 16GB | ~2-5s | $0.40/hr |
|
| 56 |
+
| T4 medium | 8 vCPU · 30 GB RAM | 16GB | ~2-5s | $0.60/hr |
|
| 57 |
+
| L4 | 8 vCPU · 30 GB RAM | 24GB | ~1-3s | $0.80/hr |
|
| 58 |
+
| A10G small | 4 vCPU · 15 GB RAM | 24GB | ~1-3s | $1.00/hr |
|
| 59 |
+
| A10G large | 12 vCPU · 46 GB RAM | 24GB | ~1-3s | $1.50/hr |
|
| 60 |
+
| L40S | 8 vCPU · 62 GB RAM | 48GB | ~0.5-2s | $1.80/hr |
|
| 61 |
+
| A100 large | 12 vCPU · 142 GB RAM | 80GB | ~0.5-1s | $2.50/hr |
|
| 62 |
|
| 63 |
Enable GPU: Settings → Hardware → Select GPU → Save
|
| 64 |
|
Dockerfile
CHANGED
|
@@ -1,57 +1,55 @@
|
|
| 1 |
# Hugging Face Spaces Dockerfile
|
| 2 |
# GPU-enabled Django API for background removal
|
| 3 |
|
| 4 |
-
FROM
|
| 5 |
|
| 6 |
# Set environment variables
|
| 7 |
ENV PYTHONDONTWRITEBYTECODE=1
|
| 8 |
ENV PYTHONUNBUFFERED=1
|
| 9 |
ENV DEBIAN_FRONTEND=noninteractive
|
| 10 |
-
|
| 11 |
-
# HF Spaces runs on port 7860
|
| 12 |
ENV PORT=7860
|
|
|
|
| 13 |
|
| 14 |
-
# Install system dependencies
|
| 15 |
-
RUN apt-get update &&
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
libgomp1 \
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
WORKDIR /app
|
| 27 |
|
| 28 |
-
|
| 29 |
-
COPY requirements.txt .
|
| 30 |
|
| 31 |
# Install Python dependencies
|
|
|
|
| 32 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 33 |
|
| 34 |
-
# Copy application code
|
| 35 |
-
COPY . .
|
| 36 |
-
|
| 37 |
# Create non-root user (HF Spaces requirement)
|
| 38 |
RUN useradd -m -u 1000 user
|
| 39 |
-
RUN chown -R user:user /app
|
| 40 |
-
USER user
|
| 41 |
-
|
| 42 |
-
# Set home for model cache
|
| 43 |
ENV HOME=/home/user
|
| 44 |
ENV HF_HOME=/home/user/.cache/huggingface
|
| 45 |
|
| 46 |
-
#
|
| 47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
-
# Expose port
|
| 50 |
EXPOSE 7860
|
| 51 |
|
| 52 |
-
# Health check (
|
| 53 |
-
HEALTHCHECK --interval=30s --timeout=30s --start-period=
|
| 54 |
CMD curl -f http://localhost:7860/api/transparent/health/ || exit 1
|
| 55 |
|
| 56 |
-
#
|
| 57 |
-
CMD ["python", "manage.py", "runserver", "0.0.0.0:7860"]
|
|
|
|
| 1 |
# Hugging Face Spaces Dockerfile
|
| 2 |
# GPU-enabled Django API for background removal
|
| 3 |
|
| 4 |
+
FROM nvidia/cuda:12.1.1-cudnn8-runtime-ubuntu22.04
|
| 5 |
|
| 6 |
# Set environment variables
|
| 7 |
ENV PYTHONDONTWRITEBYTECODE=1
|
| 8 |
ENV PYTHONUNBUFFERED=1
|
| 9 |
ENV DEBIAN_FRONTEND=noninteractive
|
|
|
|
|
|
|
| 10 |
ENV PORT=7860
|
| 11 |
+
ENV OMP_NUM_THREADS=4
|
| 12 |
|
| 13 |
+
# Install Python 3.11 and system dependencies
|
| 14 |
+
RUN apt-get update && \
|
| 15 |
+
apt-get install -y --no-install-recommends software-properties-common && \
|
| 16 |
+
add-apt-repository ppa:deadsnakes/ppa && \
|
| 17 |
+
apt-get update && \
|
| 18 |
+
apt-get install -y --no-install-recommends \
|
| 19 |
+
python3.11 python3.11-venv python3.11-distutils \
|
| 20 |
+
libgl1 libglib2.0-0 libsm6 libxext6 libxrender1 libgomp1 curl && \
|
| 21 |
+
rm -rf /var/lib/apt/lists/* && \
|
| 22 |
+
ln -sf /usr/bin/python3.11 /usr/bin/python && \
|
| 23 |
+
ln -sf /usr/bin/python3.11 /usr/bin/python3 && \
|
| 24 |
+
curl -sS https://bootstrap.pypa.io/get-pip.py | python
|
|
|
|
| 25 |
|
| 26 |
+
WORKDIR /app
|
|
|
|
| 27 |
|
| 28 |
# Install Python dependencies
|
| 29 |
+
COPY requirements.txt .
|
| 30 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 31 |
|
|
|
|
|
|
|
|
|
|
| 32 |
# Create non-root user (HF Spaces requirement)
|
| 33 |
RUN useradd -m -u 1000 user
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
ENV HOME=/home/user
|
| 35 |
ENV HF_HOME=/home/user/.cache/huggingface
|
| 36 |
|
| 37 |
+
# Pre-download AI models so they don't re-download on every restart
|
| 38 |
+
USER user
|
| 39 |
+
RUN mkdir -p /home/user/.u2net /home/user/.config/Ultralytics && \
|
| 40 |
+
python -c "from rembg import new_session; new_session('birefnet-general', providers=['CPUExecutionProvider']); new_session('isnet-general-use', providers=['CPUExecutionProvider'])"
|
| 41 |
+
|
| 42 |
+
# Copy application code
|
| 43 |
+
USER root
|
| 44 |
+
COPY . .
|
| 45 |
+
RUN chown -R user:user /app
|
| 46 |
+
USER user
|
| 47 |
|
|
|
|
| 48 |
EXPOSE 7860
|
| 49 |
|
| 50 |
+
# Health check (shorter start period since models are pre-cached)
|
| 51 |
+
HEALTHCHECK --interval=30s --timeout=30s --start-period=120s --retries=5 \
|
| 52 |
CMD curl -f http://localhost:7860/api/transparent/health/ || exit 1
|
| 53 |
|
| 54 |
+
# --noreload prevents double model loading from Django's autoreload
|
| 55 |
+
CMD ["python", "manage.py", "runserver", "0.0.0.0:7860", "--noreload"]
|
transparent/apps.py
CHANGED
|
@@ -14,11 +14,16 @@ class TransparentConfig(AppConfig):
|
|
| 14 |
|
| 15 |
def ready(self):
|
| 16 |
"""Preload AI models when the app starts."""
|
| 17 |
-
|
| 18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 19 |
from transparent.controllers.style_generator import StyleGenerator
|
| 20 |
from transparent.controllers.style_recommender import StyleRecommender
|
| 21 |
|
| 22 |
-
# Preload all AI models at startup
|
| 23 |
StyleGenerator.preload_models()
|
| 24 |
StyleRecommender.preload_models()
|
|
|
|
| 14 |
|
| 15 |
def ready(self):
|
| 16 |
"""Preload AI models when the app starts."""
|
| 17 |
+
import sys
|
| 18 |
+
is_runserver = 'runserver' in sys.argv
|
| 19 |
+
is_gunicorn = os.environ.get('GUNICORN_WORKER')
|
| 20 |
+
# With --noreload: RUN_MAIN is not set, load in main process
|
| 21 |
+
# Without --noreload: RUN_MAIN='true' in the reloader child process
|
| 22 |
+
is_reloader_child = os.environ.get('RUN_MAIN') == 'true'
|
| 23 |
+
|
| 24 |
+
if is_gunicorn or is_reloader_child or (is_runserver and '--noreload' in sys.argv):
|
| 25 |
from transparent.controllers.style_generator import StyleGenerator
|
| 26 |
from transparent.controllers.style_recommender import StyleRecommender
|
| 27 |
|
|
|
|
| 28 |
StyleGenerator.preload_models()
|
| 29 |
StyleRecommender.preload_models()
|