srayuth Claude Opus 4.6 committed on
Commit
7bebf4c
·
1 Parent(s): ee885a3

CUDA base image, pre-cache models, fix double loading

Browse files

- Switch to nvidia/cuda:12.1.1-cudnn8-runtime-ubuntu22.04 for GPU support
- Pre-download BiRefNet and ISNet models in Docker build
- Use --noreload to prevent Django autoreload double model loading
- Fix apps.py preload condition for --noreload mode
- Update hardware options table with current HF pricing

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (3) hide show
  1. DEPLOY.md +11 -5
  2. Dockerfile +30 -32
  3. transparent/apps.py +8 -3
DEPLOY.md CHANGED
@@ -48,11 +48,17 @@ git push
48
 
49
  ## Hardware Options
50
 
51
- | Hardware | VRAM | Speed | Cost |
52
- |----------|------|-------|------|
53
- | CPU (free) | - | ~30-60s | Free |
54
- | T4 small | 16GB | ~2-5s | $0.40/hr |
55
- | A10G small | 24GB | ~1-3s | $1.05/hr |
 
 
 
 
 
 
56
 
57
  Enable GPU: Settings → Hardware → Select GPU → Save
58
 
 
48
 
49
  ## Hardware Options
50
 
51
+ | Hardware | Specs | VRAM | Speed | Cost |
52
+ |----------|-------|------|-------|------|
53
+ | CPU basic | 2 vCPU · 16 GB RAM | - | ~30-60s | Free |
54
+ | CPU upgrade | 8 vCPU · 32 GB RAM | - | ~15-30s | $0.03/hr |
55
+ | T4 small | 4 vCPU · 15 GB RAM | 16GB | ~2-5s | $0.40/hr |
56
+ | T4 medium | 8 vCPU · 30 GB RAM | 16GB | ~2-5s | $0.60/hr |
57
+ | L4 | 8 vCPU · 30 GB RAM | 24GB | ~1-3s | $0.80/hr |
58
+ | A10G small | 4 vCPU · 15 GB RAM | 24GB | ~1-3s | $1.00/hr |
59
+ | A10G large | 12 vCPU · 46 GB RAM | 24GB | ~1-3s | $1.50/hr |
60
+ | L40S | 8 vCPU · 62 GB RAM | 48GB | ~0.5-2s | $1.80/hr |
61
+ | A100 large | 12 vCPU · 142 GB RAM | 80GB | ~0.5-1s | $2.50/hr |
62
 
63
  Enable GPU: Settings → Hardware → Select GPU → Save
64
 
Dockerfile CHANGED
@@ -1,57 +1,55 @@
1
  # Hugging Face Spaces Dockerfile
2
  # GPU-enabled Django API for background removal
3
 
4
- FROM python:3.11-slim
5
 
6
  # Set environment variables
7
  ENV PYTHONDONTWRITEBYTECODE=1
8
  ENV PYTHONUNBUFFERED=1
9
  ENV DEBIAN_FRONTEND=noninteractive
10
-
11
- # HF Spaces runs on port 7860
12
  ENV PORT=7860
 
13
 
14
- # Install system dependencies
15
- RUN apt-get update && apt-get install -y --no-install-recommends \
16
- libgl1 \
17
- libglib2.0-0 \
18
- libsm6 \
19
- libxext6 \
20
- libxrender1 \
21
- libgomp1 \
22
- curl \
23
- && rm -rf /var/lib/apt/lists/*
24
-
25
- # Create app directory
26
- WORKDIR /app
27
 
28
- # Copy requirements first for caching
29
- COPY requirements.txt .
30
 
31
  # Install Python dependencies
 
32
  RUN pip install --no-cache-dir -r requirements.txt
33
 
34
- # Copy application code
35
- COPY . .
36
-
37
  # Create non-root user (HF Spaces requirement)
38
  RUN useradd -m -u 1000 user
39
- RUN chown -R user:user /app
40
- USER user
41
-
42
- # Set home for model cache
43
  ENV HOME=/home/user
44
  ENV HF_HOME=/home/user/.cache/huggingface
45
 
46
- # Fix OMP_NUM_THREADS invalid value from HF
47
- ENV OMP_NUM_THREADS=4
 
 
 
 
 
 
 
 
48
 
49
- # Expose port
50
  EXPOSE 7860
51
 
52
- # Health check (longer start period for model loading)
53
- HEALTHCHECK --interval=30s --timeout=30s --start-period=300s --retries=5 \
54
  CMD curl -f http://localhost:7860/api/transparent/health/ || exit 1
55
 
56
- # Start server
57
- CMD ["python", "manage.py", "runserver", "0.0.0.0:7860"]
 
1
  # Hugging Face Spaces Dockerfile
2
  # GPU-enabled Django API for background removal
3
 
4
+ FROM nvidia/cuda:12.1.1-cudnn8-runtime-ubuntu22.04
5
 
6
  # Set environment variables
7
  ENV PYTHONDONTWRITEBYTECODE=1
8
  ENV PYTHONUNBUFFERED=1
9
  ENV DEBIAN_FRONTEND=noninteractive
 
 
10
  ENV PORT=7860
11
+ ENV OMP_NUM_THREADS=4
12
 
13
+ # Install Python 3.11 and system dependencies
14
+ RUN apt-get update && \
15
+ apt-get install -y --no-install-recommends software-properties-common && \
16
+ add-apt-repository ppa:deadsnakes/ppa && \
17
+ apt-get update && \
18
+ apt-get install -y --no-install-recommends \
19
+ python3.11 python3.11-venv python3.11-distutils \
20
+ libgl1 libglib2.0-0 libsm6 libxext6 libxrender1 libgomp1 curl && \
21
+ rm -rf /var/lib/apt/lists/* && \
22
+ ln -sf /usr/bin/python3.11 /usr/bin/python && \
23
+ ln -sf /usr/bin/python3.11 /usr/bin/python3 && \
24
+ curl -sS https://bootstrap.pypa.io/get-pip.py | python
 
25
 
26
+ WORKDIR /app
 
27
 
28
  # Install Python dependencies
29
+ COPY requirements.txt .
30
  RUN pip install --no-cache-dir -r requirements.txt
31
 
 
 
 
32
  # Create non-root user (HF Spaces requirement)
33
  RUN useradd -m -u 1000 user
 
 
 
 
34
  ENV HOME=/home/user
35
  ENV HF_HOME=/home/user/.cache/huggingface
36
 
37
+ # Pre-download AI models so they don't re-download on every restart
38
+ USER user
39
+ RUN mkdir -p /home/user/.u2net /home/user/.config/Ultralytics && \
40
+ python -c "from rembg import new_session; new_session('birefnet-general', providers=['CPUExecutionProvider']); new_session('isnet-general-use', providers=['CPUExecutionProvider'])"
41
+
42
+ # Copy application code
43
+ USER root
44
+ COPY . .
45
+ RUN chown -R user:user /app
46
+ USER user
47
 
 
48
  EXPOSE 7860
49
 
50
+ # Health check (shorter start period since models are pre-cached)
51
+ HEALTHCHECK --interval=30s --timeout=30s --start-period=120s --retries=5 \
52
  CMD curl -f http://localhost:7860/api/transparent/health/ || exit 1
53
 
54
+ # --noreload prevents double model loading from Django's autoreload
55
+ CMD ["python", "manage.py", "runserver", "0.0.0.0:7860", "--noreload"]
transparent/apps.py CHANGED
@@ -14,11 +14,16 @@ class TransparentConfig(AppConfig):
14
 
15
  def ready(self):
16
  """Preload AI models when the app starts."""
17
- # Only preload in the main process (not in manage.py commands)
18
- if os.environ.get('RUN_MAIN') == 'true' or os.environ.get('GUNICORN_WORKER'):
 
 
 
 
 
 
19
  from transparent.controllers.style_generator import StyleGenerator
20
  from transparent.controllers.style_recommender import StyleRecommender
21
 
22
- # Preload all AI models at startup
23
  StyleGenerator.preload_models()
24
  StyleRecommender.preload_models()
 
14
 
15
  def ready(self):
16
  """Preload AI models when the app starts."""
17
+ import sys
18
+ is_runserver = 'runserver' in sys.argv
19
+ is_gunicorn = os.environ.get('GUNICORN_WORKER')
20
+ # With --noreload: RUN_MAIN is not set, load in main process
21
+ # Without --noreload: RUN_MAIN='true' in the reloader child process
22
+ is_reloader_child = os.environ.get('RUN_MAIN') == 'true'
23
+
24
+ if is_gunicorn or is_reloader_child or (is_runserver and '--noreload' in sys.argv):
25
  from transparent.controllers.style_generator import StyleGenerator
26
  from transparent.controllers.style_recommender import StyleRecommender
27
 
 
28
  StyleGenerator.preload_models()
29
  StyleRecommender.preload_models()