MogensR commited on
Commit
3afce52
·
1 Parent(s): 8f6e77a
Files changed (7) hide show
  1. Dockerfile +95 -35
  2. README.md +3 -28
  3. app.py +17 -736
  4. pipeline.py +669 -0
  5. requirements.txt +7 -12
  6. ui.py +356 -0
  7. utils/oom.py +60 -0
Dockerfile CHANGED
@@ -1,46 +1,106 @@
1
- FROM python:3.10-slim
 
 
 
2
 
3
- # Install system dependencies
4
- RUN apt-get update && apt-get install -y \
5
- git \
6
- ffmpeg \
7
- libgl1-mesa-glx \
8
- libglib2.0-0 \
9
- libsm6 \
10
- libxext6 \
11
- libxrender-dev \
12
- libgomp1 \
13
- && rm -rf /var/lib/apt/lists/*
14
 
15
- # Set working directory
16
- WORKDIR /code
 
 
17
 
18
- # Copy requirements first for better caching
19
- COPY ./requirements.txt /code/requirements.txt
 
 
 
20
 
21
- # Install Python dependencies
22
- RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
 
 
 
 
23
 
24
- # Clone SAM2 and MatAnyone repositories
25
- RUN git clone https://github.com/facebookresearch/segment-anything-2.git /code/third_party/sam2
26
- RUN git clone https://github.com/pq-yang/MatAnyone.git /code/third_party/matanyone
 
 
 
27
 
28
- # Set Python path
29
- ENV PYTHONPATH="${PYTHONPATH}:/code/third_party/sam2:/code/third_party/matanyone"
 
 
30
 
31
- # Copy the rest of the application
32
- COPY . /code
 
33
 
34
- # Set environment variables for GPU optimization
35
- ENV PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128
36
- ENV OMP_NUM_THREADS=2
37
- ENV HF_HOME=/code/.cache
 
 
 
38
 
39
- # Create cache directory
40
- RUN mkdir -p /code/.cache
 
 
 
 
 
41
 
42
- # Expose port
43
- EXPOSE 7860
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
- # Run the application
46
- CMD ["python", "app.py"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ===============================
2
+ # BackgroundFX Pro — Dockerfile
3
+ # Hugging Face Spaces Pro (GPU)
4
+ # ===============================
5
 
6
+ # CUDA base image (T4-friendly). Build stage has NO GPU access.
7
+ FROM nvidia/cuda:12.3.2-cudnn9-devel-ubuntu22.04
 
 
 
 
 
 
 
 
 
8
 
9
+ # --- Build args (override in Space Settings → Build args) ---
10
+ # Pin external repos for reproducible builds
11
+ ARG SAM2_SHA=__PIN_ME__
12
+ ARG MATANYONE_SHA=__PIN_ME__
13
 
14
+ # Weights to pre-warm (public models only)
15
+ ARG SAM2_MODEL_ID=facebook/sam2
16
+ ARG SAM2_VARIANT=sam2_hiera_large # sam2_hiera_small | sam2_hiera_base | sam2_hiera_large
17
+ ARG MATANY_REPO_ID=PeiqingYang/MatAnyone
18
+ ARG MATANY_FILENAME=matanyone_v1.0.pth
19
 
20
+ # --- Create non-root user (uid 1000 required by HF) ---
21
+ RUN useradd -m -u 1000 user
22
+ USER user
23
+ ENV HOME=/home/user \
24
+ PATH=/home/user/.local/bin:$PATH
25
+ WORKDIR $HOME/app
26
 
27
+ # --- System packages ---
28
+ USER root
29
+ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
30
+ git ffmpeg libgl1-mesa-glx libglib2.0-0 libsm6 libxext6 libxrender-dev libgomp1 \
31
+ && rm -rf /var/lib/apt/lists/*
32
+ USER user
33
 
34
+ # --- Python & CUDA wheels (Torch cu121) ---
35
+ RUN pip install --no-cache-dir --upgrade pip
36
+ RUN pip install --no-cache-dir --index-url https://download.pytorch.org/whl/cu121 \
37
+ torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1
38
 
39
+ # --- App Python deps ---
40
+ COPY --chown=user requirements.txt ./requirements.txt
41
+ RUN pip install --no-cache-dir -r requirements.txt
42
 
43
+ # --- Clone external repos (SAM2 & MatAnyone) ---
44
+ RUN git clone https://github.com/facebookresearch/segment-anything-2.git third_party/sam2 && \
45
+ cd third_party/sam2 && \
46
+ if [ "${SAM2_SHA}" != "__PIN_ME__" ]; then git checkout ${SAM2_SHA}; fi
47
+ RUN git clone https://github.com/pq-yang/MatAnyone.git third_party/matanyone && \
48
+ cd third_party/matanyone && \
49
+ if [ "${MATANYONE_SHA}" != "__PIN_ME__" ]; then git checkout ${MATANYONE_SHA}; fi
50
 
51
+ # --- Pre-warm model weights into image cache (public models only) ---
52
+ # NOTE: Build time has no access to private tokens on Spaces for gated models.
53
+ ENV HF_HOME_BUILD=${HOME}/.cache/huggingface
54
+ RUN python - <<'PY'
55
+ import os
56
+ from pathlib import Path
57
+ from huggingface_hub import hf_hub_download
58
 
59
+ SAM2_MODEL_ID = os.environ.get("SAM2_MODEL_ID", "facebook/sam2")
60
+ SAM2_VARIANT = os.environ.get("SAM2_VARIANT", "sam2_hiera_large")
61
+ MATANY_REPO_ID = os.environ.get("MATANY_REPO_ID", "PeiqingYang/MatAnyone")
62
+ MATANY_FILENAME = os.environ.get("MATANY_FILENAME", "matanyone_v1.0.pth")
63
+
64
+ VARIANT_FILES = {
65
+ "sam2_hiera_small": "sam2_hiera_small.pt",
66
+ "sam2_hiera_base": "sam2_hiera_base.pt",
67
+ "sam2_hiera_large": "sam2_hiera_large.pt",
68
+ }
69
+ ckpt_name = VARIANT_FILES.get(SAM2_VARIANT, VARIANT_FILES["sam2_hiera_large"])
70
+
71
+ cache_dir = os.environ.get("HF_HOME_BUILD", str(Path.home() / ".cache" / "huggingface"))
72
+ Path(cache_dir).mkdir(parents=True, exist_ok=True)
73
+
74
+ print(f"[PREWARM] SAM2: repo={SAM2_MODEL_ID}, file={ckpt_name}")
75
+ p1 = hf_hub_download(repo_id=SAM2_MODEL_ID, filename=ckpt_name, local_dir=cache_dir)
76
+ print(f"[PREWARM] -> {p1}")
77
+
78
+ print(f"[PREWARM] MatAnyone: repo={MATANY_REPO_ID}, file={MATANY_FILENAME}")
79
+ p2 = hf_hub_download(repo_id=MATANY_REPO_ID, filename=MATANY_FILENAME, local_dir=cache_dir)
80
+ print(f"[PREWARM] -> {p2}")
81
 
82
+ print("[PREWARM] Done.")
83
+ PY
84
+
85
+ # --- App code ---
86
+ COPY --chown=user . $HOME/app
87
+
88
+ # --- Runtime environment ---
89
+ # Caches in /data persist across Space restarts
90
+ ENV PYTHONUNBUFFERED=1 \
91
+ OMP_NUM_THREADS=2 \
92
+ TOKENIZERS_PARALLELISM=false \
93
+ HF_HOME=/data/.cache/huggingface \
94
+ TORCH_HOME=/data/.cache/torch \
95
+ MPLCONFIGDIR=/data/.cache/matplotlib \
96
+ PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128 \
97
+ PYTHONPATH="$PYTHONPATH:$HOME/app/third_party/sam2:$HOME/app/third_party/matanyone" \
98
+ PORT=7860 \
99
+ SAM2_MODEL_ID=${SAM2_MODEL_ID} \
100
+ SAM2_VARIANT=${SAM2_VARIANT} \
101
+ MATANY_REPO_ID=${MATANY_REPO_ID} \
102
+ MATANY_FILENAME=${MATANY_FILENAME}
103
+
104
+ # --- Networking / Entrypoint ---
105
+ EXPOSE 7860
106
+ CMD ["python", "app.py"]
README.md CHANGED
@@ -1,12 +1,10 @@
1
  ---
2
- title: 🎬 BackgroundFX Pro - SAM2 + MatAnyOne
3
  emoji: 🎥
4
  colorFrom: indigo
5
  colorTo: purple
6
- sdk: gradio
7
- sdk_version: 5.42.0
8
- app_file: app.py
9
- pinned: false
10
  license: mit
11
  tags:
12
  - video
@@ -15,27 +13,4 @@ tags:
15
  - matting
16
  - SAM2
17
  - MatAnyOne
18
- - gradio
19
  ---
20
- # 🎬 BackgroundFX Pro — SAM2 + MatAnyOne Edition
21
- **State-of-the-art video background replacement** with a simple Gradio UI.
22
- - ✅ **Segment Anything Model 2 (SAM2)** video propagation
23
- - ✅ **MatAnyOne (CVPR 2025)** professional matting refinement
24
- - ✅ GPU acceleration when available
25
- - ✅ Robust fallbacks (SAM2-only → GrabCut)
26
- - ✅ Backgrounds: gradient / solid / custom / AI (HF Inference API)
27
- ## 🚀 How to use
28
- 1. Upload a **video** (left panel)
29
- 2. Choose your **background** (gradient / solid / custom / AI)
30
- 3. Click **Process Video**
31
- 4. Download the **final video** in the Results panel 🎉
32
- ## 📦 Files
33
- - `app.py` — main app (SAM2 + MatAnyOne + UI)
34
- - `requirements.txt` — Python dependencies (commit-pinned for stability)
35
- ## 🛡️ Licenses
36
- Wrapper code: MIT.
37
- Models: see their repos:
38
- - [SAM2](https://github.com/facebookresearch/segment-anything-2)
39
- - [MatAnyOne](https://github.com/pq-yang/MatAnyOne)
40
- 👤 Maintainer: Mogens Rye
41
- 📧 Contact: mogens@ryeoutsourcing.dk
 
1
  ---
2
+ title: 🎬 BackgroundFX Pro - SAM2 + MatAnyone
3
  emoji: 🎥
4
  colorFrom: indigo
5
  colorTo: purple
6
+ sdk: docker
7
+ app_port: 7860
 
 
8
  license: mit
9
  tags:
10
  - video
 
13
  - matting
14
  - SAM2
15
  - MatAnyOne
 
16
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py CHANGED
@@ -1,768 +1,49 @@
1
  #!/usr/bin/env python3
2
  """
3
- BackgroundFX Pro - GPU Optimized Version
4
- Professional video background replacement with SAM2 + MatAnyone
5
  """
6
 
7
  import os
8
  import sys
9
- import gc
10
- import cv2
11
- import json
12
- import time
13
- import torch
14
  import logging
15
- import requests
16
- import tempfile
17
- import subprocess
18
- import threading
19
- import numpy as np
20
- import io
21
- from PIL import Image
22
- from pathlib import Path
23
- from datetime import datetime
24
- from typing import Optional, Tuple, List, Dict, Any
25
 
26
- import gradio as gr
27
-
28
- # Import optimized modules
29
- from utils.accelerator import pick_device, torch_global_tuning, memory_checkpoint, cleanup
30
- from models.sam2_loader import SAM2Predictor
31
- from models.matanyone_loader import MatAnyoneSession
32
 
33
- # Configure logging
34
  logging.basicConfig(
35
  level=logging.INFO,
36
  format='%(asctime)s - %(levelname)s - %(message)s'
37
  )
38
- logger = logging.getLogger(__name__)
39
-
40
- try:
41
- from sklearn.cluster import KMeans
42
- SKLEARN_AVAILABLE = True
43
- except ImportError:
44
- SKLEARN_AVAILABLE = False
45
- logger.warning("sklearn not available, using fallback color detection")
46
-
47
- # Global processing control
48
- processing_active = False
49
- processing_thread = None
50
-
51
- # Initialize optimized system
52
- device = pick_device()
53
- torch_global_tuning()
54
- GPU_NAME = torch.cuda.get_device_name(0) if torch.cuda.is_available() else "CPU"
55
- GPU_MEMORY = torch.cuda.get_device_properties(0).total_memory / (1024**3) if torch.cuda.is_available() else 0
56
- MODEL_SIZE = "large" if "T4" in GPU_NAME else "base"
57
-
58
- logger.info(f"System initialized - Device: {device} | GPU: {GPU_NAME} | Memory: {GPU_MEMORY:.1f}GB")
59
-
60
- # Environment variables for model control
61
- SAM2_ENABLED = os.environ.get("ENABLE_SAM2", "1") == "1"
62
- MATANY_ENABLED = os.environ.get("ENABLE_MATANY", "1") == "1"
63
- MAX_SIDE = int(os.environ.get("MAX_SIDE", "1280"))
64
- FRAME_CHUNK = int(os.environ.get("FRAME_CHUNK", "64"))
65
-
66
- # Global optimized model instances
67
- sam2_predictor = None
68
- matanyone_session = None
69
-
70
- def get_sam2():
71
- """Get SAM2 predictor with lazy loading"""
72
- global sam2_predictor
73
- if sam2_predictor is None and SAM2_ENABLED:
74
- try:
75
- sam2_predictor = SAM2Predictor(device).load()
76
- logger.info("SAM2 loaded with optimized pipeline")
77
- except Exception as e:
78
- logger.error(f"SAM2 loading failed: {e}")
79
- sam2_predictor = None
80
- return sam2_predictor
81
-
82
- def get_matanyone():
83
- """Get MatAnyone session with lazy loading"""
84
- global matanyone_session
85
- if matanyone_session is None and MATANY_ENABLED:
86
- try:
87
- repo_id = os.environ.get("MATANY_REPO_ID", "PeiqingYang/MatAnyone")
88
- filename = os.environ.get("MATANY_FILENAME", "matanyone_v1.0.pth")
89
- matanyone_session = MatAnyoneSession(device).load(
90
- repo_id=repo_id,
91
- filename=filename
92
- )
93
- logger.info("MatAnyone loaded with optimized pipeline")
94
- except Exception as e:
95
- logger.error(f"MatAnyone loading failed: {e}")
96
- matanyone_session = None
97
- return matanyone_session
98
-
99
- def iter_video_frames(path, target_max_side=MAX_SIDE, chunk=FRAME_CHUNK):
100
- """Memory-mapped video frame generator"""
101
- import cv2
102
- cap = cv2.VideoCapture(path)
103
- if not cap.isOpened():
104
- raise RuntimeError("Cannot open video")
105
-
106
- # Get video properties
107
- w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
108
- h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
109
- fps = cap.get(cv2.CAP_PROP_FPS) or 25.0
110
-
111
- # Scale to fit GPU memory constraints
112
- scale = min(1.0, float(target_max_side) / float(max(w, h)))
113
- new_w, new_h = (w, h) if scale >= 0.999 else (int(w*scale)//2*2, int(h*scale)//2*2)
114
-
115
- batch = []
116
- while True:
117
- if not processing_active:
118
- cap.release()
119
- return
120
-
121
- ok, f = cap.read()
122
- if not ok:
123
- if batch:
124
- yield batch, fps, (w, h), (new_w, new_h)
125
- break
126
-
127
- if new_w != w or new_h != h:
128
- f = cv2.resize(f, (new_w, new_h), interpolation=cv2.INTER_AREA)
129
- f = cv2.cvtColor(f, cv2.COLOR_BGR2RGB)
130
- batch.append(f)
131
-
132
- if len(batch) >= chunk:
133
- yield batch, fps, (w, h), (new_w, new_h)
134
- batch = []
135
-
136
- cap.release()
137
-
138
- def composite_frame(frame_rgb, bg_rgb, alpha01):
139
- """GPU-optimized frame compositing"""
140
- if bg_rgb is None:
141
- bg = np.full_like(frame_rgb, 200, dtype=np.uint8)
142
- else:
143
- bg = bg_rgb
144
- if bg.shape[:2] != frame_rgb.shape[:2]:
145
- bg = cv2.resize(bg, (frame_rgb.shape[1], frame_rgb.shape[0]), interpolation=cv2.INTER_AREA)
146
-
147
- a = np.clip(alpha01[..., None], 0.0, 1.0)
148
- out = (frame_rgb.astype("float32") * a + bg.astype("float32") * (1.0 - a)).astype("uint8")
149
- return out
150
-
151
- def cheap_fallback_alpha(fr, seed_mask=None):
152
- """Fast CPU fallback alpha generation"""
153
- if seed_mask is not None:
154
- return seed_mask
155
-
156
- # Center-focused soft alpha
157
- H, W = fr.shape[:2]
158
- yy, xx = np.mgrid[0:H, 0:W].astype("float32")
159
- cx, cy = W/2.0, H/2.0
160
- r = np.sqrt((xx-cx)**2 + (yy-cy)**2) / max(W, H)
161
- a = 1.0 - np.clip((r-0.2)/0.4, 0.0, 1.0)
162
- return a.astype("float32")
163
-
164
- def process_video_gpu_optimized(input_path, bg_image_rgb=None, out_path="output.mp4"):
165
- """GPU-optimized video processing pipeline"""
166
- global processing_active
167
-
168
- writer = None
169
- seed_mask = None
170
- total = 0
171
-
172
- try:
173
- for frames, fps, orig_hw, new_hw in iter_video_frames(input_path, MAX_SIDE, FRAME_CHUNK):
174
- if not processing_active:
175
- logger.info("Processing stopped by user")
176
- break
177
-
178
- H, W = frames[0].shape[:2]
179
- if writer is None:
180
- writer = cv2.VideoWriter(
181
- out_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (W, H)
182
- )
183
-
184
- # First frame: try SAM2 for seed mask
185
- if seed_mask is None:
186
- try:
187
- sam2 = get_sam2()
188
- if sam2:
189
- seed_mask = sam2.first_frame_mask(frames[0].astype("float32") / 255.0)
190
- seed_mask = (cv2.GaussianBlur(seed_mask, (0, 0), 1.0) > 0.5).astype("float32")
191
- logger.info("SAM2 seed mask generated")
192
- except Exception as e:
193
- logger.warning(f"SAM2 failed, continuing without: {e}")
194
- seed_mask = None
195
-
196
- # Professional matting pipeline
197
- matany = get_matanyone()
198
- if matany and MATANY_ENABLED:
199
- try:
200
- with torch.autocast(device_type=str(device).split(":")[0], dtype=torch.float16, enabled=(device.type=="cuda")):
201
- for i, fr in enumerate(frames):
202
- if not processing_active:
203
- break
204
-
205
- alpha = matany.step(fr, seed_mask if total == 0 and i == 0 else None)
206
- comp = composite_frame(fr, bg_image_rgb, alpha)
207
- writer.write(cv2.cvtColor(comp, cv2.COLOR_RGB2BGR))
208
- total += 1
209
-
210
- if total % 64 == 0:
211
- cleanup()
212
- memory_checkpoint(f"frames={total}")
213
-
214
- except Exception as e:
215
- logger.warning(f"MatAnyone failed: {e}")
216
- matany = None
217
-
218
- # Fallback if MatAnyone unavailable
219
- if not matany:
220
- for fr in frames:
221
- if not processing_active:
222
- break
223
-
224
- alpha = cheap_fallback_alpha(fr, seed_mask)
225
- comp = composite_frame(fr, bg_image_rgb, alpha)
226
- writer.write(cv2.cvtColor(comp, cv2.COLOR_RGB2BGR))
227
- total += 1
228
-
229
- if total % 64 == 0:
230
- cleanup()
231
-
232
- memory_checkpoint(f"processed={total}")
233
-
234
- except Exception as e:
235
- logger.error(f"Processing error: {e}")
236
- finally:
237
- if writer:
238
- writer.release()
239
- cleanup()
240
-
241
- return out_path if processing_active else None
242
 
243
- def stop_processing():
244
- """Stop video processing"""
245
- global processing_active
246
- processing_active = False
247
- return gr.update(visible=False), "Processing stopped by user"
248
-
249
- class MyAvatarAPI:
250
- """MyAvatar API integration"""
251
-
252
- def __init__(self):
253
- self.api_base = "https://app.myavatar.dk/api"
254
- self.videos_cache = []
255
- self.last_refresh = 0
256
-
257
- def fetch_videos(self) -> List[Dict[str, Any]]:
258
- """Fetch videos from MyAvatar API"""
259
- try:
260
- if time.time() - self.last_refresh < 300 and self.videos_cache:
261
- return self.videos_cache
262
-
263
- response = requests.get(f"{self.api_base}/videos", timeout=10)
264
- if response.status_code == 200:
265
- data = response.json()
266
- self.videos_cache = data.get('videos', [])
267
- self.last_refresh = time.time()
268
- logger.info(f"Fetched {len(self.videos_cache)} videos from MyAvatar")
269
- return self.videos_cache
270
- else:
271
- logger.error(f"API error: {response.status_code}")
272
- return []
273
-
274
- except Exception as e:
275
- logger.error(f"Error fetching videos: {e}")
276
- return []
277
-
278
- def get_video_choices(self) -> List[str]:
279
- """Get video choices for dropdown"""
280
- videos = self.fetch_videos()
281
- if not videos:
282
- return ["No videos available"]
283
-
284
- choices = []
285
- for video in videos:
286
- title = video.get('title', 'Untitled')
287
- video_id = video.get('id', 'unknown')
288
- status = video.get('status', 'unknown')
289
- choices.append(f"{title} (ID: {video_id}) - {status}")
290
-
291
- return choices
292
-
293
- def get_video_url(self, selection: str) -> Optional[str]:
294
- """Extract video URL from selection"""
295
- if not selection or selection == "No videos available":
296
- return None
297
-
298
- try:
299
- if "(ID: " in selection:
300
- video_id = selection.split("(ID: ")[1].split(")")[0]
301
-
302
- for video in self.videos_cache:
303
- if str(video.get('id')) == video_id:
304
- return video.get('video_url')
305
-
306
- return None
307
-
308
- except Exception as e:
309
- logger.error(f"Error extracting video URL: {e}")
310
- return None
311
-
312
- # Initialize API
313
- myavatar_api = MyAvatarAPI()
314
-
315
- def create_gradient_background(gradient_type: str, width: int, height: int) -> Image.Image:
316
- """Create gradient backgrounds"""
317
- try:
318
- img = np.zeros((height, width, 3), dtype=np.uint8)
319
-
320
- if gradient_type == "sunset":
321
- for i in range(height):
322
- ratio = i / height
323
- r = int(255 * (1 - ratio) + 128 * ratio)
324
- g = int(165 * (1 - ratio) + 64 * ratio)
325
- b = int(0 * (1 - ratio) + 128 * ratio)
326
- img[i, :] = [r, g, b]
327
- elif gradient_type == "ocean":
328
- for i in range(height):
329
- ratio = i / height
330
- r = int(0 * (1 - ratio) + 30 * ratio)
331
- g = int(100 * (1 - ratio) + 144 * ratio)
332
- b = int(200 * (1 - ratio) + 255 * ratio)
333
- img[i, :] = [r, g, b]
334
- elif gradient_type == "forest":
335
- for i in range(height):
336
- ratio = i / height
337
- r = int(34 * (1 - ratio) + 0 * ratio)
338
- g = int(139 * (1 - ratio) + 100 * ratio)
339
- b = int(34 * (1 - ratio) + 0 * ratio)
340
- img[i, :] = [r, g, b]
341
- else: # default blue
342
- for i in range(height):
343
- ratio = i / height
344
- r = int(70 * (1 - ratio) + 20 * ratio)
345
- g = int(130 * (1 - ratio) + 100 * ratio)
346
- b = int(180 * (1 - ratio) + 255 * ratio)
347
- img[i, :] = [r, g, b]
348
-
349
- return Image.fromarray(img)
350
-
351
- except Exception as e:
352
- logger.error(f"Error creating gradient: {e}")
353
- img = np.full((height, width, 3), [70, 130, 180], dtype=np.uint8)
354
- return Image.fromarray(img)
355
-
356
- def create_solid_color(color: str, width: int, height: int) -> Image.Image:
357
- """Create solid color backgrounds"""
358
- color_map = {
359
- "white": (255, 255, 255),
360
- "black": (0, 0, 0),
361
- "blue": (70, 130, 180),
362
- "green": (0, 128, 0),
363
- "red": (220, 20, 60),
364
- "purple": (128, 0, 128),
365
- "orange": (255, 165, 0),
366
- "yellow": (255, 255, 0)
367
- }
368
-
369
- rgb = color_map.get(color, (70, 130, 180))
370
- img = np.full((height, width, 3), rgb, dtype=np.uint8)
371
- return Image.fromarray(img)
372
-
373
- def generate_ai_background(prompt: str) -> Tuple[Optional[Image.Image], str]:
374
- """Generate AI background using Hugging Face Inference API"""
375
- try:
376
- if not prompt.strip():
377
- return None, "Please enter a prompt"
378
-
379
- models = [
380
- "black-forest-labs/FLUX.1-schnell",
381
- "stabilityai/stable-diffusion-xl-base-1.0",
382
- "runwayml/stable-diffusion-v1-5"
383
- ]
384
-
385
- enhanced_prompt = f"professional video background, {prompt}, high quality, 16:9 aspect ratio, cinematic lighting, detailed"
386
-
387
- for model in models:
388
- try:
389
- logger.info(f"Trying AI generation with {model}...")
390
-
391
- api_url = f"https://api-inference.huggingface.co/models/{model}"
392
- headers = {
393
- "Authorization": f"Bearer {os.getenv('HUGGINGFACE_TOKEN', 'hf_placeholder')}"
394
- }
395
- payload = {
396
- "inputs": enhanced_prompt,
397
- "parameters": {
398
- "width": 1024,
399
- "height": 576,
400
- "num_inference_steps": 20,
401
- "guidance_scale": 7.5
402
- }
403
- }
404
-
405
- response = requests.post(api_url, headers=headers, json=payload, timeout=30)
406
-
407
- if response.status_code == 200:
408
- image = Image.open(io.BytesIO(response.content))
409
- logger.info(f"AI background generated successfully with {model}")
410
- return image, f"AI background generated: {prompt}"
411
- elif response.status_code == 503:
412
- logger.warning(f"Model {model} is loading, trying next...")
413
- continue
414
- else:
415
- logger.warning(f"Error with {model}: {response.status_code}")
416
- continue
417
-
418
- except Exception as e:
419
- logger.warning(f"Error with {model}: {e}")
420
- continue
421
-
422
- logger.info("AI generation failed, creating intelligent gradient fallback...")
423
- return create_intelligent_gradient(prompt), f"Created gradient background inspired by: {prompt}"
424
-
425
- except Exception as e:
426
- logger.error(f"Error in AI background generation: {e}")
427
- return create_gradient_background("default", 1920, 1080), f"Created default background due to error: {str(e)}"
428
-
429
- def create_intelligent_gradient(prompt: str) -> Image.Image:
430
- """Create intelligent gradient based on prompt analysis"""
431
- prompt_lower = prompt.lower()
432
-
433
- if any(word in prompt_lower for word in ["sunset", "orange", "warm", "fire", "autumn"]):
434
- return create_gradient_background("sunset", 1920, 1080)
435
- elif any(word in prompt_lower for word in ["ocean", "sea", "blue", "water", "sky", "calm"]):
436
- return create_gradient_background("ocean", 1920, 1080)
437
- elif any(word in prompt_lower for word in ["forest", "green", "nature", "trees", "jungle"]):
438
- return create_gradient_background("forest", 1920, 1080)
439
- else:
440
- return create_gradient_background("default", 1920, 1080)
441
-
442
- def process_video_with_background_stoppable(
443
- input_video: Optional[str],
444
- myavatar_selection: str,
445
- background_type: str,
446
- gradient_type: str,
447
- solid_color: str,
448
- custom_background: Optional[str],
449
- ai_prompt: str
450
- ):
451
- """Main processing function with stop capability"""
452
- global processing_active
453
- processing_active = True
454
-
455
- try:
456
- # Show stop button, hide process button
457
- yield gr.update(visible=False), gr.update(visible=True), None, "Starting processing..."
458
-
459
- # Determine video source
460
- video_path = None
461
- if input_video:
462
- video_path = input_video
463
- logger.info("Using uploaded video")
464
- elif myavatar_selection and myavatar_selection != "No videos available":
465
- video_url = myavatar_api.get_video_url(myavatar_selection)
466
- if video_url:
467
- response = requests.get(video_url)
468
- if response.status_code == 200:
469
- temp_video = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
470
- temp_video.write(response.content)
471
- temp_video.close()
472
- video_path = temp_video.name
473
- logger.info("Using MyAvatar video")
474
-
475
- if not video_path:
476
- yield gr.update(visible=True), gr.update(visible=False), None, "No video provided"
477
- return
478
-
479
- # Generate background
480
- yield gr.update(visible=False), gr.update(visible=True), None, "Generating background..."
481
-
482
- background_image = None
483
- if background_type == "gradient":
484
- background_image = create_gradient_background(gradient_type, 1920, 1080)
485
- elif background_type == "solid":
486
- background_image = create_solid_color(solid_color, 1920, 1080)
487
- elif background_type == "custom" and custom_background:
488
- background_image = Image.open(custom_background)
489
- elif background_type == "ai" and ai_prompt:
490
- bg_img, ai_msg = generate_ai_background(ai_prompt)
491
- background_image = bg_img
492
-
493
- if not background_image:
494
- yield gr.update(visible=True), gr.update(visible=False), None, "No background generated"
495
- return
496
-
497
- # Process video
498
- yield gr.update(visible=False), gr.update(visible=True), None, "Processing video with GPU optimization..."
499
-
500
- bg_array = np.array(background_image.resize((1280, 720), Image.Resampling.LANCZOS))
501
-
502
- with tempfile.NamedTemporaryFile(suffix='_processed.mp4', delete=False) as tmp_final:
503
- final_video_path = tmp_final.name
504
-
505
- result_path = process_video_gpu_optimized(video_path, bg_array, final_video_path)
506
-
507
- # Cleanup
508
- try:
509
- if video_path != input_video:
510
- os.unlink(video_path)
511
- except:
512
- pass
513
-
514
- if result_path and processing_active:
515
- yield gr.update(visible=True), gr.update(visible=False), result_path, "Video processing completed successfully!"
516
- else:
517
- yield gr.update(visible=True), gr.update(visible=False), None, "Processing was stopped or failed"
518
-
519
- except Exception as e:
520
- logger.error(f"Error in video processing: {e}")
521
- yield gr.update(visible=True), gr.update(visible=False), None, f"Processing error: {str(e)}"
522
- finally:
523
- processing_active = False
524
-
525
- def create_interface():
526
- """Create the Gradio interface"""
527
- logger.info("Creating Gradio interface...")
528
- logger.info(f"Device: {device} | GPU: {GPU_NAME} | Memory: {GPU_MEMORY:.1f}GB")
529
-
530
- css = """
531
- .main-container { max-width: 1200px; margin: 0 auto; }
532
- .status-box { border: 2px solid #4CAF50; border-radius: 10px; padding: 15px; }
533
- .gradient-preview { border: 2px solid #ddd; border-radius: 10px; }
534
- """
535
-
536
- with gr.Blocks(css=css, title="BackgroundFX Pro - GPU Optimized") as app:
537
-
538
- gr.Markdown("""
539
- # BackgroundFX Pro - GPU Optimized
540
- ### Professional Video Background Replacement with SAM2 + MatAnyone
541
- """)
542
-
543
- with gr.Row():
544
- sam2_status = "Ready" if SAM2_ENABLED else "Disabled"
545
- matany_status = "Ready" if MATANY_ENABLED else "Disabled"
546
- gr.Markdown(f"""
547
- **System Status:** Online | **GPU:** {GPU_NAME} | **SAM2:** {sam2_status} | **MatAnyone:** {matany_status}
548
- """)
549
-
550
- with gr.Row():
551
- with gr.Column(scale=1):
552
- gr.Markdown("## Video Input")
553
-
554
- with gr.Tabs():
555
- with gr.Tab("Upload Video"):
556
- video_upload = gr.Video(label="Upload Video File", height=300)
557
-
558
- with gr.Tab("MyAvatar Videos"):
559
- refresh_btn = gr.Button("Refresh Videos", size="sm")
560
- myavatar_dropdown = gr.Dropdown(
561
- label="Select MyAvatar Video",
562
- choices=["Click refresh to load videos"],
563
- value=None
564
- )
565
- video_preview = gr.Video(label="Preview", height=200)
566
-
567
- gr.Markdown("## Background Options")
568
-
569
- background_type = gr.Radio(
570
- choices=["gradient", "solid", "custom", "ai"],
571
- value="gradient",
572
- label="Background Type"
573
- )
574
-
575
- with gr.Group():
576
- gradient_type = gr.Dropdown(
577
- choices=["sunset", "ocean", "forest", "default"],
578
- value="sunset",
579
- label="Gradient Type",
580
- visible=True
581
- )
582
- gradient_preview = gr.Image(label="Gradient Preview", height=150)
583
-
584
- solid_color = gr.Dropdown(
585
- choices=["white", "black", "blue", "green", "red", "purple", "orange", "yellow"],
586
- value="blue",
587
- label="Solid Color",
588
- visible=False
589
- )
590
- color_preview = gr.Image(label="Color Preview", height=150, visible=False)
591
-
592
- custom_bg_upload = gr.Image(
593
- label="Upload Custom Background",
594
- type="filepath",
595
- visible=False
596
- )
597
-
598
- ai_prompt = gr.Textbox(
599
- label="AI Background Prompt",
600
- placeholder="Describe the background you want...",
601
- visible=False
602
- )
603
- ai_generate_btn = gr.Button("Generate AI Background", visible=False)
604
- ai_preview = gr.Image(label="AI Generated Background", height=150, visible=False)
605
-
606
- with gr.Row():
607
- process_btn = gr.Button("Process Video", variant="primary", size="lg")
608
- stop_btn = gr.Button("Stop Processing", variant="stop", size="lg", visible=False)
609
-
610
- with gr.Column(scale=1):
611
- gr.Markdown("## Results")
612
-
613
- result_video = gr.Video(label="Processed Video", height=400)
614
-
615
- status_output = gr.Textbox(
616
- label="Processing Status",
617
- lines=5,
618
- max_lines=10,
619
- elem_classes=["status-box"]
620
- )
621
-
622
- gr.Markdown("""
623
- ### Processing Pipeline:
624
- 1. **SAM2 Segmentation** - GPU-accelerated person detection
625
- 2. **MatAnyone Matting** - Professional temporal consistency
626
- 3. **GPU Compositing** - Real-time background replacement
627
- 4. **Memory Optimization** - Chunked processing for efficiency
628
-
629
- **Performance:** ~3-5 minutes per 1000 frames on T4 GPU
630
- """)
631
-
632
- # Event handlers
633
- def update_background_options(bg_type):
634
- return {
635
- gradient_type: gr.update(visible=(bg_type == "gradient")),
636
- gradient_preview: gr.update(visible=(bg_type == "gradient")),
637
- solid_color: gr.update(visible=(bg_type == "solid")),
638
- color_preview: gr.update(visible=(bg_type == "solid")),
639
- custom_bg_upload: gr.update(visible=(bg_type == "custom")),
640
- ai_prompt: gr.update(visible=(bg_type == "ai")),
641
- ai_generate_btn: gr.update(visible=(bg_type == "ai")),
642
- ai_preview: gr.update(visible=(bg_type == "ai"))
643
- }
644
-
645
- def update_gradient_preview(grad_type):
646
- try:
647
- return create_gradient_background(grad_type, 400, 200)
648
- except:
649
- return None
650
-
651
- def update_color_preview(color):
652
- try:
653
- return create_solid_color(color, 400, 200)
654
- except:
655
- return None
656
-
657
- def refresh_myavatar_videos():
658
- try:
659
- choices = myavatar_api.get_video_choices()
660
- return gr.update(choices=choices, value=None)
661
- except Exception as e:
662
- logger.error(f"Error refreshing videos: {e}")
663
- return gr.update(choices=["Error loading videos"])
664
-
665
- def load_video_preview(selection):
666
- try:
667
- if not selection or selection == "No videos available":
668
- return None
669
-
670
- video_url = myavatar_api.get_video_url(selection)
671
- return video_url
672
- except Exception as e:
673
- logger.error(f"Error loading video preview: {e}")
674
- return None
675
-
676
- def generate_ai_bg(prompt):
677
- bg_img, message = generate_ai_background(prompt)
678
- return bg_img
679
-
680
- # Connect event handlers
681
- background_type.change(
682
- fn=update_background_options,
683
- inputs=[background_type],
684
- outputs=[gradient_type, gradient_preview, solid_color, color_preview,
685
- custom_bg_upload, ai_prompt, ai_generate_btn, ai_preview]
686
- )
687
-
688
- gradient_type.change(
689
- fn=update_gradient_preview,
690
- inputs=[gradient_type],
691
- outputs=[gradient_preview]
692
- )
693
-
694
- solid_color.change(
695
- fn=update_color_preview,
696
- inputs=[solid_color],
697
- outputs=[color_preview]
698
- )
699
-
700
- refresh_btn.click(
701
- fn=refresh_myavatar_videos,
702
- outputs=[myavatar_dropdown]
703
- )
704
-
705
- myavatar_dropdown.change(
706
- fn=load_video_preview,
707
- inputs=[myavatar_dropdown],
708
- outputs=[video_preview]
709
- )
710
-
711
- ai_generate_btn.click(
712
- fn=generate_ai_bg,
713
- inputs=[ai_prompt],
714
- outputs=[ai_preview]
715
- )
716
-
717
- process_btn.click(
718
- fn=process_video_with_background_stoppable,
719
- inputs=[
720
- video_upload,
721
- myavatar_dropdown,
722
- background_type,
723
- gradient_type,
724
- solid_color,
725
- custom_bg_upload,
726
- ai_prompt
727
- ],
728
- outputs=[process_btn, stop_btn, result_video, status_output]
729
- )
730
-
731
- stop_btn.click(
732
- fn=stop_processing,
733
- outputs=[stop_btn, status_output]
734
- )
735
-
736
- app.load(
737
- fn=lambda: create_gradient_background("sunset", 400, 200),
738
- outputs=[gradient_preview]
739
- )
740
-
741
- return app
742
 
743
  def main():
744
- """Main application entry point"""
745
  try:
746
- # Pre-warm models
747
- logger.info("Pre-warming GPU models...")
748
  if SAM2_ENABLED:
749
  get_sam2()
750
  if MATANY_ENABLED:
751
  get_matanyone()
752
-
753
  app = create_interface()
754
-
 
755
  app.launch(
756
  server_name="0.0.0.0",
757
- server_port=7860,
758
- share=True,
759
  show_error=True,
760
  quiet=False
761
  )
762
-
763
  except Exception as e:
764
  logger.error(f"Failed to start application: {e}")
765
  sys.exit(1)
766
 
 
767
  if __name__ == "__main__":
768
- main()
 
1
  #!/usr/bin/env python3
2
  """
3
+ BackgroundFX Pro Entrypoint
4
+ Launches the Gradio UI using the processing pipeline.
5
  """
6
 
7
  import os
8
  import sys
 
 
 
 
 
9
  import logging
 
 
 
 
 
 
 
 
 
 
10
 
11
+ from pipeline import (
12
+ get_sam2, get_matanyone,
13
+ SAM2_ENABLED, MATANY_ENABLED,
14
+ GPU_NAME, GPU_MEMORY
15
+ )
16
+ from ui import create_interface
17
 
 
18
  logging.basicConfig(
19
  level=logging.INFO,
20
  format='%(asctime)s - %(levelname)s - %(message)s'
21
  )
22
+ logger = logging.getLogger("app")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  def main():
 
26
  try:
27
+ logger.info(f"System status — GPU: {GPU_NAME}, VRAM: {GPU_MEMORY:.1f} GB")
28
+ # Pre-warm (safe; lazy loaders inside handle errors)
29
  if SAM2_ENABLED:
30
  get_sam2()
31
  if MATANY_ENABLED:
32
  get_matanyone()
33
+
34
  app = create_interface()
35
+
36
+ port = int(os.getenv("PORT", "7860"))
37
  app.launch(
38
  server_name="0.0.0.0",
39
+ server_port=port,
 
40
  show_error=True,
41
  quiet=False
42
  )
 
43
  except Exception as e:
44
  logger.error(f"Failed to start application: {e}")
45
  sys.exit(1)
46
 
47
+
48
  if __name__ == "__main__":
49
+ main()
pipeline.py ADDED
@@ -0,0 +1,669 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 🍹 Video Background Replacer - IFRAME OPTIMIZED VERSION
3
+ Combining Windsurf's UI improvements with Claude's audio/video processing
4
+ """
5
+ import streamlit as st
6
+ import tempfile
7
+ import os
8
+ import cv2
9
+ import numpy as np
10
+ from PIL import tempfile
11
+ import os
12
+ import timeimport Image
13
+ # ============================================================================
14
+ # IFRAME OPTIMIZATION - From Windsurf + Claude's recommendations
15
+ # ============================================================================
16
+ # Simple two-column layout
17
+ col1, col2 = st.columns(2)
18
+
19
+ with col1:
20
+ st.subheader("Original Video")
21
+ video_file = st.file_uploader("Choose video", type=['mp4', 'avi', 'mov'])
22
+ if video_file:
23
+ st.video(video_file)
24
+ # Configure for iframe embedding
25
+ st.set_page_config(
26
+ page_title="Video Background Replacement",
27
+ page_icon="🍹",
28
+ layout="wide",
29
+ initial_sidebar_state="collapsed"
30
+ )
31
+ # Add iframe-friendly styling
32
+ st.markdown("""
33
+ <style>
34
+ /* Hide Streamlit elements for clean iframe embedding */
35
+ .main > div {
36
+ padding-top: 1rem;
37
+ }
38
+ .stDeployButton {
39
+ display: none;
40
+ }
41
+ header[data-testid="stHeader"] {
42
+ display: none;
43
+ }
44
+ .stMainBlockContainer {
45
+ padding-top: 1rem;
46
+ }
47
+ /* Clean, professional CSS - NO ANIMATIONS for iframe stability */
48
+ .main-header {
49
+ text-align: center;
50
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
51
+ -webkit-background-clip: text;
52
+ -webkit-text-fill-color: transparent;
53
+ font-size: 2.5rem;
54
+ font-weight: bold;
55
+ margin-bottom: 1.5rem;
56
+ }
57
+ .upload-container {
58
+ background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
59
+ border-radius: 20px;
60
+ padding: 25px;
61
+ margin: 15px 0;
62
+ box-shadow: 0 8px 32px rgba(31, 38, 135, 0.37);
63
+ }
64
+ .upload-slot {
65
+ background: rgba(255, 255, 255, 0.8);
66
+ border: 2px dashed #ccc;
67
+ border-radius: 15px;
68
+ padding: 20px;
69
+ text-align: center;
70
+ margin: 10px 0;
71
+ min-height: 300px; /* Prevents height jumping */
72
+ display: flex;
73
+ flex-direction: column;
74
+ justify-content: center;
75
+ transition: all 0.3s ease; /* Smooth transitions */
76
+ }
77
+
78
+ .processing-box {
79
+ border: 2px solid #4ECDC4;
80
+ border-radius: 15px;
81
+ padding: 20px;
82
+ margin: 20px 0;
83
+ background: rgba(78, 205, 196, 0.1);
84
+ }
85
+
86
+ .success-box {
87
+ background: linear-gradient(45deg, #4CAF50, #45a049);
88
+ color: white;
89
+ padding: 15px;
90
+ border-radius: 10px;
91
+ text-align: center;
92
+ font-weight: bold;
93
+ margin: 20px 0;
94
+ }
95
+
96
+ /* Iframe-specific optimizations */
97
+ .block-container {
98
+ padding-top: 1rem;
99
+ padding-bottom: 1rem;
100
+ }
101
+ /* Mobile responsiveness for iframe */
102
+ @media (max-width: 768px) {
103
+ .main-header {
104
+ font-size: 1.8rem;
105
+ }
106
+ .upload-slot {
107
+ min-height: 250px;
108
+ }
109
+ }
110
+ </style>
111
+ """, unsafe_allow_html=True)
112
+
113
+ def main():
114
+ # Compact header for iframe
115
+ st.markdown('<h1 class="main-header">🍹 Video Background Replacer</h1>', unsafe_allow_html=True)
116
+ st.markdown('<p style="text-align: center; font-size: 1.1rem; color: #666; margin-bottom: 2rem;">Replace your video background with AI + Audio!</p>', unsafe_allow_html=True)
117
+
118
+ # Compact upload section
119
+ col1, col2 = st.columns(2)
120
+
121
+ with col1:
122
+ st.markdown("### 🎬 Upload Your Video")
123
+ video_file = st.file_uploader(
124
+ "Choose a video file",
125
+ type=['mp4', 'avi', 'mov'],
126
+ help="Upload the video you want to process",
127
+ key="video_uploader"
128
+ )
129
+ if video_file:
130
+ st.success("✅ Video loaded!")
131
+ st.video(video_file)
132
+
133
+ with col2:
134
+ st.markdown("### 🖼️ Upload Background Image")
135
+ image_file = st.file_uploader(
136
+ "Choose a background image",
137
+ type=['png', 'jpg', 'jpeg'],
138
+ help="Upload the background you want to use",
139
+ key="image_uploader"
140
+ )
141
+ if image_file:
142
+ st.success("✅ Background loaded!")
143
+ st.image(image_file, width=300)
144
+
145
+ # Process button
146
+ if video_file and image_file and st.button("🍹 PROCESS VIDEO", key="process_
147
+ # Clear any previous results
148
+ if 'video_result' in st.session_state:
149
+ del st.session_state['video_result']
150
+
151
+ progress_bar = st.progress(0)
152
+ status_text = st.empty()
153
+
154
+ # Step A: Save uploaded files
155
+ status_text.text("Step A: Saving uploaded files...")
156
+
157
+ video_path = f"temp_video_{int(time.time())}.mp4"
158
+ image_path = f"temp_image_{int(time.time())}.jpg"
159
+
160
+
161
+ with open(video_path, "wb") as f:
162
+ f.write(video_file.read())
163
+ fps = int(cap_orig.get(cv2.CAP_PROP_FPS))
164
+ width = int(cap_orig.get(cv2.CAP_PROP_FRAME_WIDTH))
165
+ height = int(cap_orig.get(cv2.CAP_PROP_FRAME_HEIGHT))
166
+ cap_orig.release()
167
+ try:
168
+ # Step B: Load MatAnyone
169
+ status_text.text("Step B: Loading MatAnyone...")
170
+ from matanyone import InferenceCore
171
+ processor = InferenceCore("PeiqingYang/MatAnyone")
172
+ progress_bar.progress(40)
173
+
174
+ # Step C: Create initial mask
175
+ status_text.text("Step C: Creating segmentation mask...")
176
+ import mediapipe as mp
177
+
178
+ mp_selfie = mp.solutions.selfie_segmentation
179
+ selfie_segmentation = mp_selfie.SelfieSegmentation(model_selection=1)
180
+
181
+ # Get first frame for mask
182
+ cap = cv2.VideoCapture(video_path)
183
+ ret, first_frame = cap.read()
184
+ cap.release()
185
+
186
+ if ret:
187
+ rgb_frame = cv2.cvtColor(first_frame, cv2.COLOR_BGR2RGB)
188
+ results = selfie_segmentation.process(rgb_frame)
189
+ mask = (results.segmentation_mask > 0.5).astype(np.uint8) * 255
190
+ mask_path = f"temp_mask_{int(time.time())}.png"
191
+ cv2.imwrite(mask_path, mask)
192
+ else:
193
+ st.error("Could not read video file")
194
+ st.stop()
195
+
196
+ progress_bar.progress(60)
197
+
198
+ # Step D: Run MatAnyone (creates green screen video)
199
+ status_text.text("Step D: Running MatAnyone - extracting person...")
200
+
201
+ foreground_path, alpha_path = processor.process_video(
202
+ input_path=video_path,
203
+ mask_path=mask_path,
204
+ output_path="output"
205
+ )
206
+
207
+ progress_bar.progress(80)
208
+
209
+ # Step E: Replace green screen with new background
210
+ status_text.text("Step E: Adding new background...")
211
+
212
+ # Read background image
213
+ bg_image = cv2.imread(image_path)
214
+
215
+ # Open the videos
216
+ cap_fg = cv2.VideoCapture(foreground_path)
217
+ cap_alpha = cv2.VideoCapture(alpha_path)
218
+
219
+ # Get video properties from ORIGINAL video
220
+ cap_orig = cv2.VideoCapture(video_path)
221
+ fps = int(cap_orig.get(cv2.CAP_PROP_FPS))
222
+ width = int(cap_orig.get(cv2.CAP_PROP_FRAME_WIDTH))
223
+ height = int(cap_orig.get(cv2.CAP_PROP_FRAME_HEIGHT))
224
+ cap_orig.release()
225
+
226
+ # Resize background to match ORIGINAL video
227
+ bg_resized = cv2.resize(bg_image, (width, height))
228
+
229
+ # Step F: Create output video with smart codec selection
230
+ status_text.text("Step F: Creating optimized video...")
231
+
232
+ try:
233
+ # Try to create MP4 directly with H.264
234
+
235
+ fourcc_h264 = cv2.VideoWriter_fourcc(*'H264')
236
+ output_path = f"final_video_{int(time.time())}.mp4"
237
+ out = cv2.VideoWriter(output_path, fourcc_h264, fps, (width, height))
238
+
239
+ if not out.isOpened():
240
+ # Fallback to XVID
241
+ temp_output_path = f"temp_output_{int(time.time())}.avi"
242
+ fourcc_xvid = cv2.VideoWriter_fourcc(*'XVID')
243
+ out = cv2.VideoWriter(temp_output_path, fourcc_xvid, fps, (width, height))
244
+ if not out.isOpened():
245
+ use_temp_file = False
246
+ st.info("✅ Using H.264 codec directly")
247
+
248
+ except Exception as e:
249
+ st.error(f"❌ Setup error: {e}")
250
+ st.stop()
251
+ use_temp_file = True
252
+ temp_output_path = f"temp_output_{st.session_state.get('counter', 0)}.avi"
253
+ fourcc_xvid = cv2.VideoWriter_fourcc(*'XVID')
254
+ out = cv2.VideoWriter(temp_output_path, fourcc_xvid, fps, (width, height))
255
+ if not out.isOpened():
256
+ st.error("❌ Could not create video writer!")
257
+ st.stop()
258
+
259
+ except Exception as e:
260
+ st.error(f"❌ Setup error: {e}")
261
+ st.stop()
262
+
263
+ # Process each frame
264
+ frame_count = 0
265
+ while True:
266
+ ret_fg, frame_fg = cap_fg.read()
267
+ ret_alpha, frame_alpha = cap_alpha.read()
268
+
269
+ if not ret_fg or not ret_alpha:
270
+ break
271
+
272
+ # Convert alpha to single channel if needed
273
+ if len(frame_alpha.shape) == 3:
274
+ alpha = cv2.cvtColor(frame_alpha, cv2.COLOR_BGR2GRAY)
275
+ else:
276
+ alpha = frame_alpha
277
+
278
+ # Normalize alpha
279
+ alpha_norm = alpha.astype(float) / 255.0
280
+
281
+ # Blend: person * alpha + background * (1-alpha)
282
+ result = np.zeros_like(frame_fg, dtype=float)
283
+ for c in range(3):
284
+ result[:,:,c] = (frame_fg[:,:,c] * alpha_norm +
285
+ bg_resized[:,:,c] * (1 - alpha_norm))
286
+
287
+ out.write(result.astype(np.uint8))
288
+ frame_count += 1
289
+
290
+ cap_fg.release()
291
+ cap_alpha.release()
292
+ out.release()
293
+
294
+ st.write(f"✅ Processed {frame_count} frames")
295
+
296
+ progress_bar.progress(90)
297
+
298
+ # Step G: Convert and optimize for web streaming
299
+ if use_temp_file:
300
+ status_text.text("Step G: Converting to web-optimized MP4...")
301
+ # Process each frame
302
+ frame_count = 0
303
+ while True:
304
+ ret_fg, frame_fg = cap_fg.read()
305
+ ret_alpha, frame_alpha = cap_alpha.read()
306
+
307
+ if not ret_fg or not ret_alpha:
308
+ break
309
+ extract_audio_cmd = [
310
+ 'ffmpeg', '-y',
311
+ '-i', video_path,
312
+ '-vn', '-acodec', 'pcm_s16le', '-ar', '44100', '-ac', '2',
313
+ audio_path
314
+ ]
315
+ if has_audio:
316
+ st.info("✅ Audio extracted from original video")
317
+ # Combine processed video with original audio
318
+ ffmpeg_cmd = [
319
+ 'ffmpeg', '-y',
320
+ '-i', temp_output_path, # Processed video (no audio)
321
+ '-i', audio_path, # Original audio
322
+ '-c:v', 'libx264',
323
+ '-preset', 'fast',
324
+ '-crf', '23',
325
+ '-pix_fmt', 'yuv420p',
326
+ '-c:a', 'aac', # Audio codec
327
+ '-movflags', '+faststart',
328
+ final_output_path
329
+ ]
330
+ else:# No audio - just convert video
331
+ ffmpeg_cmd = [
332
+ 'ffmpeg', '-y',
333
+ '-i', temp_output_path,
334
+ '-c:v', 'libx264',
335
+ '-preset', 'fast',
336
+ '-crf', '23',
337
+ '-pix_fmt', 'yuv420p',
338
+ '-movflags', '+faststart',
339
+ final_output_path
340
+ ]
341
+ # Normalize alpha
342
+ alpha_norm = alpha.astype(float) / 255.0
343
+
344
+ # Blend: person * alpha + background * (1-alpha)
345
+ result = np.zeros_like(frame_fg, dtype=float)
346
+ for c in range(3):
347
+ result[:,:,c] = (frame_fg[:,:,c] * alpha_norm +
348
+ bg_resized[:,:,c] * (1 - alpha_norm))
349
+
350
+ out.write(result.astype(np.uint8))
351
+ frame_count += 1
352
+
353
+ cap_fg.release()
354
+ cap_alpha.release()
355
+ out.release()
356
+
357
+ st.write(f"✅ Processed {frame_count} frames")
358
+
359
+ progress_bar.progress(90)
360
+
361
+ # Step G: Audio preservation and web optimization
362
+ if use_temp_file:
363
+ status_text.text("Step G: Adding audio and optimizing...")
364
+ st.info("🎵 Using moviepy for audio preservation...")
365
+
366
+ # Load original video to get audio
367
+ original_clip = mp.VideoFileClip(video_path)
368
+
369
+ if original_clip.audio is not None:
370
+ # Load processed video (no audio)
371
+ processed_clip = mp.VideoFileClip(temp_output_path)
372
+
373
+ # Add original audio to processed video
374
+ final_clip = processed_clip.set_audio(original_clip.audio)
375
+
376
+ final_output_path = f"final_video_{st.session_state.get('counter', 0)}.mp4"
377
+ final_clip.write_videofile(final_output_path, codec='libx264', audio_codec='aac')
378
+
379
+ # Close clips to free memory
380
+ original_clip.close()
381
+ processed_clip.close()
382
+ final_clip.close()
383
+
384
+ st.success("✅ Video with audio created using moviepy!")
385
+ output_path = final_output_path
386
+
387
+ # Clean up temp file
388
+ if os.path.exists(temp_output_path):
389
+ os.remove(temp_output_path)
390
+ else:
391
+ st.info("ℹ️ Original video has no audio track")
392
+ # No audio - just optimize video
393
+ processed_clip = mp.VideoFileClip(temp_output_path)
394
+ final_output_path = f"final_video_{st.session_state.get('counter', 0)}.mp4"
395
+ processed_clip.write_videofile(final_output_path, codec='libx264')
396
+ processed_clip.close()
397
+
398
+ output_path = final_output_path
399
+ if os.path.exists(temp_output_path):
400
+ os.remove(temp_output_path)
401
+
402
+ except ImportError:
403
+ st.warning("⚠️ moviepy not available")
404
+ # Fallback to qtfaststart approach without audio
405
+ try:
406
+ from qtfaststart import processor
407
+
408
+ # Convert AVI to MP4 using OpenCV
409
+ cap_temp = cv2.VideoCapture(temp_output_path)
410
+
411
+ final_output_path = f"final_video_{st.session_state.get('counter', 0)}.mp4"
412
+ fourcc_h264 = cv2.VideoWriter_fourcc(*'H264')
413
+ out_final = cv2.VideoWriter(final_output_path, fourcc_h264, fps, (width, height))
414
+ ⚠️ moviepy not available")
415
+ # Fallback to qtfaststart approach without audio
416
+ try:
417
+ from qtfaststart import processor
418
+
419
+ # Convert AVI to MP4 using OpenCV
420
+ cap_temp = cv2.VideoCapture(temp_output_path)
421
+
422
+ final_output_path = f"final_video_{st.session_state.get('counter', 0)}.mp4"
423
+ fourcc_h264 = cv2.VideoWriter_fourcc(*'H264')
424
+ out_final = cv2.VideoWriter(final_output_path, fourcc_h264, fps, (width, height))
425
+
426
+ # Copy frames to MP4
427
+ while True:
428
+ ret, frame = cap_temp.read()
429
+ if not ret:
430
+ break
431
+ out_final.write(frame)
432
+
433
+ cap_temp.release()
434
+ out_final.release()
435
+
436
+ # Now fix the moov atom
437
+ temp_fixed_path = f"fixed_{final_output_path}"
438
+ processor.process(final_output_path, temp_fixed_path)
439
+
440
+ # Replace original with fixed version
441
+ os.replace(temp_fixed_path, final_output_path)
442
+
443
+ st.warning("⚠️ Video created without audio (use moviepy for audio support)")
444
+ output_path = final_output_path
445
+
446
+ # Clean up temp file
447
+ if os.path.exists(temp_output_path):
448
+ os.remove(temp_output_path)
449
+
450
+ except Exception as qtfast_error:
451
+ st.warning(f"⚠️ qtfaststart failed: {qtfast_error}")
452
+ st.info("💡 Using original AVI format without audio")
453
+ output_path = temp_output_path
454
+
455
+ except Exception as moviepy_error:
456
+ st.warning(f"⚠️ moviepy failed: {moviepy_error}")
457
+ st.info("💡 Using original AVI format")
458
+ output_path = temp_output_path
459
+ else:
460
+ # H.264 MP4 was created directly - add audio and fix moov atom
461
+ try:
462
+ # First, try to add audio if it exists
463
+ audio_path = f"temp_audio_{st.session_state.get('counter', 0)}.wav"
464
+
465
+ # Try FFmpeg for audio preservation
466
+ import subprocess
467
+ final_output_path = f"final_video_{int(time.time())}.mp4"
468
+
469
+ # Extract audio from original video
470
+ audio_path = f"temp_audio_{int(time.time())}.wav"
471
+ extract_audio_cmd = [
472
+ 'ffmpeg', '-y',
473
+ '-i', video_path,
474
+ has_audio = audio_result.returncode == 0 and os.path.exists(audio_path)
475
+
476
+ if has_audio:
477
+
478
+
479
+ st.info("✅ Audio extracted, combining with video...")
480
+ # Combine processed video with original audio
481
+ ffmpeg_cmd = [
482
+
483
+ 'ffmpeg', '-y',
484
+ '-i', temp_output_path, # Processed video (no audio)
485
+ '-i', audio_path, # Original audio
486
+ '-c:v', 'libx264',
487
+ '-preset', 'fast',
488
+ '-crf', '23',
489
+ '-pix_fmt', 'yuv420p',
490
+ '-c:a', 'aac', # Audio codec
491
+ '-movflags', '+faststart', # Web optimization
492
+ final_output_path
493
+ ]
494
+ else:
495
+ st.info("ℹ️ No audio track found")
496
+ # No audio - just convert video
497
+ ffmpeg_cmd = [
498
+ 'ffmpeg', '-y',
499
+ '-i', temp_output_path,
500
+ '-c:v', 'libx264',
501
+ '-preset', 'fast',
502
+ '-crf', '23',
503
+ '-pix_fmt', 'yuv420p',
504
+ '-movflags', '+faststart',
505
+ final_output_path
506
+ result = subprocess.run(ffmpeg_cmd, capture_output=True, text=True)
507
+
508
+ if result.returncode == 0:
509
+ st.success("✅ Video with audio optimized!")
510
+ output_path = final_output_path
511
+ # Clean up temp files
512
+ if os.path.exists(temp_output_path):
513
+ os.remove(temp_output_path)
514
+ if has_audio and os.path.exists(audio_path):
515
+ os.remove(audio_path)
516
+ st.success("✅ MP4 with audio optimized for web streaming!")
517
+ else:
518
+ st.warning("⚠️ Failed to add audio, using video-only version")
519
+ if os.path.exists(audio_path):
520
+ os.remove(audio_path)
521
+ else:
522
+ st.warning("⚠️ FFmpeg failed, trying moviepy...")
523
+ raise Exception("FFmpeg failed")
524
+
525
+ except:
526
+ # Try moviepy for audio
527
+ try:
528
+ import moviepy.editor as mp
529
+
530
+ st.info("🎵 Using moviepy for audio...")
531
+ original_clip = mp.VideoFileClip(video_path)
532
+
533
+ if original_clip.audio is not None:
534
+ processed_clip = mp.VideoFileClip(temp_output_path)
535
+ final_clip = processed_clip.set_audio(original_clip.audio)
536
+
537
+ final_output_path = f"final_video_{int(time.time())}.mp4"
538
+ final_clip.write_videofile(final_output_path, codec='libx264', audio_codec='aac')
539
+
540
+
541
+ original_clip.close()
542
+ processed_clip.close()
543
+ final_clip.close()
544
+
545
+ st.success("✅ Video with audio created!")
546
+ output_path = final_output_path
547
+
548
+ if os.path.exists(temp_output_path):
549
+ os.remove(temp_output_path)
550
+ else:
551
+ st.info("ℹ️ No audio in original")
552
+ output_path = temp_output_path
553
+
554
+ except Exception as e:
555
+ st.warning(f"⚠️ Audio processing failed: {e}")
556
+ output_path = temp_output_path
557
+ else:
558
+ # H.264 MP4 created directly - just optimize
559
+ try:
560
+ from qtfaststart import processor as qtfast_processor
561
+
562
+ temp_fixed_path = f"fixed_{output_path}"
563
+ qtfast_processor.process(output_path, temp_fixed_path)
564
+ os.replace(temp_fixed_path, output_path)
565
+ st.info("✅ Video optimized for web streaming")
566
+ except:
567
+ st.info("✅ Video created (basic optimization)")
568
+ except ImportError:
569
+ st.info("✅ MP4 created (qtfaststart not available for optimization)")
570
+ except Exception as e:
571
+ st.warning(f"⚠️ Moov atom optimization failed: {e}")
572
+
573
+ except Exception as e:
574
+ st.warning(f"⚠️ Audio processing failed: {e}")
575
+ st.info("✅ MP4 created but may be missing audio")
576
+
577
+ progress_bar.progress(100)
578
+ status_text.text("✅ Complete!")
579
+
580
+ # Step H: Display and download the video
581
+ if os.path.exists(output_path):
582
+ file_size = os.path.getsize(output_path)
583
+ st.write(f"✅ Video saved successfully: {file_size:,} bytes")
584
+
585
+ # Read the video we just created
586
+ with open(output_path, 'rb') as f:
587
+ video_bytes = f.read()
588
+
589
+ # Display the video
590
+ st.subheader("🎬 Result")
591
+ try:
592
+ st.video(video_bytes)
593
+ st.success("✅ Video display successful!")
594
+ except Exception as video_error:
595
+ st.error(f"❌ Video display error: {video_error}")
596
+ st.info("💡 Video file created successfully but display failed. You can still download it.")
597
+
598
+ # Download button
599
+ file_extension = "mp4" if output_path.endswith('.mp4') else output_path.split('.')[-1]
600
+ st.download_button(
601
+ label="📥 Download Background Replaced Video",
602
+ data=video_bytes,
603
+ file_name=f"background_replaced.{file_extension}",
604
+ mime=f"video/{file_extension}",
605
+ use_container_width=True
606
+ progress_bar.progress(100)
607
+ status_text.text("✅ Complete!")
608
+
609
+ # Step H: Display results
610
+ if os.path.exists(output_path):
611
+ file_size = os.path.getsize(output_path)
612
+ st.write(f"✅ Video saved: {file_size:,} bytes")
613
+
614
+ # Read and store video
615
+ with open(output_path, 'rb') as f:
616
+ st.session_state['video_result'] = f.read()
617
+
618
+ # Clear processing UI
619
+ progress_bar.empty()
620
+ status_text.empty()
621
+
622
+ # Show success
623
+ st.markdown('<div class="success-box">🎉 Video Successfully Processed! 🎉</div>', unsafe_allow_html=True)
624
+
625
+ # Display video
626
+ st.markdown("### 🎬 Your Processed Video:")
627
+ st.video(st.session_state['video_result'])
628
+
629
+ # Download button
630
+ st.download_button(
631
+ label="⬇️ Download Processed Video",
632
+ data=st.session_state['video_result'],
633
+ file_name=f"background_replaced_{int(time.time())}.mp4",
634
+ mime="video/mp4",
635
+ use_container_width=True
636
+ )
637
+
638
+ else:
639
+ st.error("❌ Failed to create video")
640
+
641
+ # Cleanup
642
+ try:
643
+ for temp_file in [video_path, image_path, mask_path, foreground_path, alpha_path, output_path]:
644
+ if os.path.exists(temp_file):
645
+ os.remove(temp_file)
646
+ except:
647
+ pass
648
+
649
+ except Exception as e:
650
+ st.error(f"❌ Error: {str(e)}")
651
+ import traceback
652
+ st.code(traceback.format_exc())
653
+ elif not video_file or not image_file:
654
+ st.info("👆 Upload both a video and background image to start processing!")
655
+
656
+ # Compact footer for iframe
657
+ st.markdown("---")
658
+ st.markdown("""
659
+ <div style="text-align: center; color: #666; padding: 10px;">
660
+ <p><small>🍹 Powered by MatAnyone + Audio Preservation | Optimized for MyAvatar</small></p>
661
+ </div>
662
+ """, unsafe_allow_html=True)
663
+
664
+ if __name__ == "__main__":
665
+ main()
666
+
667
+
668
+
669
+ ]
requirements.txt CHANGED
@@ -1,14 +1,5 @@
1
- # ===== Core runtime =====
2
- # Option A: Keep your current Torch stack (safe for existing builds)
3
- torch==2.2.2
4
- torchvision==0.17.2
5
- torchaudio==2.2.2
6
-
7
- # Option B: Faster CUDA 12.1 wheels for T4 (uncomment to use instead)
8
- # torch==2.3.1+cu121
9
- # torchvision==0.18.1+cu121
10
- # torchaudio==2.3.1+cu121
11
- # --extra-index-url https://download.pytorch.org/whl/cu121
12
 
13
  # ===== Video / image IO =====
14
  opencv-python-headless==4.10.0.84
@@ -40,4 +31,8 @@ huggingface_hub>=0.33.5
40
  ffmpeg-python==0.2.0
41
  psutil==6.0.0
42
  requests==2.31.0
43
- scikit-learn==1.5.1
 
 
 
 
 
1
+ # ===== Core runtime (Torch is installed in Dockerfile with cu121 wheels) =====
2
+ # DO NOT add torch/torchvision/torchaudio here when using the CUDA wheels in Dockerfile.
 
 
 
 
 
 
 
 
 
3
 
4
  # ===== Video / image IO =====
5
  opencv-python-headless==4.10.0.84
 
31
  ffmpeg-python==0.2.0
32
  psutil==6.0.0
33
  requests==2.31.0
34
+ scikit-learn==1.5.1
35
+
36
+ # ===== (Optional) Extras =====
37
+ # safetensors==0.4.5 # if you pull weights that use safetensors
38
+ # aiohttp==3.10.5 # if you later async-fetch assets
ui.py ADDED
@@ -0,0 +1,356 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ui.py
2
+ """
3
+ BackgroundFX Pro — Gradio UI, background generators, and data sources.
4
+ """
5
+
6
+ import io
7
+ import os
8
+ import time
9
+ import logging
10
+ import tempfile
11
+ import requests
12
+ import numpy as np
13
+ from typing import Optional, Tuple, List, Dict, Any
14
+ from PIL import Image
15
+ import gradio as gr
16
+
17
+ from pipeline import (
18
+ process_video_gpu_optimized, stop_processing, processing_active,
19
+ SAM2_ENABLED, MATANY_ENABLED, GPU_NAME, GPU_MEMORY
20
+ )
21
+
22
+ logger = logging.getLogger("ui")
23
+
24
+
25
+ # ---- Background generators ----
26
+ def create_gradient_background(gradient_type: str, width: int, height: int) -> Image.Image:
27
+ img = np.zeros((height, width, 3), dtype=np.uint8)
28
+ if gradient_type == "sunset":
29
+ for i in range(height):
30
+ r = int(255*(1-i/height) + 128*(i/height))
31
+ g = int(165*(1-i/height) + 64*(i/height))
32
+ b = int(0*(1-i/height) + 128*(i/height))
33
+ img[i, :] = [r, g, b]
34
+ elif gradient_type == "ocean":
35
+ for i in range(height):
36
+ r = int(0*(1-i/height) + 30*(i/height))
37
+ g = int(100*(1-i/height) + 144*(i/height))
38
+ b = int(200*(1-i/height) + 255*(i/height))
39
+ img[i, :] = [r, g, b]
40
+ elif gradient_type == "forest":
41
+ for i in range(height):
42
+ r = int(34*(1-i/height) + 0*(i/height))
43
+ g = int(139*(1-i/height) + 100*(i/height))
44
+ b = int(34*(1-i/height) + 0*(i/height))
45
+ img[i, :] = [r, g, b]
46
+ else:
47
+ for i in range(height):
48
+ r = int(70*(1-i/height) + 20*(i/height))
49
+ g = int(130*(1-i/height) + 100*(i/height))
50
+ b = int(180*(1-i/height) + 255*(i/height))
51
+ img[i, :] = [r, g, b]
52
+ return Image.fromarray(img)
53
+
54
+
55
+ def create_solid_color(color: str, width: int, height: int) -> Image.Image:
56
+ color_map = {
57
+ "white": (255, 255, 255),
58
+ "black": (0, 0, 0),
59
+ "blue": (70, 130, 180),
60
+ "green": (0, 128, 0),
61
+ "red": (220, 20, 60),
62
+ "purple": (128, 0, 128),
63
+ "orange": (255, 165, 0),
64
+ "yellow": (255, 255, 0)
65
+ }
66
+ rgb = color_map.get(color, (70, 130, 180))
67
+ return Image.fromarray(np.full((height, width, 3), rgb, dtype=np.uint8))
68
+
69
+
70
def generate_ai_background(prompt: str) -> Tuple[Optional[Image.Image], str]:
    """Generate a 16:9 background via the HF inference API, with fallbacks.

    Tries a list of text-to-image models in order; on any failure (missing
    token, model cold start, network error) falls back to a gradient so the
    caller always gets a usable background.

    Args:
        prompt: Free-text description of the desired background.

    Returns:
        (image, status_message); image is None only for an empty prompt.
    """
    try:
        if not prompt.strip():
            return None, "Please enter a prompt"
        models = [
            "black-forest-labs/FLUX.1-schnell",
            "stabilityai/stable-diffusion-xl-base-1.0",
            "runwayml/stable-diffusion-v1-5"
        ]
        enhanced_prompt = f"professional video background, {prompt}, high quality, 16:9, cinematic lighting, detailed"
        # BUGFIX: only send an Authorization header when a real token is
        # configured; the old 'hf_placeholder' default guaranteed a 401
        # instead of allowing anonymous inference.
        token = os.getenv("HUGGINGFACE_TOKEN", "")
        headers = {"Authorization": f"Bearer {token}"} if token else {}
        payload = {"inputs": enhanced_prompt, "parameters": {"width": 1024, "height": 576, "num_inference_steps": 20, "guidance_scale": 7.5}}
        for model in models:
            try:
                url = f"https://api-inference.huggingface.co/models/{model}"
                # BUGFIX: the previous code used stream=True and then read
                # r.raw (r.raw is never None on a streamed response), which
                # bypasses requests' content decoding (gzip/deflate) and can
                # yield corrupted image bytes. r.content is the decoded body.
                r = requests.post(url, headers=headers, json=payload, timeout=60)
                if r.status_code == 200 and "image" in r.headers.get("content-type", "").lower():
                    return Image.open(io.BytesIO(r.content)).convert("RGB"), "AI background generated"
                if r.status_code == 503:
                    # Model is cold-starting; try the next candidate.
                    continue
            except Exception:
                continue
        return create_gradient_background("default", 1920, 1080), "Gradient fallback"
    except Exception as e:
        logger.error(f"AI background error: {e}")
        return create_gradient_background("default", 1920, 1080), "Default due to error"
97
+
98
+
99
+ # ---- MyAvatar API ----
100
class MyAvatarAPI:
    """Thin client for the MyAvatar video-listing API with a 5-minute cache."""

    def __init__(self):
        self.api_base = "https://app.myavatar.dk/api"
        # Last fetched video records and the epoch time of that fetch.
        self.videos_cache: List[Dict[str, Any]] = []
        self.last_refresh = 0

    def fetch_videos(self) -> List[Dict[str, Any]]:
        """Return the video list, re-fetching at most every 300 seconds."""
        try:
            cache_is_fresh = time.time() - self.last_refresh < 300
            if cache_is_fresh and self.videos_cache:
                return self.videos_cache
            resp = requests.get(f"{self.api_base}/videos", timeout=10)
            if resp.status_code != 200:
                return []
            self.videos_cache = resp.json().get("videos", [])
            self.last_refresh = time.time()
            return self.videos_cache
        except Exception as e:
            logger.error(f"Fetch videos failed: {e}")
            return []

    def get_video_choices(self) -> List[str]:
        """Render the videos as dropdown labels: 'title (ID: n) - status'."""
        vids = self.fetch_videos()
        if not vids:
            return ["No videos available"]
        return [
            f"{v.get('title','Untitled')} (ID: {v.get('id','?')}) - {v.get('status','?')}"
            for v in vids
        ]

    def get_video_url(self, selection: str) -> Optional[str]:
        """Map a dropdown label back to its cached video_url, or None."""
        if not selection or selection == "No videos available":
            return None
        try:
            if "(ID: " in selection:
                wanted = selection.split("(ID: ")[1].split(")")[0]
                for record in self.videos_cache:
                    if str(record.get("id")) == wanted:
                        return record.get("video_url")
        except Exception as e:
            logger.error(f"Parse selection failed: {e}")
        return None
142
+
143
+
144
+ myavatar_api = MyAvatarAPI()
145
+
146
+
147
+ # ---- UI ↔ Pipeline bridge: streaming handler ----
148
def process_video_with_background_stoppable(
    input_video: Optional[str],
    myavatar_selection: str,
    background_type: str,
    gradient_type: str,
    solid_color: str,
    custom_background: Optional[str],
    ai_prompt: str
):
    """Streaming Gradio handler: replace a video's background, stoppably.

    Yields 4-tuples of (process_btn update, stop_btn update, result video
    path, status text) so the UI can show progress and toggle the buttons.
    Sets pipeline.processing_active so a concurrent "stop" click can abort
    the run; the flag is always cleared in the finally block.
    """
    # BUGFIX: removed the dead `from pipeline import processing_active as
    # _active_ref` — rebinding the value locally cannot track the module
    # global; the module-attribute accesses below are the correct mechanism.
    import pipeline  # toggle pipeline.processing_active via the module object

    pipeline.processing_active = True
    try:
        yield gr.update(visible=False), gr.update(visible=True), None, "Starting processing..."

        # Resolve the input video: a direct upload wins over a MyAvatar
        # selection, which is downloaded to a temp file in 1 MiB chunks.
        video_path = None
        if input_video:
            video_path = input_video
        elif myavatar_selection and myavatar_selection != "No videos available":
            url = myavatar_api.get_video_url(myavatar_selection)
            if url:
                with requests.get(url, stream=True, timeout=60) as r:
                    r.raise_for_status()
                    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
                        for chunk in r.iter_content(chunk_size=1 << 20):
                            if chunk:
                                tmp.write(chunk)
                        video_path = tmp.name

        if not video_path:
            yield gr.update(visible=True), gr.update(visible=False), None, "No video provided"
            return

        # Build the background image according to the selected mode.
        yield gr.update(visible=False), gr.update(visible=True), None, "Generating background..."
        bg_img = None
        if background_type == "gradient":
            bg_img = create_gradient_background(gradient_type, 1920, 1080)
        elif background_type == "solid":
            bg_img = create_solid_color(solid_color, 1920, 1080)
        elif background_type == "custom" and custom_background:
            try:
                from PIL import Image
                bg_img = Image.open(custom_background).convert("RGB")
            except Exception:
                bg_img = None
        elif background_type == "ai" and ai_prompt:
            bg_img, _ = generate_ai_background(ai_prompt)

        if bg_img is None:
            yield gr.update(visible=True), gr.update(visible=False), None, "No background generated"
            return

        # Resize the background to the 720p working resolution and hand
        # off to the GPU pipeline; the temp file only reserves a path.
        yield gr.update(visible=False), gr.update(visible=True), None, "Processing video with GPU optimization..."
        bg_array = np.array(bg_img.resize((1280, 720), Image.Resampling.LANCZOS))
        with tempfile.NamedTemporaryFile(suffix="_processed.mp4", delete=False) as tmp_final:
            final_path = tmp_final.name

        out = process_video_gpu_optimized(video_path, bg_array, final_path)

        # Best-effort cleanup of the downloaded temp copy (never the upload).
        try:
            if video_path != input_video and video_path and os.path.exists(video_path):
                os.unlink(video_path)
        except Exception:
            pass

        if out and pipeline.processing_active:
            yield gr.update(visible=True), gr.update(visible=False), out, "Video processing completed successfully!"
        else:
            yield gr.update(visible=True), gr.update(visible=False), None, "Processing was stopped or failed"

    except Exception as e:
        logger.error(f"UI pipeline error: {e}")
        yield gr.update(visible=True), gr.update(visible=False), None, f"Processing error: {e}"
    finally:
        pipeline.processing_active = False
228
+
229
+
230
def stop_processing_button():
    """Stop-button handler: ask the pipeline to abort the current run.

    Returns updates that hide the stop button and report the abort.
    """
    import pipeline

    pipeline.stop_processing()
    return gr.update(visible=False), "Processing stopped by user"
234
+
235
+
236
+ # ---- UI factory ----
237
def create_interface():
    """Build and return the Gradio Blocks UI for BackgroundFX Pro.

    Layout: left column = video input (upload or MyAvatar) plus background
    options; right column = processed video and status. The event wiring at
    the bottom connects the controls to the streaming processing handler.
    """
    css = """
    .main-container { max-width: 1200px; margin: 0 auto; }
    .status-box { border: 2px solid #4CAF50; border-radius: 10px; padding: 15px; }
    .gradient-preview { border: 2px solid #ddd; border-radius: 10px; }
    """

    with gr.Blocks(css=css, title="BackgroundFX Pro - GPU Optimized") as app:
        gr.Markdown("# BackgroundFX Pro - GPU Optimized\n### Professional Video Background Replacement with SAM2 + MatAnyone")

        # Status banner; the flags are module-level feature switches
        # (SAM2_ENABLED / MATANY_ENABLED / GPU_NAME defined elsewhere).
        with gr.Row():
            sam2_status = "Ready" if SAM2_ENABLED else "Disabled"
            matany_status = "Ready" if MATANY_ENABLED else "Disabled"
            gr.Markdown(f"**System Status:** Online | **GPU:** {GPU_NAME} | **SAM2:** {sam2_status} | **MatAnyone:** {matany_status}")

        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("## Video Input")
                with gr.Tabs():
                    with gr.Tab("Upload Video"):
                        video_upload = gr.Video(label="Upload Video File", height=300)
                    with gr.Tab("MyAvatar Videos"):
                        refresh_btn = gr.Button("Refresh Videos", size="sm")
                        myavatar_dropdown = gr.Dropdown(label="Select MyAvatar Video", choices=["Click refresh to load videos"], value=None)
                        video_preview = gr.Video(label="Preview", height=200)

                gr.Markdown("## Background Options")
                background_type = gr.Radio(choices=["gradient", "solid", "custom", "ai"], value="gradient", label="Background Type")

                # Only the widgets matching the selected background type are
                # visible; update_background_options() below toggles them.
                with gr.Group():
                    gradient_type = gr.Dropdown(choices=["sunset", "ocean", "forest", "default"], value="sunset", label="Gradient Type", visible=True)
                    gradient_preview = gr.Image(label="Gradient Preview", height=150)

                    solid_color = gr.Dropdown(choices=["white", "black", "blue", "green", "red", "purple", "orange", "yellow"], value="blue", label="Solid Color", visible=False)
                    color_preview = gr.Image(label="Color Preview", height=150, visible=False)

                    custom_bg_upload = gr.Image(label="Upload Custom Background", type="filepath", visible=False)

                    ai_prompt = gr.Textbox(label="AI Background Prompt", placeholder="Describe the background you want...", visible=False)
                    ai_generate_btn = gr.Button("Generate AI Background", visible=False)
                    ai_preview = gr.Image(label="AI Generated Background", height=150, visible=False)

                with gr.Row():
                    process_btn = gr.Button("Process Video", variant="primary", size="lg")
                    stop_btn = gr.Button("Stop Processing", variant="stop", size="lg", visible=False)

            with gr.Column(scale=1):
                gr.Markdown("## Results")
                result_video = gr.Video(label="Processed Video", height=400)
                status_output = gr.Textbox(label="Processing Status", lines=5, max_lines=10, elem_classes=["status-box"])
                gr.Markdown("""
                ### Processing Pipeline:
                1. **SAM2 Segmentation** — GPU-accelerated person detection
                2. **MatAnyone Matting** — temporal consistency
                3. **GPU Compositing** — real-time background replacement
                4. **Memory Optimization** — chunked processing + OOM recovery
                """)

        # ---- event handlers ----
        def update_background_options(bg_type):
            # Returning a dict keyed by component lets one handler update
            # the visibility of every background widget at once.
            return {
                gradient_type: gr.update(visible=(bg_type == "gradient")),
                gradient_preview: gr.update(visible=(bg_type == "gradient")),
                solid_color: gr.update(visible=(bg_type == "solid")),
                color_preview: gr.update(visible=(bg_type == "solid")),
                custom_bg_upload: gr.update(visible=(bg_type == "custom")),
                ai_prompt: gr.update(visible=(bg_type == "ai")),
                ai_generate_btn: gr.update(visible=(bg_type == "ai")),
                ai_preview: gr.update(visible=(bg_type == "ai")),
            }

        def update_gradient_preview(grad_type):
            # Small 400x200 preview; any error simply clears the image.
            try:
                return create_gradient_background(grad_type, 400, 200)
            except Exception:
                return None

        def update_color_preview(color):
            try:
                return create_solid_color(color, 400, 200)
            except Exception:
                return None

        def refresh_myavatar_videos():
            try:
                return gr.update(choices=myavatar_api.get_video_choices(), value=None)
            except Exception:
                return gr.update(choices=["Error loading videos"], value=None)

        def load_video_preview(selection):
            # gr.Video can preview directly from the returned remote URL.
            try:
                return myavatar_api.get_video_url(selection)
            except Exception:
                return None

        def generate_ai_bg(prompt):
            bg_img, _ = generate_ai_background(prompt)
            return bg_img

        background_type.change(
            fn=update_background_options,
            inputs=[background_type],
            outputs=[gradient_type, gradient_preview, solid_color, color_preview, custom_bg_upload, ai_prompt, ai_generate_btn, ai_preview]
        )
        gradient_type.change(fn=update_gradient_preview, inputs=[gradient_type], outputs=[gradient_preview])
        solid_color.change(fn=update_color_preview, inputs=[solid_color], outputs=[color_preview])
        refresh_btn.click(fn=refresh_myavatar_videos, outputs=[myavatar_dropdown])
        myavatar_dropdown.change(fn=load_video_preview, inputs=[myavatar_dropdown], outputs=[video_preview])
        ai_generate_btn.click(fn=generate_ai_bg, inputs=[ai_prompt], outputs=[ai_preview])

        # The processing handler is a generator, so it streams button/status
        # updates; the stop button flips the pipeline flag it checks.
        process_btn.click(
            fn=process_video_with_background_stoppable,
            inputs=[video_upload, myavatar_dropdown, background_type, gradient_type, solid_color, custom_bg_upload, ai_prompt],
            outputs=[process_btn, stop_btn, result_video, status_output]
        )
        stop_btn.click(fn=stop_processing_button, outputs=[stop_btn, status_output])

        # Pre-render the default gradient preview on page load.
        app.load(fn=lambda: create_gradient_background("sunset", 400, 200), outputs=[gradient_preview])

    return app
utils/oom.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # utils/oom.py
2
+ import os, logging, gc, torch
3
+
4
# Module-level logger for all OOM-recovery messages.
log = logging.getLogger("oom")

# Tunables (can be overridden via Space → Environment Variables)
OOM_MAX_RETRIES = int(os.getenv("OOM_MAX_RETRIES", "3"))  # retries per batch before giving up
OOM_DOWNSCALE_RATIO = float(os.getenv("OOM_DOWNSCALE_RATIO", "0.85"))  # multiplicative shrink applied on each OOM
OOM_MIN_SIDE = int(os.getenv("OOM_MIN_SIDE", "640"))  # resolution floor (pixels, longest side)
OOM_MIN_CHUNK = int(os.getenv("OOM_MIN_CHUNK", "8"))  # chunk-size floor (frames per batch)
+
12
def cuda_clear():
    """Aggressively free CUDA caches and trigger Python GC.

    Never raises: all CUDA calls are wrapped so this is safe to call on
    CPU-only machines or after the CUDA context has been corrupted.
    """
    try:
        if torch.cuda.is_available():
            torch.cuda.synchronize()
            torch.cuda.empty_cache()
            # Peak-stat reset is optional; ignore if unsupported.
            try:
                torch.cuda.reset_peak_memory_stats()
            except Exception:
                pass
    except Exception:
        pass
    gc.collect()
+
26
class OOMRetry:
    """Context manager that swallows CUDA OOM errors so the caller can retry.

    After the with-block, ``guard.oom`` is True iff a CUDA out-of-memory
    error was caught (caches are cleared in that case); every other
    exception propagates unchanged.

    Usage:
        with OOMRetry() as guard:
            ...  # GPU work
        if guard.oom:
            ...  # adapt (e.g., downscale/chunk) and retry
    """

    def __init__(self):
        self.oom = False

    def __enter__(self):
        self.oom = False
        return self

    def __exit__(self, exc_type, exc, tb):
        # Identify OOM by message text so any wrapper exception type matches.
        if exc_type is None or "CUDA out of memory" not in str(exc):
            return False  # no exception, or not an OOM: let it propagate
        self.oom = True
        log.warning("[OOM] CUDA OOM caught — cleaning caches.")
        cuda_clear()
        return True  # suppress the exception so the caller can retry
47
+
48
def adapt_resolution(current_max_side: int) -> int:
    """Shrink the working resolution after an OOM, floored at OOM_MIN_SIDE.

    Returns the new longest-side limit; logs only when it actually shrinks.
    """
    candidate = int(current_max_side * OOM_DOWNSCALE_RATIO)
    new_side = candidate if candidate > OOM_MIN_SIDE else OOM_MIN_SIDE
    if new_side < current_max_side:
        log.warning(f"[OOM] Downscaling max_side: {current_max_side} -> {new_side}")
    return new_side
54
+
55
def adapt_chunk(current_chunk: int) -> int:
    """Shrink the per-batch chunk size after an OOM, floored at OOM_MIN_CHUNK.

    Returns the new chunk size; logs only when it actually shrinks.
    """
    candidate = int(current_chunk * OOM_DOWNSCALE_RATIO)
    new_chunk = candidate if candidate > OOM_MIN_CHUNK else OOM_MIN_CHUNK
    if new_chunk < current_chunk:
        log.warning(f"[OOM] Reducing chunk size: {current_chunk} -> {new_chunk}")
    return new_chunk