MogensR commited on
Commit
3afce52
·
1 Parent(s): 8f6e77a
Files changed (7) hide show
  1. Dockerfile +95 -35
  2. README.md +3 -28
  3. app.py +17 -736
  4. pipeline.py +669 -0
  5. requirements.txt +7 -12
  6. ui.py +356 -0
  7. utils/oom.py +60 -0
Dockerfile CHANGED
@@ -1,46 +1,106 @@
1
- FROM python:3.10-slim
 
 
 
2
 
3
- # Install system dependencies
4
- RUN apt-get update && apt-get install -y \
5
- git \
6
- ffmpeg \
7
- libgl1-mesa-glx \
8
- libglib2.0-0 \
9
- libsm6 \
10
- libxext6 \
11
- libxrender-dev \
12
- libgomp1 \
13
- && rm -rf /var/lib/apt/lists/*
14
 
15
- # Set working directory
16
- WORKDIR /code
 
 
17
 
18
- # Copy requirements first for better caching
19
- COPY ./requirements.txt /code/requirements.txt
 
 
 
20
 
21
- # Install Python dependencies
22
- RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt
 
 
 
 
23
 
24
- # Clone SAM2 and MatAnyone repositories
25
- RUN git clone https://github.com/facebookresearch/segment-anything-2.git /code/third_party/sam2
26
- RUN git clone https://github.com/pq-yang/MatAnyone.git /code/third_party/matanyone
 
 
 
27
 
28
- # Set Python path
29
- ENV PYTHONPATH="${PYTHONPATH}:/code/third_party/sam2:/code/third_party/matanyone"
 
 
30
 
31
- # Copy the rest of the application
32
- COPY . /code
 
33
 
34
- # Set environment variables for GPU optimization
35
- ENV PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128
36
- ENV OMP_NUM_THREADS=2
37
- ENV HF_HOME=/code/.cache
 
 
 
38
 
39
- # Create cache directory
40
- RUN mkdir -p /code/.cache
 
 
 
 
 
41
 
42
- # Expose port
43
- EXPOSE 7860
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
- # Run the application
46
- CMD ["python", "app.py"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ===============================
2
+ # BackgroundFX Pro — Dockerfile
3
+ # Hugging Face Spaces Pro (GPU)
4
+ # ===============================
5
 
6
+ # CUDA base image (T4-friendly). Build stage has NO GPU access.
7
+ FROM nvidia/cuda:12.3.2-cudnn9-devel-ubuntu22.04
 
 
 
 
 
 
 
 
 
8
 
9
+ # --- Build args (override in Space Settings → Build args) ---
10
+ # Pin external repos for reproducible builds
11
+ ARG SAM2_SHA=__PIN_ME__
12
+ ARG MATANYONE_SHA=__PIN_ME__
13
 
14
+ # Weights to pre-warm (public models only)
15
+ ARG SAM2_MODEL_ID=facebook/sam2
16
+ ARG SAM2_VARIANT=sam2_hiera_large # sam2_hiera_small | sam2_hiera_base | sam2_hiera_large
17
+ ARG MATANY_REPO_ID=PeiqingYang/MatAnyone
18
+ ARG MATANY_FILENAME=matanyone_v1.0.pth
19
 
20
+ # --- Create non-root user (uid 1000 required by HF) ---
21
+ RUN useradd -m -u 1000 user
22
+ USER user
23
+ ENV HOME=/home/user \
24
+ PATH=/home/user/.local/bin:$PATH
25
+ WORKDIR $HOME/app
26
 
27
+ # --- System packages ---
28
+ USER root
29
+ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
30
+ git ffmpeg libgl1-mesa-glx libglib2.0-0 libsm6 libxext6 libxrender-dev libgomp1 \
31
+ && rm -rf /var/lib/apt/lists/*
32
+ USER user
33
 
34
+ # --- Python & CUDA wheels (Torch cu121) ---
35
+ RUN pip install --no-cache-dir --upgrade pip
36
+ RUN pip install --no-cache-dir --index-url https://download.pytorch.org/whl/cu121 \
37
+ torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1
38
 
39
+ # --- App Python deps ---
40
+ COPY --chown=user requirements.txt ./requirements.txt
41
+ RUN pip install --no-cache-dir -r requirements.txt
42
 
43
+ # --- Clone external repos (SAM2 & MatAnyone) ---
44
+ RUN git clone https://github.com/facebookresearch/segment-anything-2.git third_party/sam2 && \
45
+ cd third_party/sam2 && \
46
+ if [ "${SAM2_SHA}" != "__PIN_ME__" ]; then git checkout ${SAM2_SHA}; fi
47
+ RUN git clone https://github.com/pq-yang/MatAnyone.git third_party/matanyone && \
48
+ cd third_party/matanyone && \
49
+ if [ "${MATANYONE_SHA}" != "__PIN_ME__" ]; then git checkout ${MATANYONE_SHA}; fi
50
 
51
+ # --- Pre-warm model weights into image cache (public models only) ---
52
+ # NOTE: Build time has no access to private tokens on Spaces for gated models.
53
+ ENV HF_HOME_BUILD=${HOME}/.cache/huggingface
54
+ RUN python - <<'PY'
55
+ import os
56
+ from pathlib import Path
57
+ from huggingface_hub import hf_hub_download
58
 
59
+ SAM2_MODEL_ID = os.environ.get("SAM2_MODEL_ID", "facebook/sam2")
60
+ SAM2_VARIANT = os.environ.get("SAM2_VARIANT", "sam2_hiera_large")
61
+ MATANY_REPO_ID = os.environ.get("MATANY_REPO_ID", "PeiqingYang/MatAnyone")
62
+ MATANY_FILENAME = os.environ.get("MATANY_FILENAME", "matanyone_v1.0.pth")
63
+
64
+ VARIANT_FILES = {
65
+ "sam2_hiera_small": "sam2_hiera_small.pt",
66
+ "sam2_hiera_base": "sam2_hiera_base.pt",
67
+ "sam2_hiera_large": "sam2_hiera_large.pt",
68
+ }
69
+ ckpt_name = VARIANT_FILES.get(SAM2_VARIANT, VARIANT_FILES["sam2_hiera_large"])
70
+
71
+ cache_dir = os.environ.get("HF_HOME_BUILD", str(Path.home() / ".cache" / "huggingface"))
72
+ Path(cache_dir).mkdir(parents=True, exist_ok=True)
73
+
74
+ print(f"[PREWARM] SAM2: repo={SAM2_MODEL_ID}, file={ckpt_name}")
75
+ p1 = hf_hub_download(repo_id=SAM2_MODEL_ID, filename=ckpt_name, local_dir=cache_dir)
76
+ print(f"[PREWARM] -> {p1}")
77
+
78
+ print(f"[PREWARM] MatAnyone: repo={MATANY_REPO_ID}, file={MATANY_FILENAME}")
79
+ p2 = hf_hub_download(repo_id=MATANY_REPO_ID, filename=MATANY_FILENAME, local_dir=cache_dir)
80
+ print(f"[PREWARM] -> {p2}")
81
 
82
+ print("[PREWARM] Done.")
83
+ PY
84
+
85
+ # --- App code ---
86
+ COPY --chown=user . $HOME/app
87
+
88
+ # --- Runtime environment ---
89
+ # Caches in /data persist across Space restarts
90
+ ENV PYTHONUNBUFFERED=1 \
91
+ OMP_NUM_THREADS=2 \
92
+ TOKENIZERS_PARALLELISM=false \
93
+ HF_HOME=/data/.cache/huggingface \
94
+ TORCH_HOME=/data/.cache/torch \
95
+ MPLCONFIGDIR=/data/.cache/matplotlib \
96
+ PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128 \
97
+ PYTHONPATH="$PYTHONPATH:$HOME/app/third_party/sam2:$HOME/app/third_party/matanyone" \
98
+ PORT=7860 \
99
+ SAM2_MODEL_ID=${SAM2_MODEL_ID} \
100
+ SAM2_VARIANT=${SAM2_VARIANT} \
101
+ MATANY_REPO_ID=${MATANY_REPO_ID} \
102
+ MATANY_FILENAME=${MATANY_FILENAME}
103
+
104
+ # --- Networking / Entrypoint ---
105
+ EXPOSE 7860
106
+ CMD ["python", "app.py"]
README.md CHANGED
@@ -1,12 +1,10 @@
1
  ---
2
- title: 🎬 BackgroundFX Pro - SAM2 + MatAnyOne
3
  emoji: 🎥
4
  colorFrom: indigo
5
  colorTo: purple
6
- sdk: gradio
7
- sdk_version: 5.42.0
8
- app_file: app.py
9
- pinned: false
10
  license: mit
11
  tags:
12
  - video
@@ -15,27 +13,4 @@ tags:
15
  - matting
16
  - SAM2
17
  - MatAnyOne
18
- - gradio
19
  ---
20
- # 🎬 BackgroundFX Pro — SAM2 + MatAnyOne Edition
21
- **State-of-the-art video background replacement** with a simple Gradio UI.
22
- - ✅ **Segment Anything Model 2 (SAM2)** video propagation
23
- - ✅ **MatAnyOne (CVPR 2025)** professional matting refinement
24
- - ✅ GPU acceleration when available
25
- - ✅ Robust fallbacks (SAM2-only → GrabCut)
26
- - ✅ Backgrounds: gradient / solid / custom / AI (HF Inference API)
27
- ## 🚀 How to use
28
- 1. Upload a **video** (left panel)
29
- 2. Choose your **background** (gradient / solid / custom / AI)
30
- 3. Click **Process Video**
31
- 4. Download the **final video** in the Results panel 🎉
32
- ## 📦 Files
33
- - `app.py` — main app (SAM2 + MatAnyOne + UI)
34
- - `requirements.txt` — Python dependencies (commit-pinned for stability)
35
- ## 🛡️ Licenses
36
- Wrapper code: MIT.
37
- Models: see their repos:
38
- - [SAM2](https://github.com/facebookresearch/segment-anything-2)
39
- - [MatAnyOne](https://github.com/pq-yang/MatAnyOne)
40
- 👤 Maintainer: Mogens Rye
41
- 📧 Contact: mogens@ryeoutsourcing.dk
 
1
  ---
2
+ title: 🎬 BackgroundFX Pro - SAM2 + MatAnyone
3
  emoji: 🎥
4
  colorFrom: indigo
5
  colorTo: purple
6
+ sdk: docker
7
+ app_port: 7860
 
 
8
  license: mit
9
  tags:
10
  - video
 
13
  - matting
14
  - SAM2
15
  - MatAnyOne
 
16
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py CHANGED
@@ -1,768 +1,49 @@
1
  #!/usr/bin/env python3
2
  """
3
- BackgroundFX Pro - GPU Optimized Version
4
- Professional video background replacement with SAM2 + MatAnyone
5
  """
6
 
7
  import os
8
  import sys
9
- import gc
10
- import cv2
11
- import json
12
- import time
13
- import torch
14
  import logging
15
- import requests
16
- import tempfile
17
- import subprocess
18
- import threading
19
- import numpy as np
20
- import io
21
- from PIL import Image
22
- from pathlib import Path
23
- from datetime import datetime
24
- from typing import Optional, Tuple, List, Dict, Any
25
 
26
- import gradio as gr
27
-
28
- # Import optimized modules
29
- from utils.accelerator import pick_device, torch_global_tuning, memory_checkpoint, cleanup
30
- from models.sam2_loader import SAM2Predictor
31
- from models.matanyone_loader import MatAnyoneSession
32
 
33
- # Configure logging
34
  logging.basicConfig(
35
  level=logging.INFO,
36
  format='%(asctime)s - %(levelname)s - %(message)s'
37
  )
38
- logger = logging.getLogger(__name__)
39
-
40
- try:
41
- from sklearn.cluster import KMeans
42
- SKLEARN_AVAILABLE = True
43
- except ImportError:
44
- SKLEARN_AVAILABLE = False
45
- logger.warning("sklearn not available, using fallback color detection")
46
-
47
- # Global processing control
48
- processing_active = False
49
- processing_thread = None
50
-
51
- # Initialize optimized system
52
- device = pick_device()
53
- torch_global_tuning()
54
- GPU_NAME = torch.cuda.get_device_name(0) if torch.cuda.is_available() else "CPU"
55
- GPU_MEMORY = torch.cuda.get_device_properties(0).total_memory / (1024**3) if torch.cuda.is_available() else 0
56
- MODEL_SIZE = "large" if "T4" in GPU_NAME else "base"
57
-
58
- logger.info(f"System initialized - Device: {device} | GPU: {GPU_NAME} | Memory: {GPU_MEMORY:.1f}GB")
59
-
60
- # Environment variables for model control
61
- SAM2_ENABLED = os.environ.get("ENABLE_SAM2", "1") == "1"
62
- MATANY_ENABLED = os.environ.get("ENABLE_MATANY", "1") == "1"
63
- MAX_SIDE = int(os.environ.get("MAX_SIDE", "1280"))
64
- FRAME_CHUNK = int(os.environ.get("FRAME_CHUNK", "64"))
65
-
66
- # Global optimized model instances
67
- sam2_predictor = None
68
- matanyone_session = None
69
-
70
- def get_sam2():
71
- """Get SAM2 predictor with lazy loading"""
72
- global sam2_predictor
73
- if sam2_predictor is None and SAM2_ENABLED:
74
- try:
75
- sam2_predictor = SAM2Predictor(device).load()
76
- logger.info("SAM2 loaded with optimized pipeline")
77
- except Exception as e:
78
- logger.error(f"SAM2 loading failed: {e}")
79
- sam2_predictor = None
80
- return sam2_predictor
81
-
82
- def get_matanyone():
83
- """Get MatAnyone session with lazy loading"""
84
- global matanyone_session
85
- if matanyone_session is None and MATANY_ENABLED:
86
- try:
87
- repo_id = os.environ.get("MATANY_REPO_ID", "PeiqingYang/MatAnyone")
88
- filename = os.environ.get("MATANY_FILENAME", "matanyone_v1.0.pth")
89
- matanyone_session = MatAnyoneSession(device).load(
90
- repo_id=repo_id,
91
- filename=filename
92
- )
93
- logger.info("MatAnyone loaded with optimized pipeline")
94
- except Exception as e:
95
- logger.error(f"MatAnyone loading failed: {e}")
96
- matanyone_session = None
97
- return matanyone_session
98
-
99
- def iter_video_frames(path, target_max_side=MAX_SIDE, chunk=FRAME_CHUNK):
100
- """Memory-mapped video frame generator"""
101
- import cv2
102
- cap = cv2.VideoCapture(path)
103
- if not cap.isOpened():
104
- raise RuntimeError("Cannot open video")
105
-
106
- # Get video properties
107
- w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
108
- h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
109
- fps = cap.get(cv2.CAP_PROP_FPS) or 25.0
110
-
111
- # Scale to fit GPU memory constraints
112
- scale = min(1.0, float(target_max_side) / float(max(w, h)))
113
- new_w, new_h = (w, h) if scale >= 0.999 else (int(w*scale)//2*2, int(h*scale)//2*2)
114
-
115
- batch = []
116
- while True:
117
- if not processing_active:
118
- cap.release()
119
- return
120
-
121
- ok, f = cap.read()
122
- if not ok:
123
- if batch:
124
- yield batch, fps, (w, h), (new_w, new_h)
125
- break
126
-
127
- if new_w != w or new_h != h:
128
- f = cv2.resize(f, (new_w, new_h), interpolation=cv2.INTER_AREA)
129
- f = cv2.cvtColor(f, cv2.COLOR_BGR2RGB)
130
- batch.append(f)
131
-
132
- if len(batch) >= chunk:
133
- yield batch, fps, (w, h), (new_w, new_h)
134
- batch = []
135
-
136
- cap.release()
137
-
138
- def composite_frame(frame_rgb, bg_rgb, alpha01):
139
- """GPU-optimized frame compositing"""
140
- if bg_rgb is None:
141
- bg = np.full_like(frame_rgb, 200, dtype=np.uint8)
142
- else:
143
- bg = bg_rgb
144
- if bg.shape[:2] != frame_rgb.shape[:2]:
145
- bg = cv2.resize(bg, (frame_rgb.shape[1], frame_rgb.shape[0]), interpolation=cv2.INTER_AREA)
146
-
147
- a = np.clip(alpha01[..., None], 0.0, 1.0)
148
- out = (frame_rgb.astype("float32") * a + bg.astype("float32") * (1.0 - a)).astype("uint8")
149
- return out
150
-
151
- def cheap_fallback_alpha(fr, seed_mask=None):
152
- """Fast CPU fallback alpha generation"""
153
- if seed_mask is not None:
154
- return seed_mask
155
-
156
- # Center-focused soft alpha
157
- H, W = fr.shape[:2]
158
- yy, xx = np.mgrid[0:H, 0:W].astype("float32")
159
- cx, cy = W/2.0, H/2.0
160
- r = np.sqrt((xx-cx)**2 + (yy-cy)**2) / max(W, H)
161
- a = 1.0 - np.clip((r-0.2)/0.4, 0.0, 1.0)
162
- return a.astype("float32")
163
-
164
- def process_video_gpu_optimized(input_path, bg_image_rgb=None, out_path="output.mp4"):
165
- """GPU-optimized video processing pipeline"""
166
- global processing_active
167
-
168
- writer = None
169
- seed_mask = None
170
- total = 0
171
-
172
- try:
173
- for frames, fps, orig_hw, new_hw in iter_video_frames(input_path, MAX_SIDE, FRAME_CHUNK):
174
- if not processing_active:
175
- logger.info("Processing stopped by user")
176
- break
177
-
178
- H, W = frames[0].shape[:2]
179
- if writer is None:
180
- writer = cv2.VideoWriter(
181
- out_path, cv2.VideoWriter_fourcc(*"mp4v"), fps, (W, H)
182
- )
183
-
184
- # First frame: try SAM2 for seed mask
185
- if seed_mask is None:
186
- try:
187
- sam2 = get_sam2()
188
- if sam2:
189
- seed_mask = sam2.first_frame_mask(frames[0].astype("float32") / 255.0)
190
- seed_mask = (cv2.GaussianBlur(seed_mask, (0, 0), 1.0) > 0.5).astype("float32")
191
- logger.info("SAM2 seed mask generated")
192
- except Exception as e:
193
- logger.warning(f"SAM2 failed, continuing without: {e}")
194
- seed_mask = None
195
-
196
- # Professional matting pipeline
197
- matany = get_matanyone()
198
- if matany and MATANY_ENABLED:
199
- try:
200
- with torch.autocast(device_type=str(device).split(":")[0], dtype=torch.float16, enabled=(device.type=="cuda")):
201
- for i, fr in enumerate(frames):
202
- if not processing_active:
203
- break
204
-
205
- alpha = matany.step(fr, seed_mask if total == 0 and i == 0 else None)
206
- comp = composite_frame(fr, bg_image_rgb, alpha)
207
- writer.write(cv2.cvtColor(comp, cv2.COLOR_RGB2BGR))
208
- total += 1
209
-
210
- if total % 64 == 0:
211
- cleanup()
212
- memory_checkpoint(f"frames={total}")
213
-
214
- except Exception as e:
215
- logger.warning(f"MatAnyone failed: {e}")
216
- matany = None
217
-
218
- # Fallback if MatAnyone unavailable
219
- if not matany:
220
- for fr in frames:
221
- if not processing_active:
222
- break
223
-
224
- alpha = cheap_fallback_alpha(fr, seed_mask)
225
- comp = composite_frame(fr, bg_image_rgb, alpha)
226
- writer.write(cv2.cvtColor(comp, cv2.COLOR_RGB2BGR))
227
- total += 1
228
-
229
- if total % 64 == 0:
230
- cleanup()
231
-
232
- memory_checkpoint(f"processed={total}")
233
-
234
- except Exception as e:
235
- logger.error(f"Processing error: {e}")
236
- finally:
237
- if writer:
238
- writer.release()
239
- cleanup()
240
-
241
- return out_path if processing_active else None
242
 
243
- def stop_processing():
244
- """Stop video processing"""
245
- global processing_active
246
- processing_active = False
247
- return gr.update(visible=False), "Processing stopped by user"
248
-
249
- class MyAvatarAPI:
250
- """MyAvatar API integration"""
251
-
252
- def __init__(self):
253
- self.api_base = "https://app.myavatar.dk/api"
254
- self.videos_cache = []
255
- self.last_refresh = 0
256
-
257
- def fetch_videos(self) -> List[Dict[str, Any]]:
258
- """Fetch videos from MyAvatar API"""
259
- try:
260
- if time.time() - self.last_refresh < 300 and self.videos_cache:
261
- return self.videos_cache
262
-
263
- response = requests.get(f"{self.api_base}/videos", timeout=10)
264
- if response.status_code == 200:
265
- data = response.json()
266
- self.videos_cache = data.get('videos', [])
267
- self.last_refresh = time.time()
268
- logger.info(f"Fetched {len(self.videos_cache)} videos from MyAvatar")
269
- return self.videos_cache
270
- else:
271
- logger.error(f"API error: {response.status_code}")
272
- return []
273
-
274
- except Exception as e:
275
- logger.error(f"Error fetching videos: {e}")
276
- return []
277
-
278
- def get_video_choices(self) -> List[str]:
279
- """Get video choices for dropdown"""
280
- videos = self.fetch_videos()
281
- if not videos:
282
- return ["No videos available"]
283
-
284
- choices = []
285
- for video in videos:
286
- title = video.get('title', 'Untitled')
287
- video_id = video.get('id', 'unknown')
288
- status = video.get('status', 'unknown')
289
- choices.append(f"{title} (ID: {video_id}) - {status}")
290
-
291
- return choices
292
-
293
- def get_video_url(self, selection: str) -> Optional[str]:
294
- """Extract video URL from selection"""
295
- if not selection or selection == "No videos available":
296
- return None
297
-
298
- try:
299
- if "(ID: " in selection:
300
- video_id = selection.split("(ID: ")[1].split(")")[0]
301
-
302
- for video in self.videos_cache:
303
- if str(video.get('id')) == video_id:
304
- return video.get('video_url')
305
-
306
- return None
307
-
308
- except Exception as e:
309
- logger.error(f"Error extracting video URL: {e}")
310
- return None
311
-
312
- # Initialize API
313
- myavatar_api = MyAvatarAPI()
314
-
315
- def create_gradient_background(gradient_type: str, width: int, height: int) -> Image.Image:
316
- """Create gradient backgrounds"""
317
- try:
318
- img = np.zeros((height, width, 3), dtype=np.uint8)
319
-
320
- if gradient_type == "sunset":
321
- for i in range(height):
322
- ratio = i / height
323
- r = int(255 * (1 - ratio) + 128 * ratio)
324
- g = int(165 * (1 - ratio) + 64 * ratio)
325
- b = int(0 * (1 - ratio) + 128 * ratio)
326
- img[i, :] = [r, g, b]
327
- elif gradient_type == "ocean":
328
- for i in range(height):
329
- ratio = i / height
330
- r = int(0 * (1 - ratio) + 30 * ratio)
331
- g = int(100 * (1 - ratio) + 144 * ratio)
332
- b = int(200 * (1 - ratio) + 255 * ratio)
333
- img[i, :] = [r, g, b]
334
- elif gradient_type == "forest":
335
- for i in range(height):
336
- ratio = i / height
337
- r = int(34 * (1 - ratio) + 0 * ratio)
338
- g = int(139 * (1 - ratio) + 100 * ratio)
339
- b = int(34 * (1 - ratio) + 0 * ratio)
340
- img[i, :] = [r, g, b]
341
- else: # default blue
342
- for i in range(height):
343
- ratio = i / height
344
- r = int(70 * (1 - ratio) + 20 * ratio)
345
- g = int(130 * (1 - ratio) + 100 * ratio)
346
- b = int(180 * (1 - ratio) + 255 * ratio)
347
- img[i, :] = [r, g, b]
348
-
349
- return Image.fromarray(img)
350
-
351
- except Exception as e:
352
- logger.error(f"Error creating gradient: {e}")
353
- img = np.full((height, width, 3), [70, 130, 180], dtype=np.uint8)
354
- return Image.fromarray(img)
355
-
356
- def create_solid_color(color: str, width: int, height: int) -> Image.Image:
357
- """Create solid color backgrounds"""
358
- color_map = {
359
- "white": (255, 255, 255),
360
- "black": (0, 0, 0),
361
- "blue": (70, 130, 180),
362
- "green": (0, 128, 0),
363
- "red": (220, 20, 60),
364
- "purple": (128, 0, 128),
365
- "orange": (255, 165, 0),
366
- "yellow": (255, 255, 0)
367
- }
368
-
369
- rgb = color_map.get(color, (70, 130, 180))
370
- img = np.full((height, width, 3), rgb, dtype=np.uint8)
371
- return Image.fromarray(img)
372
-
373
- def generate_ai_background(prompt: str) -> Tuple[Optional[Image.Image], str]:
374
- """Generate AI background using Hugging Face Inference API"""
375
- try:
376
- if not prompt.strip():
377
- return None, "Please enter a prompt"
378
-
379
- models = [
380
- "black-forest-labs/FLUX.1-schnell",
381
- "stabilityai/stable-diffusion-xl-base-1.0",
382
- "runwayml/stable-diffusion-v1-5"
383
- ]
384
-
385
- enhanced_prompt = f"professional video background, {prompt}, high quality, 16:9 aspect ratio, cinematic lighting, detailed"
386
-
387
- for model in models:
388
- try:
389
- logger.info(f"Trying AI generation with {model}...")
390
-
391
- api_url = f"https://api-inference.huggingface.co/models/{model}"
392
- headers = {
393
- "Authorization": f"Bearer {os.getenv('HUGGINGFACE_TOKEN', 'hf_placeholder')}"
394
- }
395
- payload = {
396
- "inputs": enhanced_prompt,
397
- "parameters": {
398
- "width": 1024,
399
- "height": 576,
400
- "num_inference_steps": 20,
401
- "guidance_scale": 7.5
402
- }
403
- }
404
-
405
- response = requests.post(api_url, headers=headers, json=payload, timeout=30)
406
-
407
- if response.status_code == 200:
408
- image = Image.open(io.BytesIO(response.content))
409
- logger.info(f"AI background generated successfully with {model}")
410
- return image, f"AI background generated: {prompt}"
411
- elif response.status_code == 503:
412
- logger.warning(f"Model {model} is loading, trying next...")
413
- continue
414
- else:
415
- logger.warning(f"Error with {model}: {response.status_code}")
416
- continue
417
-
418
- except Exception as e:
419
- logger.warning(f"Error with {model}: {e}")
420
- continue
421
-
422
- logger.info("AI generation failed, creating intelligent gradient fallback...")
423
- return create_intelligent_gradient(prompt), f"Created gradient background inspired by: {prompt}"
424
-
425
- except Exception as e:
426
- logger.error(f"Error in AI background generation: {e}")
427
- return create_gradient_background("default", 1920, 1080), f"Created default background due to error: {str(e)}"
428
-
429
- def create_intelligent_gradient(prompt: str) -> Image.Image:
430
- """Create intelligent gradient based on prompt analysis"""
431
- prompt_lower = prompt.lower()
432
-
433
- if any(word in prompt_lower for word in ["sunset", "orange", "warm", "fire", "autumn"]):
434
- return create_gradient_background("sunset", 1920, 1080)
435
- elif any(word in prompt_lower for word in ["ocean", "sea", "blue", "water", "sky", "calm"]):
436
- return create_gradient_background("ocean", 1920, 1080)
437
- elif any(word in prompt_lower for word in ["forest", "green", "nature", "trees", "jungle"]):
438
- return create_gradient_background("forest", 1920, 1080)
439
- else:
440
- return create_gradient_background("default", 1920, 1080)
441
-
442
- def process_video_with_background_stoppable(
443
- input_video: Optional[str],
444
- myavatar_selection: str,
445
- background_type: str,
446
- gradient_type: str,
447
- solid_color: str,
448
- custom_background: Optional[str],
449
- ai_prompt: str
450
- ):
451
- """Main processing function with stop capability"""
452
- global processing_active
453
- processing_active = True
454
-
455
- try:
456
- # Show stop button, hide process button
457
- yield gr.update(visible=False), gr.update(visible=True), None, "Starting processing..."
458
-
459
- # Determine video source
460
- video_path = None
461
- if input_video:
462
- video_path = input_video
463
- logger.info("Using uploaded video")
464
- elif myavatar_selection and myavatar_selection != "No videos available":
465
- video_url = myavatar_api.get_video_url(myavatar_selection)
466
- if video_url:
467
- response = requests.get(video_url)
468
- if response.status_code == 200:
469
- temp_video = tempfile.NamedTemporaryFile(suffix='.mp4', delete=False)
470
- temp_video.write(response.content)
471
- temp_video.close()
472
- video_path = temp_video.name
473
- logger.info("Using MyAvatar video")
474
-
475
- if not video_path:
476
- yield gr.update(visible=True), gr.update(visible=False), None, "No video provided"
477
- return
478
-
479
- # Generate background
480
- yield gr.update(visible=False), gr.update(visible=True), None, "Generating background..."
481
-
482
- background_image = None
483
- if background_type == "gradient":
484
- background_image = create_gradient_background(gradient_type, 1920, 1080)
485
- elif background_type == "solid":
486
- background_image = create_solid_color(solid_color, 1920, 1080)
487
- elif background_type == "custom" and custom_background:
488
- background_image = Image.open(custom_background)
489
- elif background_type == "ai" and ai_prompt:
490
- bg_img, ai_msg = generate_ai_background(ai_prompt)
491
- background_image = bg_img
492
-
493
- if not background_image:
494
- yield gr.update(visible=True), gr.update(visible=False), None, "No background generated"
495
- return
496
-
497
- # Process video
498
- yield gr.update(visible=False), gr.update(visible=True), None, "Processing video with GPU optimization..."
499
-
500
- bg_array = np.array(background_image.resize((1280, 720), Image.Resampling.LANCZOS))
501
-
502
- with tempfile.NamedTemporaryFile(suffix='_processed.mp4', delete=False) as tmp_final:
503
- final_video_path = tmp_final.name
504
-
505
- result_path = process_video_gpu_optimized(video_path, bg_array, final_video_path)
506
-
507
- # Cleanup
508
- try:
509
- if video_path != input_video:
510
- os.unlink(video_path)
511
- except:
512
- pass
513
-
514
- if result_path and processing_active:
515
- yield gr.update(visible=True), gr.update(visible=False), result_path, "Video processing completed successfully!"
516
- else:
517
- yield gr.update(visible=True), gr.update(visible=False), None, "Processing was stopped or failed"
518
-
519
- except Exception as e:
520
- logger.error(f"Error in video processing: {e}")
521
- yield gr.update(visible=True), gr.update(visible=False), None, f"Processing error: {str(e)}"
522
- finally:
523
- processing_active = False
524
-
525
- def create_interface():
526
- """Create the Gradio interface"""
527
- logger.info("Creating Gradio interface...")
528
- logger.info(f"Device: {device} | GPU: {GPU_NAME} | Memory: {GPU_MEMORY:.1f}GB")
529
-
530
- css = """
531
- .main-container { max-width: 1200px; margin: 0 auto; }
532
- .status-box { border: 2px solid #4CAF50; border-radius: 10px; padding: 15px; }
533
- .gradient-preview { border: 2px solid #ddd; border-radius: 10px; }
534
- """
535
-
536
- with gr.Blocks(css=css, title="BackgroundFX Pro - GPU Optimized") as app:
537
-
538
- gr.Markdown("""
539
- # BackgroundFX Pro - GPU Optimized
540
- ### Professional Video Background Replacement with SAM2 + MatAnyone
541
- """)
542
-
543
- with gr.Row():
544
- sam2_status = "Ready" if SAM2_ENABLED else "Disabled"
545
- matany_status = "Ready" if MATANY_ENABLED else "Disabled"
546
- gr.Markdown(f"""
547
- **System Status:** Online | **GPU:** {GPU_NAME} | **SAM2:** {sam2_status} | **MatAnyone:** {matany_status}
548
- """)
549
-
550
- with gr.Row():
551
- with gr.Column(scale=1):
552
- gr.Markdown("## Video Input")
553
-
554
- with gr.Tabs():
555
- with gr.Tab("Upload Video"):
556
- video_upload = gr.Video(label="Upload Video File", height=300)
557
-
558
- with gr.Tab("MyAvatar Videos"):
559
- refresh_btn = gr.Button("Refresh Videos", size="sm")
560
- myavatar_dropdown = gr.Dropdown(
561
- label="Select MyAvatar Video",
562
- choices=["Click refresh to load videos"],
563
- value=None
564
- )
565
- video_preview = gr.Video(label="Preview", height=200)
566
-
567
- gr.Markdown("## Background Options")
568
-
569
- background_type = gr.Radio(
570
- choices=["gradient", "solid", "custom", "ai"],
571
- value="gradient",
572
- label="Background Type"
573
- )
574
-
575
- with gr.Group():
576
- gradient_type = gr.Dropdown(
577
- choices=["sunset", "ocean", "forest", "default"],
578
- value="sunset",
579
- label="Gradient Type",
580
- visible=True
581
- )
582
- gradient_preview = gr.Image(label="Gradient Preview", height=150)
583
-
584
- solid_color = gr.Dropdown(
585
- choices=["white", "black", "blue", "green", "red", "purple", "orange", "yellow"],
586
- value="blue",
587
- label="Solid Color",
588
- visible=False
589
- )
590
- color_preview = gr.Image(label="Color Preview", height=150, visible=False)
591
-
592
- custom_bg_upload = gr.Image(
593
- label="Upload Custom Background",
594
- type="filepath",
595
- visible=False
596
- )
597
-
598
- ai_prompt = gr.Textbox(
599
- label="AI Background Prompt",
600
- placeholder="Describe the background you want...",
601
- visible=False
602
- )
603
- ai_generate_btn = gr.Button("Generate AI Background", visible=False)
604
- ai_preview = gr.Image(label="AI Generated Background", height=150, visible=False)
605
-
606
- with gr.Row():
607
- process_btn = gr.Button("Process Video", variant="primary", size="lg")
608
- stop_btn = gr.Button("Stop Processing", variant="stop", size="lg", visible=False)
609
-
610
- with gr.Column(scale=1):
611
- gr.Markdown("## Results")
612
-
613
- result_video = gr.Video(label="Processed Video", height=400)
614
-
615
- status_output = gr.Textbox(
616
- label="Processing Status",
617
- lines=5,
618
- max_lines=10,
619
- elem_classes=["status-box"]
620
- )
621
-
622
- gr.Markdown("""
623
- ### Processing Pipeline:
624
- 1. **SAM2 Segmentation** - GPU-accelerated person detection
625
- 2. **MatAnyone Matting** - Professional temporal consistency
626
- 3. **GPU Compositing** - Real-time background replacement
627
- 4. **Memory Optimization** - Chunked processing for efficiency
628
-
629
- **Performance:** ~3-5 minutes per 1000 frames on T4 GPU
630
- """)
631
-
632
- # Event handlers
633
- def update_background_options(bg_type):
634
- return {
635
- gradient_type: gr.update(visible=(bg_type == "gradient")),
636
- gradient_preview: gr.update(visible=(bg_type == "gradient")),
637
- solid_color: gr.update(visible=(bg_type == "solid")),
638
- color_preview: gr.update(visible=(bg_type == "solid")),
639
- custom_bg_upload: gr.update(visible=(bg_type == "custom")),
640
- ai_prompt: gr.update(visible=(bg_type == "ai")),
641
- ai_generate_btn: gr.update(visible=(bg_type == "ai")),
642
- ai_preview: gr.update(visible=(bg_type == "ai"))
643
- }
644
-
645
- def update_gradient_preview(grad_type):
646
- try:
647
- return create_gradient_background(grad_type, 400, 200)
648
- except:
649
- return None
650
-
651
- def update_color_preview(color):
652
- try:
653
- return create_solid_color(color, 400, 200)
654
- except:
655
- return None
656
-
657
- def refresh_myavatar_videos():
658
- try:
659
- choices = myavatar_api.get_video_choices()
660
- return gr.update(choices=choices, value=None)
661
- except Exception as e:
662
- logger.error(f"Error refreshing videos: {e}")
663
- return gr.update(choices=["Error loading videos"])
664
-
665
- def load_video_preview(selection):
666
- try:
667
- if not selection or selection == "No videos available":
668
- return None
669
-
670
- video_url = myavatar_api.get_video_url(selection)
671
- return video_url
672
- except Exception as e:
673
- logger.error(f"Error loading video preview: {e}")
674
- return None
675
-
676
- def generate_ai_bg(prompt):
677
- bg_img, message = generate_ai_background(prompt)
678
- return bg_img
679
-
680
- # Connect event handlers
681
- background_type.change(
682
- fn=update_background_options,
683
- inputs=[background_type],
684
- outputs=[gradient_type, gradient_preview, solid_color, color_preview,
685
- custom_bg_upload, ai_prompt, ai_generate_btn, ai_preview]
686
- )
687
-
688
- gradient_type.change(
689
- fn=update_gradient_preview,
690
- inputs=[gradient_type],
691
- outputs=[gradient_preview]
692
- )
693
-
694
- solid_color.change(
695
- fn=update_color_preview,
696
- inputs=[solid_color],
697
- outputs=[color_preview]
698
- )
699
-
700
- refresh_btn.click(
701
- fn=refresh_myavatar_videos,
702
- outputs=[myavatar_dropdown]
703
- )
704
-
705
- myavatar_dropdown.change(
706
- fn=load_video_preview,
707
- inputs=[myavatar_dropdown],
708
- outputs=[video_preview]
709
- )
710
-
711
- ai_generate_btn.click(
712
- fn=generate_ai_bg,
713
- inputs=[ai_prompt],
714
- outputs=[ai_preview]
715
- )
716
-
717
- process_btn.click(
718
- fn=process_video_with_background_stoppable,
719
- inputs=[
720
- video_upload,
721
- myavatar_dropdown,
722
- background_type,
723
- gradient_type,
724
- solid_color,
725
- custom_bg_upload,
726
- ai_prompt
727
- ],
728
- outputs=[process_btn, stop_btn, result_video, status_output]
729
- )
730
-
731
- stop_btn.click(
732
- fn=stop_processing,
733
- outputs=[stop_btn, status_output]
734
- )
735
-
736
- app.load(
737
- fn=lambda: create_gradient_background("sunset", 400, 200),
738
- outputs=[gradient_preview]
739
- )
740
-
741
- return app
742
 
743
  def main():
744
- """Main application entry point"""
745
  try:
746
- # Pre-warm models
747
- logger.info("Pre-warming GPU models...")
748
  if SAM2_ENABLED:
749
  get_sam2()
750
  if MATANY_ENABLED:
751
  get_matanyone()
752
-
753
  app = create_interface()
754
-
 
755
  app.launch(
756
  server_name="0.0.0.0",
757
- server_port=7860,
758
- share=True,
759
  show_error=True,
760
  quiet=False
761
  )
762
-
763
  except Exception as e:
764
  logger.error(f"Failed to start application: {e}")
765
  sys.exit(1)
766
 
 
767
  if __name__ == "__main__":
768
- main()
 
1
  #!/usr/bin/env python3
2
  """
3
+ BackgroundFX Pro Entrypoint
4
+ Launches the Gradio UI using the processing pipeline.
5
  """
6
 
7
  import os
8
  import sys
 
 
 
 
 
9
  import logging
 
 
 
 
 
 
 
 
 
 
10
 
11
+ from pipeline import (
12
+ get_sam2, get_matanyone,
13
+ SAM2_ENABLED, MATANY_ENABLED,
14
+ GPU_NAME, GPU_MEMORY
15
+ )
16
+ from ui import create_interface
17
 
 
18
  logging.basicConfig(
19
  level=logging.INFO,
20
  format='%(asctime)s - %(levelname)s - %(message)s'
21
  )
22
+ logger = logging.getLogger("app")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  def main():
 
26
  try:
27
+ logger.info(f"System status — GPU: {GPU_NAME}, VRAM: {GPU_MEMORY:.1f} GB")
28
+ # Pre-warm (safe; lazy loaders inside handle errors)
29
  if SAM2_ENABLED:
30
  get_sam2()
31
  if MATANY_ENABLED:
32
  get_matanyone()
33
+
34
  app = create_interface()
35
+
36
+ port = int(os.getenv("PORT", "7860"))
37
  app.launch(
38
  server_name="0.0.0.0",
39
+ server_port=port,
 
40
  show_error=True,
41
  quiet=False
42
  )
 
43
  except Exception as e:
44
  logger.error(f"Failed to start application: {e}")
45
  sys.exit(1)
46
 
47
+
48
  if __name__ == "__main__":
49
+ main()
pipeline.py ADDED
@@ -0,0 +1,669 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ 🍹 Video Background Replacer - IFRAME OPTIMIZED VERSION
3
+ Combining Windsurf's UI improvements with Claude's audio/video processing
4
+ """
5
+ import streamlit as st
6
+ import tempfile
7
+ import os
8
+ import cv2
9
+ import numpy as np
10
+ from PIL import tempfile
11
+ import os
12
+ import timeimport Image
13
+ # ============================================================================
14
+ # IFRAME OPTIMIZATION - From Windsurf + Claude's recommendations
15
+ # ============================================================================
16
+ # Simple two-column layout
17
+ col1, col2 = st.columns(2)
18
+
19
+ with col1:
20
+ st.subheader("Original Video")
21
+ video_file = st.file_uploader("Choose video", type=['mp4', 'avi', 'mov'])
22
+ if video_file:
23
+ st.video(video_file)
24
+ # Configure for iframe embedding
25
+ st.set_page_config(
26
+ page_title="Video Background Replacement",
27
+ page_icon="🍹",
28
+ layout="wide",
29
+ initial_sidebar_state="collapsed"
30
+ )
31
+ # Add iframe-friendly styling
32
+ st.markdown("""
33
+ <style>
34
+ /* Hide Streamlit elements for clean iframe embedding */
35
+ .main > div {
36
+ padding-top: 1rem;
37
+ }
38
+ .stDeployButton {
39
+ display: none;
40
+ }
41
+ header[data-testid="stHeader"] {
42
+ display: none;
43
+ }
44
+ .stMainBlockContainer {
45
+ padding-top: 1rem;
46
+ }
47
+ /* Clean, professional CSS - NO ANIMATIONS for iframe stability */
48
+ .main-header {
49
+ text-align: center;
50
+ background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
51
+ -webkit-background-clip: text;
52
+ -webkit-text-fill-color: transparent;
53
+ font-size: 2.5rem;
54
+ font-weight: bold;
55
+ margin-bottom: 1.5rem;
56
+ }
57
+ .upload-container {
58
+ background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
59
+ border-radius: 20px;
60
+ padding: 25px;
61
+ margin: 15px 0;
62
+ box-shadow: 0 8px 32px rgba(31, 38, 135, 0.37);
63
+ }
64
+ .upload-slot {
65
+ background: rgba(255, 255, 255, 0.8);
66
+ border: 2px dashed #ccc;
67
+ border-radius: 15px;
68
+ padding: 20px;
69
+ text-align: center;
70
+ margin: 10px 0;
71
+ min-height: 300px; /* Prevents height jumping */
72
+ display: flex;
73
+ flex-direction: column;
74
+ justify-content: center;
75
+ transition: all 0.3s ease; /* Smooth transitions */
76
+ }
77
+
78
+ .processing-box {
79
+ border: 2px solid #4ECDC4;
80
+ border-radius: 15px;
81
+ padding: 20px;
82
+ margin: 20px 0;
83
+ background: rgba(78, 205, 196, 0.1);
84
+ }
85
+
86
+ .success-box {
87
+ background: linear-gradient(45deg, #4CAF50, #45a049);
88
+ color: white;
89
+ padding: 15px;
90
+ border-radius: 10px;
91
+ text-align: center;
92
+ font-weight: bold;
93
+ margin: 20px 0;
94
+ }
95
+
96
+ /* Iframe-specific optimizations */
97
+ .block-container {
98
+ padding-top: 1rem;
99
+ padding-bottom: 1rem;
100
+ }
101
+ /* Mobile responsiveness for iframe */
102
+ @media (max-width: 768px) {
103
+ .main-header {
104
+ font-size: 1.8rem;
105
+ }
106
+ .upload-slot {
107
+ min-height: 250px;
108
+ }
109
+ }
110
+ </style>
111
+ """, unsafe_allow_html=True)
112
+
113
+ def main():
114
+ # Compact header for iframe
115
+ st.markdown('<h1 class="main-header">🍹 Video Background Replacer</h1>', unsafe_allow_html=True)
116
+ st.markdown('<p style="text-align: center; font-size: 1.1rem; color: #666; margin-bottom: 2rem;">Replace your video background with AI + Audio!</p>', unsafe_allow_html=True)
117
+
118
+ # Compact upload section
119
+ col1, col2 = st.columns(2)
120
+
121
+ with col1:
122
+ st.markdown("### 🎬 Upload Your Video")
123
+ video_file = st.file_uploader(
124
+ "Choose a video file",
125
+ type=['mp4', 'avi', 'mov'],
126
+ help="Upload the video you want to process",
127
+ key="video_uploader"
128
+ )
129
+ if video_file:
130
+ st.success("✅ Video loaded!")
131
+ st.video(video_file)
132
+
133
+ with col2:
134
+ st.markdown("### 🖼️ Upload Background Image")
135
+ image_file = st.file_uploader(
136
+ "Choose a background image",
137
+ type=['png', 'jpg', 'jpeg'],
138
+ help="Upload the background you want to use",
139
+ key="image_uploader"
140
+ )
141
+ if image_file:
142
+ st.success("✅ Background loaded!")
143
+ st.image(image_file, width=300)
144
+
145
+ # Process button
146
+ if video_file and image_file and st.button("🍹 PROCESS VIDEO", key="process_
147
+ # Clear any previous results
148
+ if 'video_result' in st.session_state:
149
+ del st.session_state['video_result']
150
+
151
+ progress_bar = st.progress(0)
152
+ status_text = st.empty()
153
+
154
+ # Step A: Save uploaded files
155
+ status_text.text("Step A: Saving uploaded files...")
156
+
157
+ video_path = f"temp_video_{int(time.time())}.mp4"
158
+ image_path = f"temp_image_{int(time.time())}.jpg"
159
+
160
+
161
+ with open(video_path, "wb") as f:
162
+ f.write(video_file.read())
163
+ fps = int(cap_orig.get(cv2.CAP_PROP_FPS))
164
+ width = int(cap_orig.get(cv2.CAP_PROP_FRAME_WIDTH))
165
+ height = int(cap_orig.get(cv2.CAP_PROP_FRAME_HEIGHT))
166
+ cap_orig.release()
167
+ try:
168
+ # Step B: Load MatAnyone
169
+ status_text.text("Step B: Loading MatAnyone...")
170
+ from matanyone import InferenceCore
171
+ processor = InferenceCore("PeiqingYang/MatAnyone")
172
+ progress_bar.progress(40)
173
+
174
+ # Step C: Create initial mask
175
+ status_text.text("Step C: Creating segmentation mask...")
176
+ import mediapipe as mp
177
+
178
+ mp_selfie = mp.solutions.selfie_segmentation
179
+ selfie_segmentation = mp_selfie.SelfieSegmentation(model_selection=1)
180
+
181
+ # Get first frame for mask
182
+ cap = cv2.VideoCapture(video_path)
183
+ ret, first_frame = cap.read()
184
+ cap.release()
185
+
186
+ if ret:
187
+ rgb_frame = cv2.cvtColor(first_frame, cv2.COLOR_BGR2RGB)
188
+ results = selfie_segmentation.process(rgb_frame)
189
+ mask = (results.segmentation_mask > 0.5).astype(np.uint8) * 255
190
+ mask_path = f"temp_mask_{int(time.time())}.png"
191
+ cv2.imwrite(mask_path, mask)
192
+ else:
193
+ st.error("Could not read video file")
194
+ st.stop()
195
+
196
+ progress_bar.progress(60)
197
+
198
+ # Step D: Run MatAnyone (creates green screen video)
199
+ status_text.text("Step D: Running MatAnyone - extracting person...")
200
+
201
+ foreground_path, alpha_path = processor.process_video(
202
+ input_path=video_path,
203
+ mask_path=mask_path,
204
+ output_path="output"
205
+ )
206
+
207
+ progress_bar.progress(80)
208
+
209
+ # Step E: Replace green screen with new background
210
+ status_text.text("Step E: Adding new background...")
211
+
212
+ # Read background image
213
+ bg_image = cv2.imread(image_path)
214
+
215
+ # Open the videos
216
+ cap_fg = cv2.VideoCapture(foreground_path)
217
+ cap_alpha = cv2.VideoCapture(alpha_path)
218
+
219
+ # Get video properties from ORIGINAL video
220
+ cap_orig = cv2.VideoCapture(video_path)
221
+ fps = int(cap_orig.get(cv2.CAP_PROP_FPS))
222
+ width = int(cap_orig.get(cv2.CAP_PROP_FRAME_WIDTH))
223
+ height = int(cap_orig.get(cv2.CAP_PROP_FRAME_HEIGHT))
224
+ cap_orig.release()
225
+
226
+ # Resize background to match ORIGINAL video
227
+ bg_resized = cv2.resize(bg_image, (width, height))
228
+
229
+ # Step F: Create output video with smart codec selection
230
+ status_text.text("Step F: Creating optimized video...")
231
+
232
+ try:
233
+ # Try to create MP4 directly with H.264
234
+
235
+ fourcc_h264 = cv2.VideoWriter_fourcc(*'H264')
236
+ output_path = f"final_video_{int(time.time())}.mp4"
237
+ out = cv2.VideoWriter(output_path, fourcc_h264, fps, (width, height))
238
+
239
+ if not out.isOpened():
240
+ # Fallback to XVID
241
+ temp_output_path = f"temp_output_{int(time.time())}.avi"
242
+ fourcc_xvid = cv2.VideoWriter_fourcc(*'XVID')
243
+ out = cv2.VideoWriter(temp_output_path, fourcc_xvid, fps, (width, height))
244
+ if not out.isOpened():
245
+ use_temp_file = False
246
+ st.info("✅ Using H.264 codec directly")
247
+
248
+ except Exception as e:
249
+ st.error(f"❌ Setup error: {e}")
250
+ st.stop()
251
+ use_temp_file = True
252
+ temp_output_path = f"temp_output_{st.session_state.get('counter', 0)}.avi"
253
+ fourcc_xvid = cv2.VideoWriter_fourcc(*'XVID')
254
+ out = cv2.VideoWriter(temp_output_path, fourcc_xvid, fps, (width, height))
255
+ if not out.isOpened():
256
+ st.error("❌ Could not create video writer!")
257
+ st.stop()
258
+
259
+ except Exception as e:
260
+ st.error(f"❌ Setup error: {e}")
261
+ st.stop()
262
+
263
+ # Process each frame
264
+ frame_count = 0
265
+ while True:
266
+ ret_fg, frame_fg = cap_fg.read()
267
+ ret_alpha, frame_alpha = cap_alpha.read()
268
+
269
+ if not ret_fg or not ret_alpha:
270
+ break
271
+
272
+ # Convert alpha to single channel if needed
273
+ if len(frame_alpha.shape) == 3:
274
+ alpha = cv2.cvtColor(frame_alpha, cv2.COLOR_BGR2GRAY)
275
+ else:
276
+ alpha = frame_alpha
277
+
278
+ # Normalize alpha
279
+ alpha_norm = alpha.astype(float) / 255.0
280
+
281
+ # Blend: person * alpha + background * (1-alpha)
282
+ result = np.zeros_like(frame_fg, dtype=float)
283
+ for c in range(3):
284
+ result[:,:,c] = (frame_fg[:,:,c] * alpha_norm +
285
+ bg_resized[:,:,c] * (1 - alpha_norm))
286
+
287
+ out.write(result.astype(np.uint8))
288
+ frame_count += 1
289
+
290
+ cap_fg.release()
291
+ cap_alpha.release()
292
+ out.release()
293
+
294
+ st.write(f"✅ Processed {frame_count} frames")
295
+
296
+ progress_bar.progress(90)
297
+
298
+ # Step G: Convert and optimize for web streaming
299
+ if use_temp_file:
300
+ status_text.text("Step G: Converting to web-optimized MP4...")
301
+ # Process each frame
302
+ frame_count = 0
303
+ while True:
304
+ ret_fg, frame_fg = cap_fg.read()
305
+ ret_alpha, frame_alpha = cap_alpha.read()
306
+
307
+ if not ret_fg or not ret_alpha:
308
+ break
309
+ extract_audio_cmd = [
310
+ 'ffmpeg', '-y',
311
+ '-i', video_path,
312
+ '-vn', '-acodec', 'pcm_s16le', '-ar', '44100', '-ac', '2',
313
+ audio_path
314
+ ]
315
+ if has_audio:
316
+ st.info("✅ Audio extracted from original video")
317
+ # Combine processed video with original audio
318
+ ffmpeg_cmd = [
319
+ 'ffmpeg', '-y',
320
+ '-i', temp_output_path, # Processed video (no audio)
321
+ '-i', audio_path, # Original audio
322
+ '-c:v', 'libx264',
323
+ '-preset', 'fast',
324
+ '-crf', '23',
325
+ '-pix_fmt', 'yuv420p',
326
+ '-c:a', 'aac', # Audio codec
327
+ '-movflags', '+faststart',
328
+ final_output_path
329
+ ]
330
+ else:# No audio - just convert video
331
+ ffmpeg_cmd = [
332
+ 'ffmpeg', '-y',
333
+ '-i', temp_output_path,
334
+ '-c:v', 'libx264',
335
+ '-preset', 'fast',
336
+ '-crf', '23',
337
+ '-pix_fmt', 'yuv420p',
338
+ '-movflags', '+faststart',
339
+ final_output_path
340
+ ]
341
+ # Normalize alpha
342
+ alpha_norm = alpha.astype(float) / 255.0
343
+
344
+ # Blend: person * alpha + background * (1-alpha)
345
+ result = np.zeros_like(frame_fg, dtype=float)
346
+ for c in range(3):
347
+ result[:,:,c] = (frame_fg[:,:,c] * alpha_norm +
348
+ bg_resized[:,:,c] * (1 - alpha_norm))
349
+
350
+ out.write(result.astype(np.uint8))
351
+ frame_count += 1
352
+
353
+ cap_fg.release()
354
+ cap_alpha.release()
355
+ out.release()
356
+
357
+ st.write(f"✅ Processed {frame_count} frames")
358
+
359
+ progress_bar.progress(90)
360
+
361
+ # Step G: Audio preservation and web optimization
362
+ if use_temp_file:
363
+ status_text.text("Step G: Adding audio and optimizing...")
364
+ st.info("🎵 Using moviepy for audio preservation...")
365
+
366
+ # Load original video to get audio
367
+ original_clip = mp.VideoFileClip(video_path)
368
+
369
+ if original_clip.audio is not None:
370
+ # Load processed video (no audio)
371
+ processed_clip = mp.VideoFileClip(temp_output_path)
372
+
373
+ # Add original audio to processed video
374
+ final_clip = processed_clip.set_audio(original_clip.audio)
375
+
376
+ final_output_path = f"final_video_{st.session_state.get('counter', 0)}.mp4"
377
+ final_clip.write_videofile(final_output_path, codec='libx264', audio_codec='aac')
378
+
379
+ # Close clips to free memory
380
+ original_clip.close()
381
+ processed_clip.close()
382
+ final_clip.close()
383
+
384
+ st.success("✅ Video with audio created using moviepy!")
385
+ output_path = final_output_path
386
+
387
+ # Clean up temp file
388
+ if os.path.exists(temp_output_path):
389
+ os.remove(temp_output_path)
390
+ else:
391
+ st.info("ℹ️ Original video has no audio track")
392
+ # No audio - just optimize video
393
+ processed_clip = mp.VideoFileClip(temp_output_path)
394
+ final_output_path = f"final_video_{st.session_state.get('counter', 0)}.mp4"
395
+ processed_clip.write_videofile(final_output_path, codec='libx264')
396
+ processed_clip.close()
397
+
398
+ output_path = final_output_path
399
+ if os.path.exists(temp_output_path):
400
+ os.remove(temp_output_path)
401
+
402
+ except ImportError:
403
+ st.warning("⚠️ moviepy not available")
404
+ # Fallback to qtfaststart approach without audio
405
+ try:
406
+ from qtfaststart import processor
407
+
408
+ # Convert AVI to MP4 using OpenCV
409
+ cap_temp = cv2.VideoCapture(temp_output_path)
410
+
411
+ final_output_path = f"final_video_{st.session_state.get('counter', 0)}.mp4"
412
+ fourcc_h264 = cv2.VideoWriter_fourcc(*'H264')
413
+ out_final = cv2.VideoWriter(final_output_path, fourcc_h264, fps, (width, height))
414
+ ⚠️ moviepy not available")
415
+ # Fallback to qtfaststart approach without audio
416
+ try:
417
+ from qtfaststart import processor
418
+
419
+ # Convert AVI to MP4 using OpenCV
420
+ cap_temp = cv2.VideoCapture(temp_output_path)
421
+
422
+ final_output_path = f"final_video_{st.session_state.get('counter', 0)}.mp4"
423
+ fourcc_h264 = cv2.VideoWriter_fourcc(*'H264')
424
+ out_final = cv2.VideoWriter(final_output_path, fourcc_h264, fps, (width, height))
425
+
426
+ # Copy frames to MP4
427
+ while True:
428
+ ret, frame = cap_temp.read()
429
+ if not ret:
430
+ break
431
+ out_final.write(frame)
432
+
433
+ cap_temp.release()
434
+ out_final.release()
435
+
436
+ # Now fix the moov atom
437
+ temp_fixed_path = f"fixed_{final_output_path}"
438
+ processor.process(final_output_path, temp_fixed_path)
439
+
440
+ # Replace original with fixed version
441
+ os.replace(temp_fixed_path, final_output_path)
442
+
443
+ st.warning("⚠️ Video created without audio (use moviepy for audio support)")
444
+ output_path = final_output_path
445
+
446
+ # Clean up temp file
447
+ if os.path.exists(temp_output_path):
448
+ os.remove(temp_output_path)
449
+
450
+ except Exception as qtfast_error:
451
+ st.warning(f"⚠️ qtfaststart failed: {qtfast_error}")
452
+ st.info("💡 Using original AVI format without audio")
453
+ output_path = temp_output_path
454
+
455
+ except Exception as moviepy_error:
456
+ st.warning(f"⚠️ moviepy failed: {moviepy_error}")
457
+ st.info("💡 Using original AVI format")
458
+ output_path = temp_output_path
459
+ else:
460
+ # H.264 MP4 was created directly - add audio and fix moov atom
461
+ try:
462
+ # First, try to add audio if it exists
463
+ audio_path = f"temp_audio_{st.session_state.get('counter', 0)}.wav"
464
+
465
+ # Try FFmpeg for audio preservation
466
+ import subprocess
467
+ final_output_path = f"final_video_{int(time.time())}.mp4"
468
+
469
+ # Extract audio from original video
470
+ audio_path = f"temp_audio_{int(time.time())}.wav"
471
+ extract_audio_cmd = [
472
+ 'ffmpeg', '-y',
473
+ '-i', video_path,
474
+ has_audio = audio_result.returncode == 0 and os.path.exists(audio_path)
475
+
476
+ if has_audio:
477
+
478
+
479
+ st.info("✅ Audio extracted, combining with video...")
480
+ # Combine processed video with original audio
481
+ ffmpeg_cmd = [
482
+
483
+ 'ffmpeg', '-y',
484
+ '-i', temp_output_path, # Processed video (no audio)
485
+ '-i', audio_path, # Original audio
486
+ '-c:v', 'libx264',
487
+ '-preset', 'fast',
488
+ '-crf', '23',
489
+ '-pix_fmt', 'yuv420p',
490
+ '-c:a', 'aac', # Audio codec
491
+ '-movflags', '+faststart', # Web optimization
492
+ final_output_path
493
+ ]
494
+ else:
495
+ st.info("ℹ️ No audio track found")
496
+ # No audio - just convert video
497
+ ffmpeg_cmd = [
498
+ 'ffmpeg', '-y',
499
+ '-i', temp_output_path,
500
+ '-c:v', 'libx264',
501
+ '-preset', 'fast',
502
+ '-crf', '23',
503
+ '-pix_fmt', 'yuv420p',
504
+ '-movflags', '+faststart',
505
+ final_output_path
506
+ result = subprocess.run(ffmpeg_cmd, capture_output=True, text=True)
507
+
508
+ if result.returncode == 0:
509
+ st.success("✅ Video with audio optimized!")
510
+ output_path = final_output_path
511
+ # Clean up temp files
512
+ if os.path.exists(temp_output_path):
513
+ os.remove(temp_output_path)
514
+ if has_audio and os.path.exists(audio_path):
515
+ os.remove(audio_path)
516
+ st.success("✅ MP4 with audio optimized for web streaming!")
517
+ else:
518
+ st.warning("⚠️ Failed to add audio, using video-only version")
519
+ if os.path.exists(audio_path):
520
+ os.remove(audio_path)
521
+ else:
522
+ st.warning("⚠️ FFmpeg failed, trying moviepy...")
523
+ raise Exception("FFmpeg failed")
524
+
525
+ except:
526
+ # Try moviepy for audio
527
+ try:
528
+ import moviepy.editor as mp
529
+
530
+ st.info("🎵 Using moviepy for audio...")
531
+ original_clip = mp.VideoFileClip(video_path)
532
+
533
+ if original_clip.audio is not None:
534
+ processed_clip = mp.VideoFileClip(temp_output_path)
535
+ final_clip = processed_clip.set_audio(original_clip.audio)
536
+
537
+ final_output_path = f"final_video_{int(time.time())}.mp4"
538
+ final_clip.write_videofile(final_output_path, codec='libx264', audio_codec='aac')
539
+
540
+
541
+ original_clip.close()
542
+ processed_clip.close()
543
+ final_clip.close()
544
+
545
+ st.success("✅ Video with audio created!")
546
+ output_path = final_output_path
547
+
548
+ if os.path.exists(temp_output_path):
549
+ os.remove(temp_output_path)
550
+ else:
551
+ st.info("ℹ️ No audio in original")
552
+ output_path = temp_output_path
553
+
554
+ except Exception as e:
555
+ st.warning(f"⚠️ Audio processing failed: {e}")
556
+ output_path = temp_output_path
557
+ else:
558
+ # H.264 MP4 created directly - just optimize
559
+ try:
560
+ from qtfaststart import processor as qtfast_processor
561
+
562
+ temp_fixed_path = f"fixed_{output_path}"
563
+ qtfast_processor.process(output_path, temp_fixed_path)
564
+ os.replace(temp_fixed_path, output_path)
565
+ st.info("✅ Video optimized for web streaming")
566
+ except:
567
+ st.info("✅ Video created (basic optimization)")
568
+ except ImportError:
569
+ st.info("✅ MP4 created (qtfaststart not available for optimization)")
570
+ except Exception as e:
571
+ st.warning(f"⚠️ Moov atom optimization failed: {e}")
572
+
573
+ except Exception as e:
574
+ st.warning(f"⚠️ Audio processing failed: {e}")
575
+ st.info("✅ MP4 created but may be missing audio")
576
+
577
+ progress_bar.progress(100)
578
+ status_text.text("✅ Complete!")
579
+
580
+ # Step H: Display and download the video
581
+ if os.path.exists(output_path):
582
+ file_size = os.path.getsize(output_path)
583
+ st.write(f"✅ Video saved successfully: {file_size:,} bytes")
584
+
585
+ # Read the video we just created
586
+ with open(output_path, 'rb') as f:
587
+ video_bytes = f.read()
588
+
589
+ # Display the video
590
+ st.subheader("🎬 Result")
591
+ try:
592
+ st.video(video_bytes)
593
+ st.success("✅ Video display successful!")
594
+ except Exception as video_error:
595
+ st.error(f"❌ Video display error: {video_error}")
596
+ st.info("💡 Video file created successfully but display failed. You can still download it.")
597
+
598
+ # Download button
599
+ file_extension = "mp4" if output_path.endswith('.mp4') else output_path.split('.')[-1]
600
+ st.download_button(
601
+ label="📥 Download Background Replaced Video",
602
+ data=video_bytes,
603
+ file_name=f"background_replaced.{file_extension}",
604
+ mime=f"video/{file_extension}",
605
+ use_container_width=True
606
+ progress_bar.progress(100)
607
+ status_text.text("✅ Complete!")
608
+
609
+ # Step H: Display results
610
+ if os.path.exists(output_path):
611
+ file_size = os.path.getsize(output_path)
612
+ st.write(f"✅ Video saved: {file_size:,} bytes")
613
+
614
+ # Read and store video
615
+ with open(output_path, 'rb') as f:
616
+ st.session_state['video_result'] = f.read()
617
+
618
+ # Clear processing UI
619
+ progress_bar.empty()
620
+ status_text.empty()
621
+
622
+ # Show success
623
+ st.markdown('<div class="success-box">🎉 Video Successfully Processed! 🎉</div>', unsafe_allow_html=True)
624
+
625
+ # Display video
626
+ st.markdown("### 🎬 Your Processed Video:")
627
+ st.video(st.session_state['video_result'])
628
+
629
+ # Download button
630
+ st.download_button(
631
+ label="⬇️ Download Processed Video",
632
+ data=st.session_state['video_result'],
633
+ file_name=f"background_replaced_{int(time.time())}.mp4",
634
+ mime="video/mp4",
635
+ use_container_width=True
636
+ )
637
+
638
+ else:
639
+ st.error("❌ Failed to create video")
640
+
641
+ # Cleanup
642
+ try:
643
+ for temp_file in [video_path, image_path, mask_path, foreground_path, alpha_path, output_path]:
644
+ if os.path.exists(temp_file):
645
+ os.remove(temp_file)
646
+ except:
647
+ pass
648
+
649
+ except Exception as e:
650
+ st.error(f"❌ Error: {str(e)}")
651
+ import traceback
652
+ st.code(traceback.format_exc())
653
+ elif not video_file or not image_file:
654
+ st.info("👆 Upload both a video and background image to start processing!")
655
+
656
+ # Compact footer for iframe
657
+ st.markdown("---")
658
+ st.markdown("""
659
+ <div style="text-align: center; color: #666; padding: 10px;">
660
+ <p><small>🍹 Powered by MatAnyone + Audio Preservation | Optimized for MyAvatar</small></p>
661
+ </div>
662
+ """, unsafe_allow_html=True)
663
+
664
+ if __name__ == "__main__":
665
+ main()
666
+
667
+
668
+
669
+ ]
requirements.txt CHANGED
@@ -1,14 +1,5 @@
1
- # ===== Core runtime =====
2
- # Option A: Keep your current Torch stack (safe for existing builds)
3
- torch==2.2.2
4
- torchvision==0.17.2
5
- torchaudio==2.2.2
6
-
7
- # Option B: Faster CUDA 12.1 wheels for T4 (uncomment to use instead)
8
- # torch==2.3.1+cu121
9
- # torchvision==0.18.1+cu121
10
- # torchaudio==2.3.1+cu121
11
- # --extra-index-url https://download.pytorch.org/whl/cu121
12
 
13
  # ===== Video / image IO =====
14
  opencv-python-headless==4.10.0.84
@@ -40,4 +31,8 @@ huggingface_hub>=0.33.5
40
  ffmpeg-python==0.2.0
41
  psutil==6.0.0
42
  requests==2.31.0
43
- scikit-learn==1.5.1
 
 
 
 
 
1
+ # ===== Core runtime (Torch is installed in Dockerfile with cu121 wheels) =====
2
+ # DO NOT add torch/torchvision/torchaudio here when using the CUDA wheels in Dockerfile.
 
 
 
 
 
 
 
 
 
3
 
4
  # ===== Video / image IO =====
5
  opencv-python-headless==4.10.0.84
 
31
  ffmpeg-python==0.2.0
32
  psutil==6.0.0
33
  requests==2.31.0
34
+ scikit-learn==1.5.1
35
+
36
+ # ===== (Optional) Extras =====
37
+ # safetensors==0.4.5 # if you pull weights that use safetensors
38
+ # aiohttp==3.10.5 # if you later async-fetch assets
ui.py ADDED
@@ -0,0 +1,356 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ui.py
2
+ """
3
+ BackgroundFX Pro — Gradio UI, background generators, and data sources.
4
+ """
5
+
6
+ import io
7
+ import os
8
+ import time
9
+ import logging
10
+ import tempfile
11
+ import requests
12
+ import numpy as np
13
+ from typing import Optional, Tuple, List, Dict, Any
14
+ from PIL import Image
15
+ import gradio as gr
16
+
17
+ from pipeline import (
18
+ process_video_gpu_optimized, stop_processing, processing_active,
19
+ SAM2_ENABLED, MATANY_ENABLED, GPU_NAME, GPU_MEMORY
20
+ )
21
+
22
+ logger = logging.getLogger("ui")
23
+
24
+
25
+ # ---- Background generators ----
26
+ def create_gradient_background(gradient_type: str, width: int, height: int) -> Image.Image:
27
+ img = np.zeros((height, width, 3), dtype=np.uint8)
28
+ if gradient_type == "sunset":
29
+ for i in range(height):
30
+ r = int(255*(1-i/height) + 128*(i/height))
31
+ g = int(165*(1-i/height) + 64*(i/height))
32
+ b = int(0*(1-i/height) + 128*(i/height))
33
+ img[i, :] = [r, g, b]
34
+ elif gradient_type == "ocean":
35
+ for i in range(height):
36
+ r = int(0*(1-i/height) + 30*(i/height))
37
+ g = int(100*(1-i/height) + 144*(i/height))
38
+ b = int(200*(1-i/height) + 255*(i/height))
39
+ img[i, :] = [r, g, b]
40
+ elif gradient_type == "forest":
41
+ for i in range(height):
42
+ r = int(34*(1-i/height) + 0*(i/height))
43
+ g = int(139*(1-i/height) + 100*(i/height))
44
+ b = int(34*(1-i/height) + 0*(i/height))
45
+ img[i, :] = [r, g, b]
46
+ else:
47
+ for i in range(height):
48
+ r = int(70*(1-i/height) + 20*(i/height))
49
+ g = int(130*(1-i/height) + 100*(i/height))
50
+ b = int(180*(1-i/height) + 255*(i/height))
51
+ img[i, :] = [r, g, b]
52
+ return Image.fromarray(img)
53
+
54
+
55
+ def create_solid_color(color: str, width: int, height: int) -> Image.Image:
56
+ color_map = {
57
+ "white": (255, 255, 255),
58
+ "black": (0, 0, 0),
59
+ "blue": (70, 130, 180),
60
+ "green": (0, 128, 0),
61
+ "red": (220, 20, 60),
62
+ "purple": (128, 0, 128),
63
+ "orange": (255, 165, 0),
64
+ "yellow": (255, 255, 0)
65
+ }
66
+ rgb = color_map.get(color, (70, 130, 180))
67
+ return Image.fromarray(np.full((height, width, 3), rgb, dtype=np.uint8))
68
+
69
+
70
def generate_ai_background(prompt: str) -> Tuple[Optional[Image.Image], str]:
    """Generate a 16:9 background via the HF inference API, with fallbacks.

    Tries a list of text-to-image models in order; on any failure (missing
    token, model cold start, network error) falls back to a gradient so the
    caller always gets a usable background.

    Args:
        prompt: Free-text description of the desired background.

    Returns:
        (image, status_message); image is None only for an empty prompt.
    """
    try:
        if not prompt.strip():
            return None, "Please enter a prompt"
        models = [
            "black-forest-labs/FLUX.1-schnell",
            "stabilityai/stable-diffusion-xl-base-1.0",
            "runwayml/stable-diffusion-v1-5"
        ]
        enhanced_prompt = f"professional video background, {prompt}, high quality, 16:9, cinematic lighting, detailed"
        # BUGFIX: only send an Authorization header when a real token is
        # configured; the old 'hf_placeholder' default guaranteed a 401
        # instead of allowing anonymous inference.
        token = os.getenv("HUGGINGFACE_TOKEN", "")
        headers = {"Authorization": f"Bearer {token}"} if token else {}
        payload = {"inputs": enhanced_prompt, "parameters": {"width": 1024, "height": 576, "num_inference_steps": 20, "guidance_scale": 7.5}}
        for model in models:
            try:
                url = f"https://api-inference.huggingface.co/models/{model}"
                # BUGFIX: the previous code used stream=True and then read
                # r.raw (r.raw is never None on a streamed response), which
                # bypasses requests' content decoding (gzip/deflate) and can
                # yield corrupted image bytes. r.content is the decoded body.
                r = requests.post(url, headers=headers, json=payload, timeout=60)
                if r.status_code == 200 and "image" in r.headers.get("content-type", "").lower():
                    return Image.open(io.BytesIO(r.content)).convert("RGB"), "AI background generated"
                if r.status_code == 503:
                    # Model is cold-starting; try the next candidate.
                    continue
            except Exception:
                continue
        return create_gradient_background("default", 1920, 1080), "Gradient fallback"
    except Exception as e:
        logger.error(f"AI background error: {e}")
        return create_gradient_background("default", 1920, 1080), "Default due to error"
97
+
98
+
99
+ # ---- MyAvatar API ----
100
class MyAvatarAPI:
    """Thin client for the MyAvatar video-listing API with a 5-minute cache."""

    def __init__(self):
        self.api_base = "https://app.myavatar.dk/api"
        # Last fetched video records and the epoch time of that fetch.
        self.videos_cache: List[Dict[str, Any]] = []
        self.last_refresh = 0

    def fetch_videos(self) -> List[Dict[str, Any]]:
        """Return the video list, re-fetching at most every 300 seconds."""
        try:
            cache_is_fresh = time.time() - self.last_refresh < 300
            if cache_is_fresh and self.videos_cache:
                return self.videos_cache
            resp = requests.get(f"{self.api_base}/videos", timeout=10)
            if resp.status_code != 200:
                return []
            self.videos_cache = resp.json().get("videos", [])
            self.last_refresh = time.time()
            return self.videos_cache
        except Exception as e:
            logger.error(f"Fetch videos failed: {e}")
            return []

    def get_video_choices(self) -> List[str]:
        """Render the videos as dropdown labels: 'title (ID: n) - status'."""
        vids = self.fetch_videos()
        if not vids:
            return ["No videos available"]
        return [
            f"{v.get('title','Untitled')} (ID: {v.get('id','?')}) - {v.get('status','?')}"
            for v in vids
        ]

    def get_video_url(self, selection: str) -> Optional[str]:
        """Map a dropdown label back to its cached video_url, or None."""
        if not selection or selection == "No videos available":
            return None
        try:
            if "(ID: " in selection:
                wanted = selection.split("(ID: ")[1].split(")")[0]
                for record in self.videos_cache:
                    if str(record.get("id")) == wanted:
                        return record.get("video_url")
        except Exception as e:
            logger.error(f"Parse selection failed: {e}")
        return None
142
+
143
+
144
+ myavatar_api = MyAvatarAPI()
145
+
146
+
147
+ # ---- UI ↔ Pipeline bridge: streaming handler ----
148
def process_video_with_background_stoppable(
    input_video: Optional[str],
    myavatar_selection: str,
    background_type: str,
    gradient_type: str,
    solid_color: str,
    custom_background: Optional[str],
    ai_prompt: str
):
    """Streaming Gradio handler: replace a video's background, stoppably.

    Yields 4-tuples of (process_btn update, stop_btn update, result video
    path, status text) so the UI can show progress and toggle the buttons.
    Sets pipeline.processing_active so a concurrent "stop" click can abort
    the run; the flag is always cleared in the finally block.
    """
    # BUGFIX: removed the dead `from pipeline import processing_active as
    # _active_ref` — rebinding the value locally cannot track the module
    # global; the module-attribute accesses below are the correct mechanism.
    import pipeline  # toggle pipeline.processing_active via the module object

    pipeline.processing_active = True
    try:
        yield gr.update(visible=False), gr.update(visible=True), None, "Starting processing..."

        # Resolve the input video: a direct upload wins over a MyAvatar
        # selection, which is downloaded to a temp file in 1 MiB chunks.
        video_path = None
        if input_video:
            video_path = input_video
        elif myavatar_selection and myavatar_selection != "No videos available":
            url = myavatar_api.get_video_url(myavatar_selection)
            if url:
                with requests.get(url, stream=True, timeout=60) as r:
                    r.raise_for_status()
                    with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
                        for chunk in r.iter_content(chunk_size=1 << 20):
                            if chunk:
                                tmp.write(chunk)
                        video_path = tmp.name

        if not video_path:
            yield gr.update(visible=True), gr.update(visible=False), None, "No video provided"
            return

        # Build the background image according to the selected mode.
        yield gr.update(visible=False), gr.update(visible=True), None, "Generating background..."
        bg_img = None
        if background_type == "gradient":
            bg_img = create_gradient_background(gradient_type, 1920, 1080)
        elif background_type == "solid":
            bg_img = create_solid_color(solid_color, 1920, 1080)
        elif background_type == "custom" and custom_background:
            try:
                from PIL import Image
                bg_img = Image.open(custom_background).convert("RGB")
            except Exception:
                bg_img = None
        elif background_type == "ai" and ai_prompt:
            bg_img, _ = generate_ai_background(ai_prompt)

        if bg_img is None:
            yield gr.update(visible=True), gr.update(visible=False), None, "No background generated"
            return

        # Resize the background to the 720p working resolution and hand
        # off to the GPU pipeline; the temp file only reserves a path.
        yield gr.update(visible=False), gr.update(visible=True), None, "Processing video with GPU optimization..."
        bg_array = np.array(bg_img.resize((1280, 720), Image.Resampling.LANCZOS))
        with tempfile.NamedTemporaryFile(suffix="_processed.mp4", delete=False) as tmp_final:
            final_path = tmp_final.name

        out = process_video_gpu_optimized(video_path, bg_array, final_path)

        # Best-effort cleanup of the downloaded temp copy (never the upload).
        try:
            if video_path != input_video and video_path and os.path.exists(video_path):
                os.unlink(video_path)
        except Exception:
            pass

        if out and pipeline.processing_active:
            yield gr.update(visible=True), gr.update(visible=False), out, "Video processing completed successfully!"
        else:
            yield gr.update(visible=True), gr.update(visible=False), None, "Processing was stopped or failed"

    except Exception as e:
        logger.error(f"UI pipeline error: {e}")
        yield gr.update(visible=True), gr.update(visible=False), None, f"Processing error: {e}"
    finally:
        pipeline.processing_active = False
228
+
229
+
230
def stop_processing_button():
    """Stop-button handler: ask the pipeline to abort the current run.

    Returns updates that hide the stop button and report the abort.
    """
    import pipeline

    pipeline.stop_processing()
    return gr.update(visible=False), "Processing stopped by user"
234
+
235
+
236
+ # ---- UI factory ----
237
def create_interface():
    """Build and return the Gradio Blocks UI for BackgroundFX Pro.

    Layout: left column = video input (upload or MyAvatar) plus background
    options; right column = processed video and status. The event wiring at
    the bottom connects the controls to the streaming processing handler.
    """
    css = """
    .main-container { max-width: 1200px; margin: 0 auto; }
    .status-box { border: 2px solid #4CAF50; border-radius: 10px; padding: 15px; }
    .gradient-preview { border: 2px solid #ddd; border-radius: 10px; }
    """

    with gr.Blocks(css=css, title="BackgroundFX Pro - GPU Optimized") as app:
        gr.Markdown("# BackgroundFX Pro - GPU Optimized\n### Professional Video Background Replacement with SAM2 + MatAnyone")

        # Status banner; the flags are module-level feature switches
        # (SAM2_ENABLED / MATANY_ENABLED / GPU_NAME defined elsewhere).
        with gr.Row():
            sam2_status = "Ready" if SAM2_ENABLED else "Disabled"
            matany_status = "Ready" if MATANY_ENABLED else "Disabled"
            gr.Markdown(f"**System Status:** Online | **GPU:** {GPU_NAME} | **SAM2:** {sam2_status} | **MatAnyone:** {matany_status}")

        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("## Video Input")
                with gr.Tabs():
                    with gr.Tab("Upload Video"):
                        video_upload = gr.Video(label="Upload Video File", height=300)
                    with gr.Tab("MyAvatar Videos"):
                        refresh_btn = gr.Button("Refresh Videos", size="sm")
                        myavatar_dropdown = gr.Dropdown(label="Select MyAvatar Video", choices=["Click refresh to load videos"], value=None)
                        video_preview = gr.Video(label="Preview", height=200)

                gr.Markdown("## Background Options")
                background_type = gr.Radio(choices=["gradient", "solid", "custom", "ai"], value="gradient", label="Background Type")

                # Only the widgets matching the selected background type are
                # visible; update_background_options() below toggles them.
                with gr.Group():
                    gradient_type = gr.Dropdown(choices=["sunset", "ocean", "forest", "default"], value="sunset", label="Gradient Type", visible=True)
                    gradient_preview = gr.Image(label="Gradient Preview", height=150)

                    solid_color = gr.Dropdown(choices=["white", "black", "blue", "green", "red", "purple", "orange", "yellow"], value="blue", label="Solid Color", visible=False)
                    color_preview = gr.Image(label="Color Preview", height=150, visible=False)

                    custom_bg_upload = gr.Image(label="Upload Custom Background", type="filepath", visible=False)

                    ai_prompt = gr.Textbox(label="AI Background Prompt", placeholder="Describe the background you want...", visible=False)
                    ai_generate_btn = gr.Button("Generate AI Background", visible=False)
                    ai_preview = gr.Image(label="AI Generated Background", height=150, visible=False)

                with gr.Row():
                    process_btn = gr.Button("Process Video", variant="primary", size="lg")
                    stop_btn = gr.Button("Stop Processing", variant="stop", size="lg", visible=False)

            with gr.Column(scale=1):
                gr.Markdown("## Results")
                result_video = gr.Video(label="Processed Video", height=400)
                status_output = gr.Textbox(label="Processing Status", lines=5, max_lines=10, elem_classes=["status-box"])
                gr.Markdown("""
                ### Processing Pipeline:
                1. **SAM2 Segmentation** — GPU-accelerated person detection
                2. **MatAnyone Matting** — temporal consistency
                3. **GPU Compositing** — real-time background replacement
                4. **Memory Optimization** — chunked processing + OOM recovery
                """)

        # ---- event handlers ----
        def update_background_options(bg_type):
            # Returning a dict keyed by component lets one handler update
            # the visibility of every background widget at once.
            return {
                gradient_type: gr.update(visible=(bg_type == "gradient")),
                gradient_preview: gr.update(visible=(bg_type == "gradient")),
                solid_color: gr.update(visible=(bg_type == "solid")),
                color_preview: gr.update(visible=(bg_type == "solid")),
                custom_bg_upload: gr.update(visible=(bg_type == "custom")),
                ai_prompt: gr.update(visible=(bg_type == "ai")),
                ai_generate_btn: gr.update(visible=(bg_type == "ai")),
                ai_preview: gr.update(visible=(bg_type == "ai")),
            }

        def update_gradient_preview(grad_type):
            # Small 400x200 preview; any error simply clears the image.
            try:
                return create_gradient_background(grad_type, 400, 200)
            except Exception:
                return None

        def update_color_preview(color):
            try:
                return create_solid_color(color, 400, 200)
            except Exception:
                return None

        def refresh_myavatar_videos():
            try:
                return gr.update(choices=myavatar_api.get_video_choices(), value=None)
            except Exception:
                return gr.update(choices=["Error loading videos"], value=None)

        def load_video_preview(selection):
            # gr.Video can preview directly from the returned remote URL.
            try:
                return myavatar_api.get_video_url(selection)
            except Exception:
                return None

        def generate_ai_bg(prompt):
            bg_img, _ = generate_ai_background(prompt)
            return bg_img

        background_type.change(
            fn=update_background_options,
            inputs=[background_type],
            outputs=[gradient_type, gradient_preview, solid_color, color_preview, custom_bg_upload, ai_prompt, ai_generate_btn, ai_preview]
        )
        gradient_type.change(fn=update_gradient_preview, inputs=[gradient_type], outputs=[gradient_preview])
        solid_color.change(fn=update_color_preview, inputs=[solid_color], outputs=[color_preview])
        refresh_btn.click(fn=refresh_myavatar_videos, outputs=[myavatar_dropdown])
        myavatar_dropdown.change(fn=load_video_preview, inputs=[myavatar_dropdown], outputs=[video_preview])
        ai_generate_btn.click(fn=generate_ai_bg, inputs=[ai_prompt], outputs=[ai_preview])

        # The processing handler is a generator, so it streams button/status
        # updates; the stop button flips the pipeline flag it checks.
        process_btn.click(
            fn=process_video_with_background_stoppable,
            inputs=[video_upload, myavatar_dropdown, background_type, gradient_type, solid_color, custom_bg_upload, ai_prompt],
            outputs=[process_btn, stop_btn, result_video, status_output]
        )
        stop_btn.click(fn=stop_processing_button, outputs=[stop_btn, status_output])

        # Pre-render the default gradient preview on page load.
        app.load(fn=lambda: create_gradient_background("sunset", 400, 200), outputs=[gradient_preview])

    return app
utils/oom.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # utils/oom.py
2
+ import os, logging, gc, torch
3
+
4
# Module-level logger for all OOM-recovery messages.
log = logging.getLogger("oom")

# Tunables (can be overridden via Space → Environment Variables)
OOM_MAX_RETRIES = int(os.getenv("OOM_MAX_RETRIES", "3"))  # retries per batch before giving up
OOM_DOWNSCALE_RATIO = float(os.getenv("OOM_DOWNSCALE_RATIO", "0.85"))  # multiplicative shrink applied on each OOM
OOM_MIN_SIDE = int(os.getenv("OOM_MIN_SIDE", "640"))  # resolution floor (pixels, longest side)
OOM_MIN_CHUNK = int(os.getenv("OOM_MIN_CHUNK", "8"))  # chunk-size floor (frames per batch)
+
12
def cuda_clear():
    """Aggressively free CUDA caches and trigger Python GC.

    Never raises: all CUDA calls are wrapped so this is safe to call on
    CPU-only machines or after the CUDA context has been corrupted.
    """
    try:
        if torch.cuda.is_available():
            torch.cuda.synchronize()
            torch.cuda.empty_cache()
            # Peak-stat reset is optional; ignore if unsupported.
            try:
                torch.cuda.reset_peak_memory_stats()
            except Exception:
                pass
    except Exception:
        pass
    gc.collect()
+
26
class OOMRetry:
    """Context manager that swallows CUDA OOM errors so the caller can retry.

    After the with-block, ``guard.oom`` is True iff a CUDA out-of-memory
    error was caught (caches are cleared in that case); every other
    exception propagates unchanged.

    Usage:
        with OOMRetry() as guard:
            ...  # GPU work
        if guard.oom:
            ...  # adapt (e.g., downscale/chunk) and retry
    """

    def __init__(self):
        self.oom = False

    def __enter__(self):
        self.oom = False
        return self

    def __exit__(self, exc_type, exc, tb):
        # Identify OOM by message text so any wrapper exception type matches.
        if exc_type is None or "CUDA out of memory" not in str(exc):
            return False  # no exception, or not an OOM: let it propagate
        self.oom = True
        log.warning("[OOM] CUDA OOM caught — cleaning caches.")
        cuda_clear()
        return True  # suppress the exception so the caller can retry
47
+
48
def adapt_resolution(current_max_side: int) -> int:
    """Shrink the working resolution after an OOM, floored at OOM_MIN_SIDE.

    Returns the new longest-side limit; logs only when it actually shrinks.
    """
    candidate = int(current_max_side * OOM_DOWNSCALE_RATIO)
    new_side = candidate if candidate > OOM_MIN_SIDE else OOM_MIN_SIDE
    if new_side < current_max_side:
        log.warning(f"[OOM] Downscaling max_side: {current_max_side} -> {new_side}")
    return new_side
54
+
55
def adapt_chunk(current_chunk: int) -> int:
    """Shrink the per-batch chunk size after an OOM, floored at OOM_MIN_CHUNK.

    Returns the new chunk size; logs only when it actually shrinks.
    """
    candidate = int(current_chunk * OOM_DOWNSCALE_RATIO)
    new_chunk = candidate if candidate > OOM_MIN_CHUNK else OOM_MIN_CHUNK
    if new_chunk < current_chunk:
        log.warning(f"[OOM] Reducing chunk size: {current_chunk} -> {new_chunk}")
    return new_chunk