MogensR committed on
Commit
c30b921
·
1 Parent(s): bc9a991

final run?

Browse files
Files changed (8) hide show
  1. .dockerignore +11 -24
  2. Dockerfile +36 -50
  3. app.py +77 -110
  4. models/__init__.py +66 -53
  5. perf_tuning.py +99 -49
  6. pipeline.py +180 -111
  7. requirements.txt +4 -3
  8. ui.py +95 -63
.dockerignore CHANGED
@@ -2,16 +2,12 @@
2
  # .dockerignore for HF Spaces
3
  # ===========================
4
 
5
- # ---------------------------
6
- # VCS (never needed in image)
7
- # ---------------------------
8
  .git
9
  .gitignore
10
  .gitattributes
11
 
12
- # ---------------------------
13
- # Python cache / build files
14
- # ---------------------------
15
  __pycache__/
16
  *.py[cod]
17
  *.pyo
@@ -20,49 +16,41 @@ __pycache__/
20
  *.egg-info/
21
  dist/
22
  build/
 
 
23
 
24
- # ---------------------------
25
  # Virtual environments
26
- # ---------------------------
27
  .env
28
  .venv/
29
  env/
30
  venv/
31
 
32
- # ---------------------------
33
  # External repos (cloned in Docker, not copied from local)
34
- # ---------------------------
35
  third_party/
36
 
37
- # ---------------------------
38
- # Hugging Face / Torch caches (but allow model files that might be needed)
39
- # ---------------------------
40
  .cache/
41
  huggingface/
42
  torch/
43
  data/
44
 
45
- # ---------------------------
46
  # HF Space metadata/state
47
- # ---------------------------
48
  .hf_space/
49
  space.log
50
  gradio_cached_examples/
51
  gradio_static/
52
  __outputs__/
53
 
54
- # ---------------------------
55
  # Logs & temp files
56
- # ---------------------------
57
  *.log
58
  logs/
59
  tmp/
60
  temp/
61
  *.swp
 
 
62
 
63
- # ---------------------------
64
  # Media test assets
65
- # ---------------------------
66
  *.mp4
67
  *.avi
68
  *.mov
@@ -72,9 +60,7 @@ temp/
72
  *.jpeg
73
  *.gif
74
 
75
- # ---------------------------
76
  # OS / IDE cruft
77
- # ---------------------------
78
  .DS_Store
79
  Thumbs.db
80
  .vscode/
@@ -82,10 +68,11 @@ Thumbs.db
82
  *.sublime-project
83
  *.sublime-workspace
84
 
85
- # ---------------------------
86
  # Node / frontend (if present)
87
- # ---------------------------
88
  node_modules/
89
  npm-debug.log
90
  yarn-debug.log
91
- yarn-error.log
 
 
 
 
2
  # .dockerignore for HF Spaces
3
  # ===========================
4
 
5
+ # VCS
 
 
6
  .git
7
  .gitignore
8
  .gitattributes
9
 
10
+ # Python cache / build
 
 
11
  __pycache__/
12
  *.py[cod]
13
  *.pyo
 
16
  *.egg-info/
17
  dist/
18
  build/
19
+ .pytest_cache/
20
+ .python-version
21
 
 
22
  # Virtual environments
 
23
  .env
24
  .venv/
25
  env/
26
  venv/
27
 
 
28
  # External repos (cloned in Docker, not copied from local)
 
29
  third_party/
30
 
31
+ # Hugging Face / Torch caches
 
 
32
  .cache/
33
  huggingface/
34
  torch/
35
  data/
36
 
 
37
  # HF Space metadata/state
 
38
  .hf_space/
39
  space.log
40
  gradio_cached_examples/
41
  gradio_static/
42
  __outputs__/
43
 
 
44
  # Logs & temp files
 
45
  *.log
46
  logs/
47
  tmp/
48
  temp/
49
  *.swp
50
+ .coverage
51
+ coverage.xml
52
 
 
53
  # Media test assets
 
54
  *.mp4
55
  *.avi
56
  *.mov
 
60
  *.jpeg
61
  *.gif
62
 
 
63
  # OS / IDE cruft
 
64
  .DS_Store
65
  Thumbs.db
66
  .vscode/
 
68
  *.sublime-project
69
  *.sublime-workspace
70
 
 
71
  # Node / frontend (if present)
 
72
  node_modules/
73
  npm-debug.log
74
  yarn-debug.log
75
+ yarn-error.log
76
+
77
+ # ---- Optional: allow specific checkpoints if needed ----
78
+ !checkpoints/
Dockerfile CHANGED
@@ -1,119 +1,105 @@
1
  # ===============================
2
- # BackgroundFX Pro — Dockerfile (Updated with Debug)
3
- # Hugging Face Spaces Pro (GPU)
4
  # ===============================
5
 
6
- # CUDA base image (T4-friendly). Build stage has NO GPU access.
7
- FROM nvidia/cuda:12.3.2-cudnn9-devel-ubuntu22.04
8
 
9
- # --- Build args (override in Space Settings → Build args) ---
10
- # Pin external repos for reproducible builds
11
  ARG SAM2_SHA=__PIN_ME__
12
  ARG MATANYONE_SHA=__PIN_ME__
13
-
14
- # (legacy/optional) Model IDs — you can still use these elsewhere if you want
15
  ARG SAM2_MODEL_ID=facebook/sam2
16
- ARG SAM2_VARIANT=sam2_hiera_large # sam2_hiera_small | sam2_hiera_base | sam2_hiera_large
17
  ARG MATANY_REPO_ID=PeiqingYang/MatAnyone
18
  ARG MATANY_FILENAME=matanyone_v1.0.pth
19
 
20
- # --- Create non-root user (uid 1000 required by HF) ---
21
  RUN useradd -m -u 1000 user
22
  ENV HOME=/home/user
23
  ENV PATH=/home/user/.local/bin:$PATH
24
- RUN mkdir -p /home/user/app && chown -R user:user /home/user
25
  WORKDIR /home/user/app
26
 
27
  # --- System packages ---
28
  USER root
29
  RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
30
  git ffmpeg python3 python3-pip python3-venv \
 
31
  libgl1-mesa-glx libglib2.0-0 libsm6 libxext6 libxrender-dev libgomp1 \
32
  && rm -rf /var/lib/apt/lists/*
33
 
34
- # Persistent cache dir for HF weights / torch / matplotlib
35
  RUN mkdir -p /data/.cache && chown -R user:user /data
36
  USER user
37
 
38
- # --- Python & CUDA wheels (Torch cu121) ---
39
  RUN python3 -m pip install --no-cache-dir --upgrade pip
40
  RUN python3 -m pip install --no-cache-dir --index-url https://download.pytorch.org/whl/cu121 \
41
  torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1
42
 
43
- # --- App Python deps ---
44
  COPY --chown=user requirements.txt ./requirements.txt
45
  RUN python3 -m pip install --no-cache-dir -r requirements.txt
46
- # Optional (recommended) nicer fallback segmentation:
47
  RUN python3 -m pip install --no-cache-dir mediapipe==0.10.14
48
 
49
- # --- Clone external repos (SAM2 & MatAnyone) ---
50
  RUN mkdir -p third_party
51
 
52
- # SAM2
53
- RUN git clone https://github.com/facebookresearch/segment-anything-2.git third_party/sam2 && \
54
  cd third_party/sam2 && \
55
- if [ "${SAM2_SHA}" != "__PIN_ME__" ]; then git checkout "${SAM2_SHA}"; fi
56
 
57
- # DEBUG: Check what was actually cloned
58
- RUN echo "=== DEBUG: SAM2 directory contents ===" && \
59
  ls -la third_party/sam2/ && \
60
- echo "=== DEBUG: Config directory ===" && \
61
- ls -la third_party/sam2/configs/ || echo "configs directory not found" && \
62
  echo "=== DEBUG: SAM2 configs ===" && \
63
- ls -la third_party/sam2/configs/sam2/ || echo "sam2 configs directory not found"
64
 
65
- # Install SAM2 requirements
66
  RUN cd third_party/sam2 && python3 -m pip install --no-cache-dir -e .
67
 
68
- # MatAnyone (pq-yang fork as per your previous setup)
69
- RUN git clone https://github.com/pq-yang/MatAnyone.git third_party/matanyone && \
70
  cd third_party/matanyone && \
71
- if [ "${MATANYONE_SHA}" != "__PIN_ME__" ]; then git checkout "${MATANYONE_SHA}"; fi
72
 
73
- # Install MatAnyone requirements if they exist
74
  RUN cd third_party/matanyone && \
75
  if [ -f requirements.txt ]; then python3 -m pip install --no-cache-dir -r requirements.txt; fi
76
 
77
- # --- App code ---
78
  COPY --chown=user . /home/user/app
79
 
80
- # DEBUG: Check if app code copy overwrote the cloned repos
81
- RUN echo "=== DEBUG: After app code copy - SAM2 status ===" && \
82
  ls -la third_party/sam2/ && \
83
- echo "=== DEBUG: Config files after copy ===" && \
84
- ls -la third_party/sam2/configs/sam2/ || echo "Config directory missing after copy"
85
 
86
- # --- Runtime environment (aligned with pipeline.py) ---
87
  ENV PYTHONUNBUFFERED=1 \
88
- OMP_NUM_THREADS=2 \
89
  TOKENIZERS_PARALLELISM=false \
90
  HF_HOME=/data/.cache/huggingface \
91
  TORCH_HOME=/data/.cache/torch \
92
  MPLCONFIGDIR=/data/.cache/matplotlib \
93
  PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128 \
94
  PYTHONPATH="$PYTHONPATH:/home/user/app/third_party/sam2:/home/user/app/third_party/matanyone" \
95
- PORT=7860 \
96
  FFMPEG_BIN=ffmpeg \
97
- \
98
- # Let pipeline.py discover these dynamically (no hard-coded paths)
99
  THIRD_PARTY_SAM2_DIR=/home/user/app/third_party/sam2 \
100
  THIRD_PARTY_MATANY_DIR=/home/user/app/third_party/matanyone \
101
- \
102
- # --- SAM2 dynamic config (FIXED: relative path within SAM2 repo) ---
103
  SAM2_MODEL_CFG="configs/sam2/sam2_hiera_l.yaml" \
104
  SAM2_CHECKPOINT="" \
105
- \
106
- # --- MatAnyone dynamic config (used by pipeline.py) ---
107
  MATANY_REPO_ID=PeiqingYang/MatAnyone \
108
  MATANY_CHECKPOINT="" \
109
  ENABLE_MATANY=1
110
 
111
- # DEBUG: Final check of SAM2 installation
112
- RUN echo "=== FINAL DEBUG: SAM2 status ===" && \
113
- pwd && \
114
- ls -la /home/user/app/third_party/sam2/ || echo "SAM2 directory missing" && \
115
- ls -la /home/user/app/third_party/sam2/configs/sam2/ || echo "Config dir missing"
116
 
117
- # --- Networking / Entrypoint ---
118
  EXPOSE 7860
119
- CMD ["python3", "app.py"]
 
 
 
 
 
 
1
  # ===============================
2
+ # BackgroundFX Pro — Dockerfile (Hardened for Spaces GPU)
 
3
  # ===============================
4
 
5
+ # Match PyTorch cu121 wheels (critical to avoid CUDA probe stalls)
6
+ FROM nvidia/cuda:12.1.1-cudnn8-runtime-ubuntu22.04
7
 
8
+ # --- Build args (optional pins) ---
 
9
  ARG SAM2_SHA=__PIN_ME__
10
  ARG MATANYONE_SHA=__PIN_ME__
 
 
11
  ARG SAM2_MODEL_ID=facebook/sam2
12
+ ARG SAM2_VARIANT=sam2_hiera_large
13
  ARG MATANY_REPO_ID=PeiqingYang/MatAnyone
14
  ARG MATANY_FILENAME=matanyone_v1.0.pth
15
 
16
+ # --- Non-root user (HF expects uid 1000) ---
17
  RUN useradd -m -u 1000 user
18
  ENV HOME=/home/user
19
  ENV PATH=/home/user/.local/bin:$PATH
 
20
  WORKDIR /home/user/app
21
 
22
  # --- System packages ---
23
  USER root
24
  RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
25
  git ffmpeg python3 python3-pip python3-venv \
26
+ wget curl ca-certificates \
27
  libgl1-mesa-glx libglib2.0-0 libsm6 libxext6 libxrender-dev libgomp1 \
28
  && rm -rf /var/lib/apt/lists/*
29
 
30
+ # Caches (writable)
31
  RUN mkdir -p /data/.cache && chown -R user:user /data
32
  USER user
33
 
34
+ # --- Python + Torch (cu121) ---
35
  RUN python3 -m pip install --no-cache-dir --upgrade pip
36
  RUN python3 -m pip install --no-cache-dir --index-url https://download.pytorch.org/whl/cu121 \
37
  torch==2.3.1 torchvision==0.18.1 torchaudio==2.3.1
38
 
39
+ # --- App deps ---
40
  COPY --chown=user requirements.txt ./requirements.txt
41
  RUN python3 -m pip install --no-cache-dir -r requirements.txt
42
+ # Optional nice fallback
43
  RUN python3 -m pip install --no-cache-dir mediapipe==0.10.14
44
 
45
+ # --- Third-party repos (build-time, never at runtime) ---
46
  RUN mkdir -p third_party
47
 
48
+ # SAM2 (shallow clone; optional SHA pin)
49
+ RUN git clone --depth=1 https://github.com/facebookresearch/segment-anything-2.git third_party/sam2 && \
50
  cd third_party/sam2 && \
51
+ if [ "${SAM2_SHA}" != "__PIN_ME__" ]; then git fetch --depth=1 origin ${SAM2_SHA} && git checkout ${SAM2_SHA}; fi
52
 
53
+ # Show what we got
54
+ RUN echo "=== DEBUG: SAM2 contents ===" && \
55
  ls -la third_party/sam2/ && \
 
 
56
  echo "=== DEBUG: SAM2 configs ===" && \
57
+ (ls -la third_party/sam2/configs/sam2/ || echo "configs missing")
58
 
59
+ # Install SAM2 (editable ok)
60
  RUN cd third_party/sam2 && python3 -m pip install --no-cache-dir -e .
61
 
62
+ # MatAnyone (pq-yang fork per your setup)
63
+ RUN git clone --depth=1 https://github.com/pq-yang/MatAnyone.git third_party/matanyone && \
64
  cd third_party/matanyone && \
65
+ if [ "${MATANYONE_SHA}" != "__PIN_ME__" ]; then git fetch --depth=1 origin ${MATANYONE_SHA} && git checkout ${MATANYONE_SHA}; fi
66
 
67
+ # Install MatAnyone requirements if present
68
  RUN cd third_party/matanyone && \
69
  if [ -f requirements.txt ]; then python3 -m pip install --no-cache-dir -r requirements.txt; fi
70
 
71
+ # --- App code last (so code changes don't invalidate heavy layers) ---
72
  COPY --chown=user . /home/user/app
73
 
74
+ # Verify clone not overwritten by COPY
75
+ RUN echo "=== DEBUG: After COPY ===" && \
76
  ls -la third_party/sam2/ && \
77
+ (ls -la third_party/sam2/configs/sam2/ || echo "SAM2 configs missing")
 
78
 
79
+ # --- Runtime environment ---
80
  ENV PYTHONUNBUFFERED=1 \
81
+ OMP_NUM_THREADS=1 OPENBLAS_NUM_THREADS=1 MKL_NUM_THREADS=1 NUMEXPR_NUM_THREADS=1 \
82
  TOKENIZERS_PARALLELISM=false \
83
  HF_HOME=/data/.cache/huggingface \
84
  TORCH_HOME=/data/.cache/torch \
85
  MPLCONFIGDIR=/data/.cache/matplotlib \
86
  PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:128 \
87
  PYTHONPATH="$PYTHONPATH:/home/user/app/third_party/sam2:/home/user/app/third_party/matanyone" \
 
88
  FFMPEG_BIN=ffmpeg \
 
 
89
  THIRD_PARTY_SAM2_DIR=/home/user/app/third_party/sam2 \
90
  THIRD_PARTY_MATANY_DIR=/home/user/app/third_party/matanyone \
 
 
91
  SAM2_MODEL_CFG="configs/sam2/sam2_hiera_l.yaml" \
92
  SAM2_CHECKPOINT="" \
 
 
93
  MATANY_REPO_ID=PeiqingYang/MatAnyone \
94
  MATANY_CHECKPOINT="" \
95
  ENABLE_MATANY=1
96
 
97
+ # Do NOT set PORT here; Spaces injects it.
 
 
 
 
98
 
 
99
  EXPOSE 7860
100
+
101
+ # Optional: basic health check to see if the server bound
102
+ HEALTHCHECK --interval=30s --timeout=5s --retries=5 CMD wget -qO- "http://127.0.0.1:${PORT:-7860}/" || exit 1
103
+
104
+ # Use exec form + unbuffered
105
+ CMD ["python3","-u","app.py"]
app.py CHANGED
@@ -1,28 +1,21 @@
1
- # app.py
2
  #!/usr/bin/env python3
3
  """
4
- BackgroundFX Pro - Gradio App (dynamic, GPU-ready, no hard-coded checkpoints)
5
- =============================================================================
6
-
7
- - Uses pipeline.process() which orchestrates:
8
- SAM2 first-frame segmentation → MatAnyone temporal matting → compositing
9
- - Robust fallbacks (MediaPipe / GrabCut; static-mask compositing)
10
- - Diagnostics JSON shows which engines ran and on which device
11
- - All paths/devices set by environment variables (see pipeline.py header)
12
  """
13
 
14
  import os
15
- import json
16
  import logging
 
 
17
  import subprocess
18
- from pathlib import Path
19
- from typing import Optional, Tuple
20
 
21
  import gradio as gr
22
 
23
- # --------------------------------------------------------------------------------------
24
- # Early GPU/perf diagnostics (IMPORT FIRST so logs show even if pipeline import fails)
25
- # --------------------------------------------------------------------------------------
26
  logger = logging.getLogger("backgroundfx_pro")
27
  if not logger.handlers:
28
  h = logging.StreamHandler()
@@ -30,116 +23,90 @@
30
  logger.addHandler(h)
31
  logger.setLevel(logging.INFO)
32
 
33
- # Try to load perf_tuning (forces CUDA or warns; sets cuDNN/TF32; logs banner)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  try:
35
  import perf_tuning # noqa: F401
36
  logger.info("perf_tuning imported successfully.")
37
  except Exception as e:
38
- logger.warning(f"perf_tuning not loaded: {e}")
 
 
39
 
40
- def _log_gpu_diag():
41
- # Torch info
 
 
42
  try:
43
  import torch
44
- logger.info(f"torch.__version__={torch.__version__} | torch.version.cuda={getattr(torch.version, 'cuda', None)}")
45
- logger.info(f"torch.cuda.is_available()={torch.cuda.is_available()}")
46
- if torch.cuda.is_available():
 
 
 
 
47
  try:
48
  idx = torch.cuda.current_device()
49
  name = torch.cuda.get_device_name(idx)
50
  cap = torch.cuda.get_device_capability(idx)
51
- logger.info(f"Current CUDA device: {idx} | {name} | cc {cap[0]}.{cap[1]}")
52
  except Exception as e:
53
- logger.info(f"CUDA device query failed: {e}")
54
- except Exception as e:
55
- logger.warning(f"Could not import torch for GPU diag: {e}")
56
-
57
- # nvidia-smi
58
- try:
59
- out = subprocess.run(["nvidia-smi", "-L"], capture_output=True, text=True)
60
- if out.returncode == 0:
61
- logger.info("nvidia-smi -L:\n" + out.stdout.strip())
62
- else:
63
- logger.warning("nvidia-smi -L failed or unavailable.")
64
  except Exception as e:
65
- logger.warning(f"nvidia-smi not runnable: {e}")
66
 
67
- _log_gpu_diag()
68
-
69
- # --------------------------------------------------------------------------------------
70
- # Local pipeline
71
- # --------------------------------------------------------------------------------------
72
- import pipeline
73
-
74
-
75
- def _process_entry(video, bg_image, point_x, point_y, auto_box, progress=gr.Progress(track_tqdm=True)):
76
- """
77
- Gradio wrapper → returns (video_path, diagnostics_json_str)
78
- """
79
- if video is None or bg_image is None:
80
- return None, json.dumps({"error": "Please provide both a video and a background image."}, indent=2)
81
-
82
- # Gradio can pass dict-like objects for file with 'name' key, normalize to path
83
- vpath = video if isinstance(video, (str, Path)) else getattr(video, "name", None) or video.get("name")
84
- bpath = bg_image if isinstance(bg_image, (str, Path)) else getattr(bg_image, "name", None) or bg_image.get("name")
85
-
86
- progress(0.05, desc="Starting…")
87
- out_path, diag = pipeline.process(
88
- video_path=vpath,
89
- bg_image_path=bpath,
90
- point_x=point_x if point_x not in (None, "") else None,
91
- point_y=point_y if point_y not in (None, "") else None,
92
- auto_box=bool(auto_box),
93
- work_dir=None # pipeline will create a temp dir
94
- )
95
- progress(0.95, desc="Finalizing…")
96
-
97
- return (out_path if out_path else None), json.dumps(diag, indent=2)
98
-
99
-
100
- with gr.Blocks(title="BackgroundFX Pro (SAM2 + MatAnyone)", theme=gr.themes.Soft()) as demo:
101
- gr.Markdown(
102
- """
103
- # 🎬 BackgroundFX Pro
104
- **SAM2 + MatAnyone** with robust fallbacks. All configs/devices are dynamic via environment variables.
105
-
106
- - Upload a video and a background image.
107
- - Optionally provide a foreground point (x, y) in pixels for the first frame **or** tick *Auto subject box*.
108
- - Click **Process**. The app will try SAM2 → MatAnyone; if anything fails, it falls back automatically.
109
- """
110
- )
111
-
112
- with gr.Row():
113
- with gr.Column(scale=2):
114
- in_video = gr.Video(label="Input Video", sources=["upload"], interactive=True)
115
- in_bg = gr.Image(label="Background Image", type="filepath", interactive=True)
116
- with gr.Column(scale=1):
117
- point_x = gr.Number(label="Foreground point X (optional)", value=None, precision=0)
118
- point_y = gr.Number(label="Foreground point Y (optional)", value=None, precision=0)
119
- auto_box = gr.Checkbox(label="Auto subject box (ignore point)", value=True)
120
- process_btn = gr.Button("Process", variant="primary")
121
-
122
- with gr.Row():
123
- out_video = gr.Video(label="Output (H.264 MP4)")
124
- out_diag = gr.JSON(label="Diagnostics")
125
-
126
- def _on_click(video, bg, px, py, auto):
127
- v, d = _process_entry(video, bg, px, py, auto)
128
- try:
129
- d_dict = json.loads(d)
130
- except Exception:
131
- d_dict = {"raw": d}
132
- return v, d_dict
133
-
134
- process_btn.click(
135
- _on_click,
136
- inputs=[in_video, in_bg, point_x, point_y, auto_box],
137
- outputs=[out_video, out_diag]
138
- )
139
 
140
  if __name__ == "__main__":
141
- # Dynamic host/port via env; suitable defaults for Hugging Face Spaces
142
  host = os.environ.get("HOST", "0.0.0.0")
143
  port = int(os.environ.get("PORT", "7860"))
144
- # NOTE: gradio>=5 removed concurrency_count; use max_size only
145
- demo.queue(max_size=16).launch(server_name=host, server_port=port, show_error=True)
 
 
 
 
 
 
 
1
  #!/usr/bin/env python3
2
  """
3
+ BackgroundFX Pro App Entrypoint (UI separated)
4
+ - UI is built in ui.py (create_interface)
5
+ - Hardened startup: heartbeat, safe diag, bind to $PORT
 
 
 
 
 
6
  """
7
 
8
  import os
 
9
  import logging
10
+ import threading
11
+ import time
12
  import subprocess
 
 
13
 
14
  import gradio as gr
15
 
16
+ # -----------------------------------------------------------------------------
17
+ # Logging early
18
+ # -----------------------------------------------------------------------------
19
  logger = logging.getLogger("backgroundfx_pro")
20
  if not logger.handlers:
21
  h = logging.StreamHandler()
 
23
  logger.addHandler(h)
24
  logger.setLevel(logging.INFO)
25
 
26
+ # Heartbeat so logs never go silent during startup/imports
27
+ def _heartbeat():
28
+ i = 0
29
+ while True:
30
+ i += 1
31
+ print(f"[startup-heartbeat] {i*5}s…", flush=True)
32
+ time.sleep(5)
33
+
34
+ threading.Thread(target=_heartbeat, daemon=True).start()
35
+
36
+ # -----------------------------------------------------------------------------
37
+ # Safe, minimal startup diagnostics (no long CUDA probes)
38
+ # -----------------------------------------------------------------------------
39
+ def _safe_startup_diag():
40
+ # Torch version only; defer CUDA availability checks to post-launch
41
+ try:
42
+ import torch # noqa: F401
43
+ import importlib
44
+ t = importlib.import_module("torch")
45
+ logger.info("torch imported: %s | torch.version.cuda=%s",
46
+ getattr(t, "__version__", "?"),
47
+ getattr(getattr(t, "version", None), "cuda", None))
48
+ except Exception as e:
49
+ logger.warning("Torch not available at startup: %s", e)
50
+
51
+ # nvidia-smi with short timeout (avoid indefinite block)
52
+ try:
53
+ out = subprocess.run(["nvidia-smi", "-L"], capture_output=True, text=True, timeout=2)
54
+ if out.returncode == 0:
55
+ logger.info("nvidia-smi -L:\n%s", out.stdout.strip())
56
+ else:
57
+ logger.warning("nvidia-smi -L failed or unavailable (rc=%s).", out.returncode)
58
+ except subprocess.TimeoutExpired:
59
+ logger.warning("nvidia-smi -L timed out (skipping).")
60
+ except Exception as e:
61
+ logger.warning("nvidia-smi not runnable: %s", e)
62
+
63
+ # Optional perf tuning; never block startup
64
  try:
65
  import perf_tuning # noqa: F401
66
  logger.info("perf_tuning imported successfully.")
67
  except Exception as e:
68
+ logger.warning("perf_tuning not loaded: %s", e)
69
+
70
+ _safe_startup_diag()
71
 
72
+ # -----------------------------------------------------------------------------
73
+ # Post-launch CUDA diag in background (so it never blocks binding the port)
74
+ # -----------------------------------------------------------------------------
75
+ def _post_launch_diag():
76
  try:
77
  import torch
78
+ try:
79
+ avail = torch.cuda.is_available()
80
+ except Exception as e:
81
+ logger.warning("torch.cuda.is_available() failed: %s", e)
82
+ avail = False
83
+ logger.info("CUDA available: %s", avail)
84
+ if avail:
85
  try:
86
  idx = torch.cuda.current_device()
87
  name = torch.cuda.get_device_name(idx)
88
  cap = torch.cuda.get_device_capability(idx)
89
+ logger.info("CUDA device %d: %s (cc %d.%d)", idx, name, cap[0], cap[1])
90
  except Exception as e:
91
+ logger.warning("CUDA device query failed: %s", e)
 
 
 
 
 
 
 
 
 
 
92
  except Exception as e:
93
+ logger.warning("Post-launch torch diag failed: %s", e)
94
 
95
+ # -----------------------------------------------------------------------------
96
+ # Build UI (in separate module) and launch
97
+ # -----------------------------------------------------------------------------
98
+ def build_ui() -> gr.Blocks:
99
+ # Import here so any heavy imports inside ui.py (it shouldn’t) would show up after logs are configured
100
+ from ui import create_interface
101
+ return create_interface()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
 
103
  if __name__ == "__main__":
 
104
  host = os.environ.get("HOST", "0.0.0.0")
105
  port = int(os.environ.get("PORT", "7860"))
106
+ logger.info("Launching Gradio on %s:%s …", host, port)
107
+
108
+ demo = build_ui()
109
+ demo.queue(max_size=16)
110
+
111
+ threading.Thread(target=_post_launch_diag, daemon=True).start()
112
+ demo.launch(server_name=host, server_port=port, show_error=True)
models/__init__.py CHANGED
@@ -1,9 +1,10 @@
1
  #!/usr/bin/env python3
2
  """
3
- BackgroundFX Pro - Model Loading & Utilities
4
- ===========================================
5
- Contains all model loading, inference functions, and utility functions
6
- moved from the main pipeline for better organization.
 
7
  """
8
 
9
  from __future__ import annotations
@@ -19,12 +20,24 @@
19
 
20
  import numpy as np
21
  import yaml
22
- import torch # For memory management and CUDA operations
23
 
24
  # --------------------------------------------------------------------------------------
25
- # Logging
26
  # --------------------------------------------------------------------------------------
27
  logger = logging.getLogger("backgroundfx_pro")
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  # --------------------------------------------------------------------------------------
30
  # Optional dependencies
@@ -38,35 +51,40 @@
38
  # --------------------------------------------------------------------------------------
39
  # Path setup for third_party repos
40
  # --------------------------------------------------------------------------------------
41
- ROOT = Path(__file__).resolve().parent.parent # Go up from models/ to project root
42
  TP_SAM2 = Path(os.environ.get("THIRD_PARTY_SAM2_DIR", ROOT / "third_party" / "sam2")).resolve()
43
  TP_MATANY = Path(os.environ.get("THIRD_PARTY_MATANY_DIR", ROOT / "third_party" / "matanyone")).resolve()
44
 
45
  def _add_sys_path(p: Path) -> None:
46
- p_str = str(p)
47
- if p_str not in sys.path:
48
- sys.path.insert(0, p_str)
 
 
 
49
 
50
  _add_sys_path(TP_SAM2)
51
  _add_sys_path(TP_MATANY)
52
 
53
  # --------------------------------------------------------------------------------------
54
- # Basic Utilities
55
  # --------------------------------------------------------------------------------------
56
- def _ffmpeg_bin() -> str:
57
- return os.environ.get("FFMPEG_BIN", "ffmpeg")
58
-
59
- def _probe_ffmpeg() -> bool:
60
  try:
61
- subprocess.run([_ffmpeg_bin(), "-version"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True)
62
- return True
63
- except Exception:
64
- return False
 
65
 
66
  def _has_cuda() -> bool:
 
 
 
67
  try:
68
- return torch.cuda.is_available()
69
- except Exception:
 
70
  return False
71
 
72
  def _pick_device(env_key: str) -> str:
@@ -75,6 +93,19 @@ def _pick_device(env_key: str) -> str:
75
  return requested
76
  return "cuda" if _has_cuda() else "cpu"
77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  def _ensure_dir(p: Path) -> None:
79
  p.mkdir(parents=True, exist_ok=True)
80
 
@@ -141,7 +172,6 @@ def _mux_audio(src_video: Union[str, Path], silent_video: Union[str, Path], out_
141
  # Compositing & Image Processing
142
  # --------------------------------------------------------------------------------------
143
  def _refine_alpha(alpha: np.ndarray, erode_px: int = 1, dilate_px: int = 2, blur_px: float = 1.5) -> np.ndarray:
144
- """Erode→dilate + gentle blur → float alpha in [0,1]."""
145
  if alpha.dtype != np.float32:
146
  a = alpha.astype(np.float32)
147
  if a.max() > 1.0:
@@ -173,7 +203,6 @@ def _to_srgb(lin: np.ndarray, gamma: float = 2.2) -> np.ndarray:
173
  return np.clip(np.power(x, 1.0 / gamma) * 255.0, 0, 255).astype(np.uint8)
174
 
175
  def _light_wrap(bg_rgb: np.ndarray, alpha01: np.ndarray, radius: int = 5, amount: float = 0.18) -> np.ndarray:
176
- """Simple light wrap from background into subject edges."""
177
  r = max(1, int(radius))
178
  inv = 1.0 - alpha01
179
  inv_blur = cv2.GaussianBlur(inv, (r | 1, r | 1), 0)
@@ -181,8 +210,7 @@ def _light_wrap(bg_rgb: np.ndarray, alpha01: np.ndarray, radius: int = 5, amount
181
  return lw
182
 
183
  def _despill_edges(fg_rgb: np.ndarray, alpha01: np.ndarray, amount: float = 0.35) -> np.ndarray:
184
- """Reduce saturation in boundary band (alpha≈0.5) to remove old-background tint."""
185
- w = 1.0 - 2.0 * np.abs(alpha01 - 0.5) # bell-shaped weight
186
  w = np.clip(w, 0.0, 1.0)
187
  hsv = cv2.cvtColor(fg_rgb.astype(np.uint8), cv2.COLOR_RGB2HSV).astype(np.float32)
188
  H, S, V = cv2.split(hsv)
@@ -191,11 +219,11 @@ def _despill_edges(fg_rgb: np.ndarray, alpha01: np.ndarray, amount: float = 0.35
191
  out = cv2.cvtColor(hsv2.astype(np.uint8), cv2.COLOR_HSV2RGB)
192
  return out
193
 
194
- def _composite_frame_pro(fg_rgb: np.ndarray, alpha: np.ndarray, bg_rgb: np.ndarray,
195
- erode_px: int = None, dilate_px: int = None, blur_px: float = None,
196
- lw_radius: int = None, lw_amount: float = None,
197
- despill_amount: float = None) -> np.ndarray:
198
- """Gamma-aware composite + edge refinement + light wrap + boundary de-spill."""
199
  erode_px = erode_px if erode_px is not None else int(os.environ.get("EDGE_ERODE", "1"))
200
  dilate_px = dilate_px if dilate_px is not None else int(os.environ.get("EDGE_DILATE", "2"))
201
  blur_px = blur_px if blur_px is not None else float(os.environ.get("EDGE_BLUR", "1.5"))
@@ -203,17 +231,11 @@ def _composite_frame_pro(fg_rgb: np.ndarray, alpha: np.ndarray, bg_rgb: np.ndarr
203
  lw_amount = lw_amount if lw_amount is not None else float(os.environ.get("LIGHTWRAP_AMOUNT", "0.18"))
204
  despill_amount = despill_amount if despill_amount is not None else float(os.environ.get("DESPILL_AMOUNT", "0.35"))
205
 
206
- # refine alpha [0,1]
207
  a = _refine_alpha(alpha, erode_px=erode_px, dilate_px=dilate_px, blur_px=blur_px)
208
-
209
- # edge de-spill: temper saturation where a≈0.5
210
  fg_rgb = _despill_edges(fg_rgb, a, amount=despill_amount)
211
 
212
- # linearize for better blending
213
  fg_lin = _to_linear(fg_rgb)
214
  bg_lin = _to_linear(bg_rgb)
215
-
216
- # light wrap
217
  lw = _light_wrap(bg_rgb, a, radius=lw_radius, amount=lw_amount)
218
  lw_lin = _to_linear(np.clip(lw, 0, 255).astype(np.uint8))
219
 
@@ -233,30 +255,27 @@ def _resolve_sam2_cfg(cfg_str: str) -> str:
233
  return str(candidate)
234
  if cfg_path.exists():
235
  return str(cfg_path)
236
- # Last resort: common defaults inside the repo
237
  for name in ["configs/sam2/sam2_hiera_l.yaml", "configs/sam2/sam2_hiera_b.yaml", "configs/sam2/sam2_hiera_s.yaml"]:
238
  p = TP_SAM2 / name
239
  if p.exists():
240
  return str(p)
241
- return str(cfg_str) # let build_sam2 raise a clear error
242
 
243
  def _find_hiera_config_if_hieradet(cfg_path: str) -> Optional[str]:
244
  """If config references 'hieradet', try to find a 'hiera' config."""
245
  try:
246
  with open(cfg_path, "r") as f:
247
  data = yaml.safe_load(f)
248
- target = None
249
- model = data.get("model", {})
250
- enc = (model.get("image_encoder") or {})
251
- trunk = (enc.get("trunk") or {})
252
  target = trunk.get("_target_") or trunk.get("target")
253
  if isinstance(target, str) and "hieradet" in target:
254
  for y in TP_SAM2.rglob("*.yaml"):
255
  try:
256
  with open(y, "r") as f2:
257
- d2 = yaml.safe_load(f2)
258
- m2 = (d2 or {}).get("model", {})
259
- e2 = (m2.get("image_encoder") or {})
260
  t2 = (e2.get("trunk") or {})
261
  tgt2 = t2.get("_target_") or t2.get("target")
262
  if isinstance(tgt2, str) and ".hiera." in tgt2:
@@ -313,7 +332,7 @@ def _try_build(cfg_path: str):
313
  try:
314
  try:
315
  sam = _try_build(cfg)
316
- except Exception as e1:
317
  alt_cfg = _find_hiera_config_if_hieradet(cfg)
318
  if alt_cfg:
319
  logger.info(f"SAM2: retrying with alt config: {alt_cfg}")
@@ -426,7 +445,6 @@ def load_matany() -> Tuple[Optional[object], bool, Dict[str, Any]]:
426
  repo_id = os.environ.get("MATANY_REPO_ID", "")
427
  ckpt = os.environ.get("MATANY_CHECKPOINT", "")
428
 
429
- # Check if this fork needs a prebuilt network
430
  try:
431
  sig = inspect.signature(InferenceCore)
432
  if "network" in sig.parameters and sig.parameters["network"].default is inspect._empty:
@@ -656,7 +674,6 @@ def fallback_composite(video_path: Union[str, Path],
656
  # Stage-A (Transparent Export) Functions
657
  # --------------------------------------------------------------------------------------
658
  def _checkerboard_bg(w: int, h: int, tile: int = 32) -> np.ndarray:
659
- """RGB checkerboard for preview when no real alpha is possible."""
660
  y, x = np.mgrid[0:h, 0:w]
661
  c = ((x // tile) + (y // tile)) % 2
662
  a = np.where(c == 0, 200, 150).astype(np.uint8)
@@ -670,7 +687,6 @@ def _build_stage_a_rgba_vp9_from_fg_alpha(
670
  size: Tuple[int, int],
671
  src_audio: Optional[Union[str, Path]] = None,
672
  ) -> bool:
673
- """Merge FG+ALPHA → RGBA WebM (VP9 with alpha)."""
674
  if not _probe_ffmpeg():
675
  return False
676
  w, h = size
@@ -702,7 +718,6 @@ def _build_stage_a_rgba_vp9_from_mask(
702
  fps: int,
703
  size: Tuple[int, int],
704
  ) -> bool:
705
- """Merge original video + static mask → RGBA WebM (VP9 with alpha)."""
706
  if not _probe_ffmpeg():
707
  return False
708
  w, h = size
@@ -733,7 +748,6 @@ def _build_stage_a_checkerboard_from_fg_alpha(
733
  fps: int,
734
  size: Tuple[int, int],
735
  ) -> bool:
736
- """Preview: FG+ALPHA over checkerboard → MP4 (no real alpha)."""
737
  fg_cap = cv2.VideoCapture(str(fg_path))
738
  al_cap = cv2.VideoCapture(str(alpha_path))
739
  if not fg_cap.isOpened() or not al_cap.isOpened():
@@ -766,7 +780,6 @@ def _build_stage_a_checkerboard_from_mask(
766
  fps: int,
767
  size: Tuple[int, int],
768
  ) -> bool:
769
- """Preview: original video + static mask over checkerboard → MP4."""
770
  cap = cv2.VideoCapture(str(video_path))
771
  if not cap.isOpened():
772
  return False
@@ -790,4 +803,4 @@ def _build_stage_a_checkerboard_from_mask(
790
  finally:
791
  cap.release()
792
  writer.release()
793
- return ok_any
 
1
  #!/usr/bin/env python3
2
  """
3
+ BackgroundFX Pro - Model Loading & Utilities (Hardened)
4
+ ======================================================
5
+ - Avoids heavy CUDA/Hydra work at import time
6
+ - Adds timeouts to subprocess probes
7
+ - Safer sys.path wiring for third_party repos
8
  """
9
 
10
  from __future__ import annotations
 
20
 
21
  import numpy as np
22
  import yaml
 
23
 
24
  # --------------------------------------------------------------------------------------
25
+ # Logging (ensure a handler exists very early)
26
  # --------------------------------------------------------------------------------------
27
  logger = logging.getLogger("backgroundfx_pro")
28
+ if not logger.handlers:
29
+ _h = logging.StreamHandler()
30
+ _h.setFormatter(logging.Formatter("[%(asctime)s] %(levelname)s: %(message)s"))
31
+ logger.addHandler(_h)
32
+ logger.setLevel(logging.INFO)
33
+
34
+ # Pin OpenCV threads (helps libgomp stability in Spaces)
35
+ try:
36
+ cv_threads = int(os.environ.get("CV_THREADS", "1"))
37
+ if hasattr(cv2, "setNumThreads"):
38
+ cv2.setNumThreads(cv_threads)
39
+ except Exception:
40
+ pass
41
 
42
  # --------------------------------------------------------------------------------------
43
  # Optional dependencies
 
51
  # --------------------------------------------------------------------------------------
52
  # Path setup for third_party repos
53
  # --------------------------------------------------------------------------------------
54
+ ROOT = Path(__file__).resolve().parent.parent # project root
55
  TP_SAM2 = Path(os.environ.get("THIRD_PARTY_SAM2_DIR", ROOT / "third_party" / "sam2")).resolve()
56
  TP_MATANY = Path(os.environ.get("THIRD_PARTY_MATANY_DIR", ROOT / "third_party" / "matanyone")).resolve()
57
 
58
  def _add_sys_path(p: Path) -> None:
59
+ if p.exists():
60
+ p_str = str(p)
61
+ if p_str not in sys.path:
62
+ sys.path.insert(0, p_str)
63
+ else:
64
+ logger.warning(f"third_party path not found: {p}")
65
 
66
  _add_sys_path(TP_SAM2)
67
  _add_sys_path(TP_MATANY)
68
 
69
  # --------------------------------------------------------------------------------------
70
+ # Safe Torch accessors (no top-level import)
71
  # --------------------------------------------------------------------------------------
72
+ def _torch():
 
 
 
73
  try:
74
+ import torch # local import avoids early CUDA init during module import
75
+ return torch
76
+ except Exception as e:
77
+ logger.warning(f"[models.safe-torch] import failed: {e}")
78
+ return None
79
 
80
  def _has_cuda() -> bool:
81
+ t = _torch()
82
+ if t is None:
83
+ return False
84
  try:
85
+ return bool(t.cuda.is_available())
86
+ except Exception as e:
87
+ logger.warning(f"[models.safe-torch] cuda.is_available() failed: {e}")
88
  return False
89
 
90
  def _pick_device(env_key: str) -> str:
 
93
  return requested
94
  return "cuda" if _has_cuda() else "cpu"
95
 
96
+ # --------------------------------------------------------------------------------------
97
+ # Basic Utilities
98
+ # --------------------------------------------------------------------------------------
99
+ def _ffmpeg_bin() -> str:
100
+ return os.environ.get("FFMPEG_BIN", "ffmpeg")
101
+
102
+ def _probe_ffmpeg(timeout: int = 2) -> bool:
103
+ try:
104
+ subprocess.run([_ffmpeg_bin(), "-version"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True, timeout=timeout)
105
+ return True
106
+ except Exception:
107
+ return False
108
+
109
  def _ensure_dir(p: Path) -> None:
110
  p.mkdir(parents=True, exist_ok=True)
111
 
 
172
  # Compositing & Image Processing
173
  # --------------------------------------------------------------------------------------
174
  def _refine_alpha(alpha: np.ndarray, erode_px: int = 1, dilate_px: int = 2, blur_px: float = 1.5) -> np.ndarray:
 
175
  if alpha.dtype != np.float32:
176
  a = alpha.astype(np.float32)
177
  if a.max() > 1.0:
 
203
  return np.clip(np.power(x, 1.0 / gamma) * 255.0, 0, 255).astype(np.uint8)
204
 
205
  def _light_wrap(bg_rgb: np.ndarray, alpha01: np.ndarray, radius: int = 5, amount: float = 0.18) -> np.ndarray:
 
206
  r = max(1, int(radius))
207
  inv = 1.0 - alpha01
208
  inv_blur = cv2.GaussianBlur(inv, (r | 1, r | 1), 0)
 
210
  return lw
211
 
212
  def _despill_edges(fg_rgb: np.ndarray, alpha01: np.ndarray, amount: float = 0.35) -> np.ndarray:
213
+ w = 1.0 - 2.0 * np.abs(alpha01 - 0.5)
 
214
  w = np.clip(w, 0.0, 1.0)
215
  hsv = cv2.cvtColor(fg_rgb.astype(np.uint8), cv2.COLOR_RGB2HSV).astype(np.float32)
216
  H, S, V = cv2.split(hsv)
 
219
  out = cv2.cvtColor(hsv2.astype(np.uint8), cv2.COLOR_HSV2RGB)
220
  return out
221
 
222
+ def _composite_frame_pro(
223
+ fg_rgb: np.ndarray, alpha: np.ndarray, bg_rgb: np.ndarray,
224
+ erode_px: int = None, dilate_px: int = None, blur_px: float = None,
225
+ lw_radius: int = None, lw_amount: float = None, despill_amount: float = None
226
+ ) -> np.ndarray:
227
  erode_px = erode_px if erode_px is not None else int(os.environ.get("EDGE_ERODE", "1"))
228
  dilate_px = dilate_px if dilate_px is not None else int(os.environ.get("EDGE_DILATE", "2"))
229
  blur_px = blur_px if blur_px is not None else float(os.environ.get("EDGE_BLUR", "1.5"))
 
231
  lw_amount = lw_amount if lw_amount is not None else float(os.environ.get("LIGHTWRAP_AMOUNT", "0.18"))
232
  despill_amount = despill_amount if despill_amount is not None else float(os.environ.get("DESPILL_AMOUNT", "0.35"))
233
 
 
234
  a = _refine_alpha(alpha, erode_px=erode_px, dilate_px=dilate_px, blur_px=blur_px)
 
 
235
  fg_rgb = _despill_edges(fg_rgb, a, amount=despill_amount)
236
 
 
237
  fg_lin = _to_linear(fg_rgb)
238
  bg_lin = _to_linear(bg_rgb)
 
 
239
  lw = _light_wrap(bg_rgb, a, radius=lw_radius, amount=lw_amount)
240
  lw_lin = _to_linear(np.clip(lw, 0, 255).astype(np.uint8))
241
 
 
255
  return str(candidate)
256
  if cfg_path.exists():
257
  return str(cfg_path)
 
258
  for name in ["configs/sam2/sam2_hiera_l.yaml", "configs/sam2/sam2_hiera_b.yaml", "configs/sam2/sam2_hiera_s.yaml"]:
259
  p = TP_SAM2 / name
260
  if p.exists():
261
  return str(p)
262
+ return str(cfg_str)
263
 
264
  def _find_hiera_config_if_hieradet(cfg_path: str) -> Optional[str]:
265
  """If config references 'hieradet', try to find a 'hiera' config."""
266
  try:
267
  with open(cfg_path, "r") as f:
268
  data = yaml.safe_load(f)
269
+ model = data.get("model", {}) or {}
270
+ enc = model.get("image_encoder") or {}
271
+ trunk = enc.get("trunk") or {}
 
272
  target = trunk.get("_target_") or trunk.get("target")
273
  if isinstance(target, str) and "hieradet" in target:
274
  for y in TP_SAM2.rglob("*.yaml"):
275
  try:
276
  with open(y, "r") as f2:
277
+ d2 = yaml.safe_load(f2) or {}
278
+ e2 = (d2.get("model", {}) or {}).get("image_encoder") or {}
 
279
  t2 = (e2.get("trunk") or {})
280
  tgt2 = t2.get("_target_") or t2.get("target")
281
  if isinstance(tgt2, str) and ".hiera." in tgt2:
 
332
  try:
333
  try:
334
  sam = _try_build(cfg)
335
+ except Exception:
336
  alt_cfg = _find_hiera_config_if_hieradet(cfg)
337
  if alt_cfg:
338
  logger.info(f"SAM2: retrying with alt config: {alt_cfg}")
 
445
  repo_id = os.environ.get("MATANY_REPO_ID", "")
446
  ckpt = os.environ.get("MATANY_CHECKPOINT", "")
447
 
 
448
  try:
449
  sig = inspect.signature(InferenceCore)
450
  if "network" in sig.parameters and sig.parameters["network"].default is inspect._empty:
 
674
  # Stage-A (Transparent Export) Functions
675
  # --------------------------------------------------------------------------------------
676
  def _checkerboard_bg(w: int, h: int, tile: int = 32) -> np.ndarray:
 
677
  y, x = np.mgrid[0:h, 0:w]
678
  c = ((x // tile) + (y // tile)) % 2
679
  a = np.where(c == 0, 200, 150).astype(np.uint8)
 
687
  size: Tuple[int, int],
688
  src_audio: Optional[Union[str, Path]] = None,
689
  ) -> bool:
 
690
  if not _probe_ffmpeg():
691
  return False
692
  w, h = size
 
718
  fps: int,
719
  size: Tuple[int, int],
720
  ) -> bool:
 
721
  if not _probe_ffmpeg():
722
  return False
723
  w, h = size
 
748
  fps: int,
749
  size: Tuple[int, int],
750
  ) -> bool:
 
751
  fg_cap = cv2.VideoCapture(str(fg_path))
752
  al_cap = cv2.VideoCapture(str(alpha_path))
753
  if not fg_cap.isOpened() or not al_cap.isOpened():
 
780
  fps: int,
781
  size: Tuple[int, int],
782
  ) -> bool:
 
783
  cap = cv2.VideoCapture(str(video_path))
784
  if not cap.isOpened():
785
  return False
 
803
  finally:
804
  cap.release()
805
  writer.release()
806
+ return ok_any
perf_tuning.py CHANGED
@@ -1,8 +1,10 @@
1
- # perf_tuning.py
2
  #!/usr/bin/env python3
3
  """
4
- Forces CUDA use (or fails fast), configures cuDNN/TF32, and logs a clear GPU banner.
5
- Loaded automatically because pipeline.py does: `import perf_tuning` (best-effort).
 
 
 
6
  """
7
 
8
  import os
@@ -15,59 +17,107 @@
15
  log.addHandler(h)
16
  log.setLevel(logging.INFO)
17
 
18
- try:
19
- import torch
20
- except Exception as e:
21
- raise RuntimeError(f"PyTorch not importable: {e}")
 
 
 
22
 
23
- require_cuda = os.environ.get("REQUIRE_CUDA", "0").strip() == "1"
24
- force_idx_env = os.environ.get("FORCE_CUDA_DEVICE", "").strip()
25
- mem_frac = float(os.environ.get("CUDA_MEMORY_FRACTION", "0.98"))
26
-
27
- if not torch.cuda.is_available():
28
- if require_cuda:
29
- raise RuntimeError("CUDA is NOT available, but REQUIRE_CUDA=1. "
30
- "Make sure the Space is on GPU and the container runs with --gpus all.")
31
- else:
32
- log.warning("CUDA not available; running on CPU. Set REQUIRE_CUDA=1 to fail fast.")
33
  else:
34
- # Choose device
35
  try:
36
- idx = int(force_idx_env) if force_idx_env != "" else 0
37
- except Exception:
38
- idx = 0
39
- if idx >= torch.cuda.device_count() or idx < 0:
40
- idx = 0
 
 
 
 
41
 
42
- torch.cuda.set_device(idx)
 
 
 
 
 
 
 
43
 
44
- # Perf knobs
45
- try:
46
- torch.backends.cuda.matmul.allow_tf32 = True
47
- except Exception:
48
- pass
49
- try:
50
- torch.backends.cudnn.allow_tf32 = True
51
- torch.backends.cudnn.benchmark = True
52
- except Exception:
53
- pass
54
 
55
- # Reserve VRAM fraction (best effort)
56
- try:
57
- torch.cuda.set_per_process_memory_fraction(mem_frac, idx)
58
- except Exception:
59
- pass
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
- # Log a clear banner
62
- try:
63
- name = torch.cuda.get_device_name(idx)
64
- cap = torch.cuda.get_device_capability(idx)
65
- total_gb = torch.cuda.get_device_properties(idx).total_memory / (1024**3)
66
- free_gb = torch.cuda.mem_get_info()[0] / (1024**3)
67
- log.info(f"Using CUDA device {idx}: {name} | cc {cap[0]}.{cap[1]} | "
68
- f"VRAM {total_gb:.2f} GB (free ~{free_gb:.2f} GB) | TF32:ON | cuDNN benchmark:ON")
69
- except Exception:
70
- log.info("Using CUDA; device info unavailable (but cuda.is_available()==True).")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
 
72
  # Optional: limit OpenCV threads if provided
73
  threads = os.environ.get("OPENCV_NUM_THREADS")
 
 
1
  #!/usr/bin/env python3
2
  """
3
+ perf_tuning.py (Hardened)
4
+ - No hard CUDA touching at import time (prevents startup hangs on Spaces).
5
+ - Optional "strict" modes via env if you *really* want fail-fast behavior.
6
+ - Applies safe flags (TF32/cudnn.benchmark) best-effort.
7
+ - Short, defensive GPU banner (only if explicitly enabled).
8
  """
9
 
10
  import os
 
17
  log.addHandler(h)
18
  log.setLevel(logging.INFO)
19
 
20
+ # ---- Feature flags (env) -----------------------------------------------------
21
+ DISABLED = os.getenv("PERF_TUNING_DISABLED", "0").strip() == "1"
22
+ STRICT_IMPORT_FAIL = os.getenv("PERF_TUNING_IMPORT_STRICT", "0").strip() == "1" # if 1, may raise on import
23
+ EAGER_CUDA = os.getenv("PERF_TUNING_EAGER_CUDA", "0").strip() == "1" # if 1, do CUDA probing now
24
+ REQUIRE_CUDA = os.getenv("REQUIRE_CUDA", "0").strip() == "1" # prefer not to fail at import
25
+ FORCE_IDX_ENV = os.getenv("FORCE_CUDA_DEVICE", "").strip()
26
+ MEM_FRAC_STR = os.getenv("CUDA_MEMORY_FRACTION", "0.98").strip()
27
 
28
+ if DISABLED:
29
+ log.info("perf_tuning: disabled by PERF_TUNING_DISABLED=1")
 
 
 
 
 
 
 
 
30
  else:
31
+ # Import torch defensively (do NOT crash the app if it's not there)
32
  try:
33
+ import importlib
34
+ torch = importlib.import_module("torch")
35
+ except Exception as e:
36
+ msg = f"perf_tuning: PyTorch not importable at import-time: {e}"
37
+ if STRICT_IMPORT_FAIL:
38
+ raise RuntimeError(msg)
39
+ else:
40
+ log.warning(msg)
41
+ torch = None
42
 
43
+ def _bool_cuda_available():
44
+ if torch is None:
45
+ return False
46
+ try:
47
+ return bool(torch.cuda.is_available())
48
+ except Exception as e:
49
+ log.warning(f"perf_tuning: cuda.is_available() failed: {e}")
50
+ return False
51
 
52
+ # Soft gating: if user *requires* CUDA, set a marker we can read later
53
+ if REQUIRE_CUDA and not _bool_cuda_available():
54
+ os.environ["BFX_REQUIRE_CUDA_FAILED"] = "1"
55
+ msg = "CUDA NOT available but REQUIRE_CUDA=1 (will run on CPU unless app checks this later)."
56
+ if STRICT_IMPORT_FAIL:
57
+ raise RuntimeError(msg)
58
+ else:
59
+ log.warning(msg)
 
 
60
 
61
+ # Always try “cheap” flags that won’t touch devices
62
+ if torch is not None:
63
+ try:
64
+ # These do not require an active CUDA context
65
+ if hasattr(torch.backends, "cuda") and hasattr(torch.backends.cuda, "matmul"):
66
+ try:
67
+ torch.backends.cuda.matmul.allow_tf32 = True
68
+ except Exception:
69
+ pass
70
+ if hasattr(torch.backends, "cudnn"):
71
+ try:
72
+ torch.backends.cudnn.allow_tf32 = True
73
+ torch.backends.cudnn.benchmark = True
74
+ except Exception:
75
+ pass
76
+ except Exception as e:
77
+ log.debug(f"perf_tuning: backend flags suppressed: {e}")
78
 
79
+ # Only do potentially blocking CUDA work if explicitly requested
80
+ if EAGER_CUDA and torch is not None:
81
+ try:
82
+ # Choose device (optional)
83
+ try:
84
+ idx = int(FORCE_IDX_ENV) if FORCE_IDX_ENV != "" else 0
85
+ except Exception:
86
+ idx = 0
87
+ try:
88
+ torch.cuda.set_device(idx)
89
+ except Exception as e:
90
+ log.warning(f"perf_tuning: set_device({idx}) failed: {e}")
91
+
92
+ # Memory fraction is optional and sometimes flaky—guard it
93
+ try:
94
+ mem_frac = float(MEM_FRAC_STR)
95
+ torch.cuda.set_per_process_memory_fraction(mem_frac, idx)
96
+ except Exception as e:
97
+ log.debug(f"perf_tuning: set_per_process_memory_fraction skipped: {e}")
98
+
99
+ # Best-effort banner; every call is wrapped so nothing blocks startup
100
+ try:
101
+ name = torch.cuda.get_device_name(idx)
102
+ except Exception as e:
103
+ name = f"? ({e})"
104
+ try:
105
+ cap = torch.cuda.get_device_capability(idx)
106
+ cap_s = f"{cap[0]}.{cap[1]}"
107
+ except Exception as e:
108
+ cap_s = f"? ({e})"
109
+ try:
110
+ total_gb = torch.cuda.get_device_properties(idx).total_memory / (1024**3)
111
+ except Exception as e:
112
+ total_gb = f"? ({e})"
113
+ try:
114
+ free_gb = torch.cuda.mem_get_info()[0] / (1024**3)
115
+ except Exception as e:
116
+ free_gb = f"? ({e})"
117
+
118
+ log.info(f"CUDA device {idx}: {name} | cc {cap_s} | VRAM {total_gb} GB (free ~{free_gb} GB) | TF32:ON | cuDNN benchmark:ON")
119
+ except Exception as e:
120
+ log.warning(f"perf_tuning: eager CUDA probe failed (non-fatal): {e}")
121
 
122
  # Optional: limit OpenCV threads if provided
123
  threads = os.environ.get("OPENCV_NUM_THREADS")
pipeline.py CHANGED
@@ -1,9 +1,11 @@
1
  #!/usr/bin/env python3
2
  """
3
- BackgroundFX Pro - Memory-Optimized Pipeline
4
- ===========================================
5
- Orchestrates SAM2 MatAnyone Compositing with aggressive memory management.
6
- Models are loaded sequentially and freed immediately after use.
 
 
7
  """
8
 
9
  from __future__ import annotations
@@ -13,85 +15,128 @@
13
  import time
14
  import tempfile
15
  import logging
 
16
  from pathlib import Path
17
  from typing import Optional, Tuple, Dict, Any, Union
18
 
19
- import torch
20
- from models import (
21
- load_sam2, run_sam2_mask, load_matany, run_matany,
22
- fallback_mask, fallback_composite, composite_video,
23
- _cv_read_first_frame, _save_mask_png, _ensure_dir, _mux_audio, _probe_ffmpeg,
24
- _refine_mask_grabcut, _build_stage_a_rgba_vp9_from_fg_alpha,
25
- _build_stage_a_rgba_vp9_from_mask, _build_stage_a_checkerboard_from_fg_alpha,
26
- _build_stage_a_checkerboard_from_mask
27
- )
28
-
29
- # Try to apply GPU/perf tuning early
30
- try:
31
- import perf_tuning # noqa: F401
32
- except Exception:
33
- pass
34
-
35
  # --------------------------------------------------------------------------------------
36
  # Logging
37
  # --------------------------------------------------------------------------------------
38
  logger = logging.getLogger("backgroundfx_pro")
39
- logger.setLevel(logging.INFO)
40
  if not logger.handlers:
41
  _h = logging.StreamHandler()
42
  _h.setFormatter(logging.Formatter("[%(asctime)s] %(levelname)s: %(message)s"))
43
  logger.addHandler(_h)
 
44
 
45
  # --------------------------------------------------------------------------------------
46
- # Memory Management Utilities
47
  # --------------------------------------------------------------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  def _cleanup_temp_files(tmp_root: Path) -> None:
49
- """Clean up temporary files aggressively"""
50
  try:
51
- for pattern in ["*.tmp", "*.temp", "*.bak"]:
52
  for f in tmp_root.glob(pattern):
53
  f.unlink(missing_ok=True)
54
  except Exception:
55
  pass
56
 
57
  def _log_memory() -> float:
58
- """Log current GPU memory usage and return allocated GB"""
59
- if torch.cuda.is_available():
60
- try:
61
- allocated = torch.cuda.memory_allocated() / 1e9
62
- reserved = torch.cuda.memory_reserved() / 1e9
 
 
 
 
63
  logger.info(f"GPU memory: {allocated:.1f}GB allocated, {reserved:.1f}GB reserved")
64
- return allocated
65
- except Exception:
66
- pass
67
  return 0.0
68
 
69
  def _force_cleanup() -> None:
70
- """Aggressive memory cleanup"""
71
  try:
72
  gc.collect()
73
- if torch.cuda.is_available():
74
- torch.cuda.empty_cache()
75
- torch.cuda.synchronize()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  except Exception as e:
77
- logger.warning(f"Cleanup warning: {e}")
 
78
 
79
  # --------------------------------------------------------------------------------------
80
- # Main Processing Function (Memory-Optimized)
81
  # --------------------------------------------------------------------------------------
82
- def process(video_path: Union[str, Path],
83
- bg_image_path: Union[str, Path],
84
- point_x: Optional[float] = None,
85
- point_y: Optional[float] = None,
86
- auto_box: bool = False,
87
- work_dir: Optional[Union[str, Path]] = None) -> Tuple[Optional[str], Dict[str, Any]]:
 
 
88
  """
89
  Memory-optimized orchestration: lazy loading, sequential model usage, aggressive cleanup.
90
-
91
  Flow:
92
- 1. Load SAM2 → get mask → FREE SAM2 immediately
93
- 2. Load MatAnyone process FREE MatAnyone immediately
94
- 3. Composite & finalize (CPU-based operations)
 
 
 
95
  """
96
  t0 = time.time()
97
  diagnostics: Dict[str, Any] = {
@@ -110,105 +155,130 @@ def process(video_path: Union[str, Path],
110
  tmp_root = Path(work_dir) if work_dir else Path(tempfile.mkdtemp(prefix="bfx_"))
111
  _ensure_dir(tmp_root)
112
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  try:
114
  # 0) Basic video info
115
- logger.info("Reading video metadata...")
116
  first_frame, fps, (vw, vh) = _cv_read_first_frame(video_path)
117
  diagnostics["fps"] = int(fps or 25)
118
  diagnostics["resolution"] = [int(vw), int(vh)]
119
-
120
  if first_frame is None or vw == 0 or vh == 0:
121
  diagnostics["fallback_used"] = "invalid_video"
122
  return None, diagnostics
123
 
124
  diagnostics["memory_peak_gb"] = max(diagnostics["memory_peak_gb"], _log_memory())
125
 
126
- # 1) PHASE 1: SAM2 Loading & Processing → IMMEDIATE CLEANUP
127
- logger.info("=== PHASE 1: Loading SAM2 for segmentation ===")
128
  predictor, sam2_ok, sam_meta = load_sam2()
129
- diagnostics["sam2_meta"] = sam_meta
130
- diagnostics["device_sam2"] = sam_meta.get("sam2_device") if sam_meta else None
131
-
132
  diagnostics["memory_peak_gb"] = max(diagnostics["memory_peak_gb"], _log_memory())
133
 
134
  seed_mask = None
135
  mask_png = tmp_root / "seed_mask.png"
136
-
 
137
  if sam2_ok and predictor is not None:
138
- logger.info("Running SAM2 segmentation...")
139
  px = int(point_x) if point_x is not None else None
140
  py = int(point_y) if point_y is not None else None
141
-
142
  seed_mask, ok_mask = run_sam2_mask(
143
  predictor, first_frame,
144
  point=(px, py) if (px is not None and py is not None) else None,
145
  auto=auto_box
146
  )
147
  diagnostics["sam2_ok"] = bool(ok_mask)
148
-
149
- # CRITICAL: Free SAM2 immediately after getting the mask
150
- logger.info("Freeing SAM2 memory...")
151
- del predictor
152
- predictor = None
153
- _force_cleanup()
154
- diagnostics["memory_peak_gb"] = max(diagnostics["memory_peak_gb"], _log_memory())
155
-
156
  else:
157
- ok_mask = False
158
- logger.info("SAM2 not available or failed to load")
 
 
 
 
 
 
 
 
159
 
160
  # Fallback mask generation if SAM2 failed
161
  if not ok_mask or seed_mask is None:
162
- logger.info("Using fallback mask generation...")
163
  seed_mask = fallback_mask(first_frame)
164
  diagnostics["fallback_used"] = "mask_generation"
165
  _force_cleanup()
166
 
167
  # Optional GrabCut refinement
168
  if int(os.environ.get("REFINE_GRABCUT", "1")) == 1:
169
- logger.info("Refining mask with GrabCut...")
170
  seed_mask = _refine_mask_grabcut(first_frame, seed_mask)
171
  _force_cleanup()
172
 
173
  _save_mask_png(seed_mask, mask_png)
174
-
175
- # Clean up the first frame from memory
176
- del first_frame
 
 
 
177
  _force_cleanup()
178
  _cleanup_temp_files(tmp_root)
179
 
180
- # 2) PHASE 2: MatAnyone Loading & Processing → IMMEDIATE CLEANUP
181
- logger.info("=== PHASE 2: Loading MatAnyone for temporal processing ===")
182
  matany, mat_ok, mat_meta = load_matany()
183
- diagnostics["matany_meta"] = mat_meta
184
- diagnostics["device_matany"] = mat_meta.get("matany_device") if mat_meta else None
185
-
186
  diagnostics["memory_peak_gb"] = max(diagnostics["memory_peak_gb"], _log_memory())
187
 
188
  fg_path, al_path = None, None
189
  out_dir = tmp_root / "matany_out"
190
  _ensure_dir(out_dir)
191
-
 
192
  if mat_ok and matany is not None:
193
- logger.info("Running MatAnyone processing...")
194
  fg_path, al_path, ran = run_matany(matany, video_path, mask_png, out_dir)
195
  diagnostics["matany_ok"] = bool(ran)
196
-
197
- # CRITICAL: Free MatAnyone immediately after processing
198
- logger.info("Freeing MatAnyone memory...")
199
- del matany
200
- matany = None
201
- _force_cleanup()
202
- diagnostics["memory_peak_gb"] = max(diagnostics["memory_peak_gb"], _log_memory())
203
  else:
204
- ran = False
205
- logger.info("MatAnyone not available, disabled, or failed to load")
 
 
 
 
 
 
 
 
206
 
207
- # 3) PHASE 3: Stage-A Creation (lightweight, CPU-based)
208
- logger.info("=== PHASE 3: Building Stage-A (transparent export) ===")
209
  stageA_path = None
210
  stageA_ok = False
211
-
212
  if diagnostics["matany_ok"] and fg_path and al_path:
213
  stageA_path = tmp_root / "stageA_transparent.webm"
214
  if _probe_ffmpeg():
@@ -238,57 +308,56 @@ def process(video_path: Union[str, Path],
238
  else ("MP4 checkerboard preview (no real alpha)" if stageA_ok else "Stage-A build failed")
239
  )
240
 
241
- # Optional: return Stage-A instead of final composite
242
  if os.environ.get("RETURN_STAGE_A", "0").strip() == "1" and stageA_ok:
243
  _force_cleanup()
244
  _cleanup_temp_files(tmp_root)
 
 
245
  return str(stageA_path), diagnostics
246
 
247
- # 4) PHASE 4: Final Compositing (CPU-based, memory-efficient)
248
- logger.info("=== PHASE 4: Creating final composite ===")
249
  output_path = tmp_root / "output.mp4"
250
-
251
  if diagnostics["matany_ok"] and fg_path and al_path:
252
- logger.info("Compositing with MatAnyone outputs...")
253
  ok_comp = composite_video(fg_path, al_path, bg_image_path, output_path, diagnostics["fps"], (vw, vh))
254
  if not ok_comp:
255
- logger.info("MatAnyone composite failed; falling back to static mask composite.")
256
  fallback_composite(video_path, mask_png, bg_image_path, output_path)
257
  diagnostics["fallback_used"] = (diagnostics["fallback_used"] or "") + "+composite_static"
258
  else:
259
- logger.info("Using static mask composite...")
260
  fallback_composite(video_path, mask_png, bg_image_path, output_path)
261
  diagnostics["fallback_used"] = (diagnostics["fallback_used"] or "") or "composite_static"
262
 
263
- # Clean up intermediate files
264
  _cleanup_temp_files(tmp_root)
265
  _force_cleanup()
266
 
267
- # 5) PHASE 5: Audio Muxing (final step)
268
- logger.info("=== PHASE 5: Adding audio track ===")
269
  final_path = tmp_root / "output_with_audio.mp4"
270
  if _probe_ffmpeg():
271
  mux_ok = _mux_audio(video_path, output_path, final_path)
272
  if mux_ok:
273
- # Clean up the silent version
274
  output_path.unlink(missing_ok=True)
275
  _force_cleanup()
276
  diagnostics["elapsed_sec"] = round(time.time() - t0, 3)
277
- logger.info(f"Processing completed successfully in {diagnostics['elapsed_sec']}s")
278
- logger.info(f"Peak GPU memory usage: {diagnostics['memory_peak_gb']:.1f}GB")
279
  return str(final_path), diagnostics
280
 
281
- # Final cleanup
282
  _force_cleanup()
283
  diagnostics["elapsed_sec"] = round(time.time() - t0, 3)
284
- logger.info(f"Processing completed in {diagnostics['elapsed_sec']}s (no audio)")
285
- logger.info(f"Peak GPU memory usage: {diagnostics['memory_peak_gb']:.1f}GB")
286
  return str(output_path), diagnostics
287
 
288
  except Exception as e:
289
- logger.error(f"Processing failed: {e}")
290
  import traceback
291
- logger.error(f"Traceback: {traceback.format_exc()}")
292
  _force_cleanup()
293
  diagnostics["error"] = str(e)
294
  diagnostics["elapsed_sec"] = round(time.time() - t0, 3)
@@ -297,4 +366,4 @@ def process(video_path: Union[str, Path],
297
  finally:
298
  # Ensure cleanup even if something goes wrong
299
  _force_cleanup()
300
- _cleanup_temp_files(tmp_root)
 
1
  #!/usr/bin/env python3
2
  """
3
+ BackgroundFX Pro - Memory-Optimized Pipeline (Hardened)
4
+ ======================================================
5
+ - Lazy-imports heavy 'models' module to avoid Space boot stalls
6
+ - Sequential load → run → free (SAM2 then MatAnyone)
7
+ - Aggressive but non-blocking GPU cleanup (no synchronize())
8
+ - Verbose breadcrumbs for pinpointing stalls
9
  """
10
 
11
  from __future__ import annotations
 
15
  import time
16
  import tempfile
17
  import logging
18
+ import importlib
19
  from pathlib import Path
20
  from typing import Optional, Tuple, Dict, Any, Union
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  # --------------------------------------------------------------------------------------
23
  # Logging
24
  # --------------------------------------------------------------------------------------
25
  logger = logging.getLogger("backgroundfx_pro")
 
26
  if not logger.handlers:
27
  _h = logging.StreamHandler()
28
  _h.setFormatter(logging.Formatter("[%(asctime)s] %(levelname)s: %(message)s"))
29
  logger.addHandler(_h)
30
+ logger.setLevel(logging.INFO)
31
 
32
  # --------------------------------------------------------------------------------------
33
+ # Safe Torch accessors (avoid import-time CUDA touches)
34
  # --------------------------------------------------------------------------------------
35
+ def _torch():
36
+ try:
37
+ import torch # local import to avoid early CUDA init in module scope
38
+ return torch
39
+ except Exception as e:
40
+ logger.warning(f"[safe-torch] import failed: {e}")
41
+ return None
42
+
43
def _cuda_available() -> Optional[bool]:
    """Probe CUDA availability without letting torch errors escape.

    Returns the result of torch.cuda.is_available(), or None when torch
    is missing or the probe itself raises.
    """
    torch_mod = _torch()
    if torch_mod is None:
        return None
    try:
        available = torch_mod.cuda.is_available()
    except Exception as e:
        logger.warning(f"[safe-torch] torch.cuda.is_available() failed: {e}")
        return None
    return available
52
+
53
+ # --------------------------------------------------------------------------------------
54
+ # Lightweight utilities
55
+ # --------------------------------------------------------------------------------------
56
+ def _ensure_dir(p: Union[str, Path]) -> None:
57
+ Path(p).mkdir(parents=True, exist_ok=True)
58
+
59
  def _cleanup_temp_files(tmp_root: Path) -> None:
60
+ """Clean up temporary files aggressively."""
61
  try:
62
+ for pattern in ("*.tmp", "*.temp", "*.bak"):
63
  for f in tmp_root.glob(pattern):
64
  f.unlink(missing_ok=True)
65
  except Exception:
66
  pass
67
 
68
def _log_memory() -> float:
    """Log current GPU memory usage; return allocated GB, or 0.0 on any failure.

    Never blocks and never raises: all torch/CUDA errors are suppressed,
    since this is purely diagnostic.
    """
    torch_mod = _torch()
    if torch_mod is None:
        return 0.0
    try:
        if not _cuda_available():
            return 0.0
        allocated = torch_mod.cuda.memory_allocated() / 1e9
        reserved = torch_mod.cuda.memory_reserved() / 1e9
        logger.info(f"GPU memory: {allocated:.1f}GB allocated, {reserved:.1f}GB reserved")
        return float(allocated)
    except Exception as e:
        logger.debug(f"[mem-log] suppressed: {e}")
        return 0.0
83
 
84
def _force_cleanup() -> None:
    """Reclaim Python and GPU memory without any blocking CUDA calls."""
    try:
        gc.collect()
    except Exception:
        pass
    torch_mod = _torch()
    if torch_mod is not None:
        try:
            # Deliberately no torch.cuda.synchronize(): it can hang on
            # driver problems; empty_cache() alone is safe here.
            if _cuda_available():
                torch_mod.cuda.empty_cache()
        except Exception as e:
            logger.debug(f"[cleanup] suppressed: {e}")
99
+
100
+ # --------------------------------------------------------------------------------------
101
+ # Lazy import of heavy models module
102
+ # --------------------------------------------------------------------------------------
103
+ _models_ref = None
104
+
105
def _models():
    """Return the (cached) 'models' module, importing it on first use.

    Deferring this import keeps Space startup fast; an import failure is
    logged with a traceback and re-raised for the caller to handle.
    """
    global _models_ref
    if _models_ref is None:
        logger.info("[init] Importing models module lazily…")
        try:
            _models_ref = importlib.import_module("models")
        except Exception as e:
            logger.exception(f"[init] Failed to import models: {e}")
            raise
        logger.info("[init] models imported OK.")
    return _models_ref
118
 
119
  # --------------------------------------------------------------------------------------
120
+ # Main Processing Function
121
  # --------------------------------------------------------------------------------------
122
+ def process(
123
+ video_path: Union[str, Path],
124
+ bg_image_path: Union[str, Path],
125
+ point_x: Optional[float] = None,
126
+ point_y: Optional[float] = None,
127
+ auto_box: bool = False,
128
+ work_dir: Optional[Union[str, Path]] = None
129
+ ) -> Tuple[Optional[str], Dict[str, Any]]:
130
  """
131
  Memory-optimized orchestration: lazy loading, sequential model usage, aggressive cleanup.
132
+
133
  Flow:
134
+ 0. Read video metadata
135
+ 1. SAM2 → mask (free immediately)
136
+ 2. MatAnyone → FG/alpha (free immediately)
137
+ 3. Stage-A build (transparent or checkerboard)
138
+ 4. Final composite
139
+ 5. Audio mux
140
  """
141
  t0 = time.time()
142
  diagnostics: Dict[str, Any] = {
 
155
  tmp_root = Path(work_dir) if work_dir else Path(tempfile.mkdtemp(prefix="bfx_"))
156
  _ensure_dir(tmp_root)
157
 
158
+ # Defer heavy function imports until inside the call
159
+ M = _models()
160
+ # pull only the needed callables
161
+ _cv_read_first_frame = M._cv_read_first_frame
162
+ _save_mask_png = M._save_mask_png
163
+ _probe_ffmpeg = M._probe_ffmpeg
164
+ _mux_audio = M._mux_audio
165
+ _refine_mask_grabcut = M._refine_mask_grabcut
166
+ fallback_mask = M.fallback_mask
167
+ fallback_composite = M.fallback_composite
168
+ composite_video = M.composite_video
169
+ load_sam2 = M.load_sam2
170
+ run_sam2_mask = M.run_sam2_mask
171
+ load_matany = M.load_matany
172
+ run_matany = M.run_matany
173
+ _build_stage_a_rgba_vp9_from_fg_alpha = M._build_stage_a_rgba_vp9_from_fg_alpha
174
+ _build_stage_a_rgba_vp9_from_mask = M._build_stage_a_rgba_vp9_from_mask
175
+ _build_stage_a_checkerboard_from_fg_alpha = M._build_stage_a_checkerboard_from_fg_alpha
176
+ _build_stage_a_checkerboard_from_mask = M._build_stage_a_checkerboard_from_mask
177
+
178
  try:
179
  # 0) Basic video info
180
+ logger.info("[0] Reading video metadata")
181
  first_frame, fps, (vw, vh) = _cv_read_first_frame(video_path)
182
  diagnostics["fps"] = int(fps or 25)
183
  diagnostics["resolution"] = [int(vw), int(vh)]
184
+
185
  if first_frame is None or vw == 0 or vh == 0:
186
  diagnostics["fallback_used"] = "invalid_video"
187
  return None, diagnostics
188
 
189
  diagnostics["memory_peak_gb"] = max(diagnostics["memory_peak_gb"], _log_memory())
190
 
191
+ # 1) PHASE 1: SAM2
192
+ logger.info("[1] Loading SAM2")
193
  predictor, sam2_ok, sam_meta = load_sam2()
194
+ diagnostics["sam2_meta"] = sam_meta or {}
195
+ diagnostics["device_sam2"] = (sam_meta or {}).get("sam2_device")
196
+
197
  diagnostics["memory_peak_gb"] = max(diagnostics["memory_peak_gb"], _log_memory())
198
 
199
  seed_mask = None
200
  mask_png = tmp_root / "seed_mask.png"
201
+
202
+ ok_mask = False
203
  if sam2_ok and predictor is not None:
204
+ logger.info("[1] Running SAM2 segmentation")
205
  px = int(point_x) if point_x is not None else None
206
  py = int(point_y) if point_y is not None else None
 
207
  seed_mask, ok_mask = run_sam2_mask(
208
  predictor, first_frame,
209
  point=(px, py) if (px is not None and py is not None) else None,
210
  auto=auto_box
211
  )
212
  diagnostics["sam2_ok"] = bool(ok_mask)
 
 
 
 
 
 
 
 
213
  else:
214
+ logger.info("[1] SAM2 unavailable or failed to load.")
215
+
216
+ # Free SAM2 ASAP
217
+ try:
218
+ del predictor
219
+ except Exception:
220
+ pass
221
+ predictor = None
222
+ _force_cleanup()
223
+ diagnostics["memory_peak_gb"] = max(diagnostics["memory_peak_gb"], _log_memory())
224
 
225
  # Fallback mask generation if SAM2 failed
226
  if not ok_mask or seed_mask is None:
227
+ logger.info("[1] Using fallback mask generation")
228
  seed_mask = fallback_mask(first_frame)
229
  diagnostics["fallback_used"] = "mask_generation"
230
  _force_cleanup()
231
 
232
  # Optional GrabCut refinement
233
  if int(os.environ.get("REFINE_GRABCUT", "1")) == 1:
234
+ logger.info("[1] Refining mask with GrabCut")
235
  seed_mask = _refine_mask_grabcut(first_frame, seed_mask)
236
  _force_cleanup()
237
 
238
  _save_mask_png(seed_mask, mask_png)
239
+
240
+ # Free first frame
241
+ try:
242
+ del first_frame
243
+ except Exception:
244
+ pass
245
  _force_cleanup()
246
  _cleanup_temp_files(tmp_root)
247
 
248
+ # 2) PHASE 2: MatAnyone
249
+ logger.info("[2] Loading MatAnyone")
250
  matany, mat_ok, mat_meta = load_matany()
251
+ diagnostics["matany_meta"] = mat_meta or {}
252
+ diagnostics["device_matany"] = (mat_meta or {}).get("matany_device")
253
+
254
  diagnostics["memory_peak_gb"] = max(diagnostics["memory_peak_gb"], _log_memory())
255
 
256
  fg_path, al_path = None, None
257
  out_dir = tmp_root / "matany_out"
258
  _ensure_dir(out_dir)
259
+
260
+ ran = False
261
  if mat_ok and matany is not None:
262
+ logger.info("[2] Running MatAnyone processing")
263
  fg_path, al_path, ran = run_matany(matany, video_path, mask_png, out_dir)
264
  diagnostics["matany_ok"] = bool(ran)
 
 
 
 
 
 
 
265
  else:
266
+ logger.info("[2] MatAnyone unavailable/disabled/failed to load.")
267
+
268
+ # Free MatAnyone ASAP
269
+ try:
270
+ del matany
271
+ except Exception:
272
+ pass
273
+ matany = None
274
+ _force_cleanup()
275
+ diagnostics["memory_peak_gb"] = max(diagnostics["memory_peak_gb"], _log_memory())
276
 
277
+ # 3) PHASE 3: Stage-A
278
+ logger.info("[3] Building Stage-A (transparent or checkerboard)…")
279
  stageA_path = None
280
  stageA_ok = False
281
+
282
  if diagnostics["matany_ok"] and fg_path and al_path:
283
  stageA_path = tmp_root / "stageA_transparent.webm"
284
  if _probe_ffmpeg():
 
308
  else ("MP4 checkerboard preview (no real alpha)" if stageA_ok else "Stage-A build failed")
309
  )
310
 
 
311
  if os.environ.get("RETURN_STAGE_A", "0").strip() == "1" and stageA_ok:
312
  _force_cleanup()
313
  _cleanup_temp_files(tmp_root)
314
+ diagnostics["elapsed_sec"] = round(time.time() - t0, 3)
315
+ logger.info(f"[done] Returned Stage-A in {diagnostics['elapsed_sec']}s")
316
  return str(stageA_path), diagnostics
317
 
318
+ # 4) PHASE 4: Final Compositing
319
+ logger.info("[4] Creating final composite")
320
  output_path = tmp_root / "output.mp4"
321
+
322
  if diagnostics["matany_ok"] and fg_path and al_path:
323
+ logger.info("[4] Compositing with MatAnyone outputs")
324
  ok_comp = composite_video(fg_path, al_path, bg_image_path, output_path, diagnostics["fps"], (vw, vh))
325
  if not ok_comp:
326
+ logger.info("[4] Composite failed; falling back to static mask composite.")
327
  fallback_composite(video_path, mask_png, bg_image_path, output_path)
328
  diagnostics["fallback_used"] = (diagnostics["fallback_used"] or "") + "+composite_static"
329
  else:
330
+ logger.info("[4] Using static mask composite")
331
  fallback_composite(video_path, mask_png, bg_image_path, output_path)
332
  diagnostics["fallback_used"] = (diagnostics["fallback_used"] or "") or "composite_static"
333
 
 
334
  _cleanup_temp_files(tmp_root)
335
  _force_cleanup()
336
 
337
+ # 5) PHASE 5: Audio Mux
338
+ logger.info("[5] Adding audio track")
339
  final_path = tmp_root / "output_with_audio.mp4"
340
  if _probe_ffmpeg():
341
  mux_ok = _mux_audio(video_path, output_path, final_path)
342
  if mux_ok:
 
343
  output_path.unlink(missing_ok=True)
344
  _force_cleanup()
345
  diagnostics["elapsed_sec"] = round(time.time() - t0, 3)
346
+ logger.info(f"[done] Success in {diagnostics['elapsed_sec']}s")
347
+ logger.info(f"[done] Peak GPU memory usage: {diagnostics['memory_peak_gb']:.1f}GB")
348
  return str(final_path), diagnostics
349
 
350
+ # Fallback return without audio
351
  _force_cleanup()
352
  diagnostics["elapsed_sec"] = round(time.time() - t0, 3)
353
+ logger.info(f"[done] Completed (no audio) in {diagnostics['elapsed_sec']}s")
354
+ logger.info(f"[done] Peak GPU memory usage: {diagnostics['memory_peak_gb']:.1f}GB")
355
  return str(output_path), diagnostics
356
 
357
  except Exception as e:
358
+ logger.error(f"[error] Processing failed: {e}")
359
  import traceback
360
+ logger.error(f"[error] Traceback:\n{traceback.format_exc()}")
361
  _force_cleanup()
362
  diagnostics["error"] = str(e)
363
  diagnostics["elapsed_sec"] = round(time.time() - t0, 3)
 
366
  finally:
367
  # Ensure cleanup even if something goes wrong
368
  _force_cleanup()
369
+ _cleanup_temp_files(tmp_root)
requirements.txt CHANGED
@@ -9,7 +9,8 @@ moviepy==1.0.3
9
  decord==0.6.0
10
  Pillow==10.4.0
11
  numpy==1.26.4
12
- mediapipe==0.10.14
 
13
 
14
  # ===== Gradio UI =====
15
  gradio==5.42.0
@@ -28,10 +29,10 @@ scikit-image==0.24.0
28
  tqdm==4.66.5
29
 
30
  # ===== Helpers / caching =====
31
- huggingface_hub>=0.33.5
32
  ffmpeg-python==0.2.0
33
  psutil==6.0.0
34
- requests==2.31.0
35
  scikit-learn==1.5.1
36
 
37
  # ===== (Optional) Extras =====
 
9
  decord==0.6.0
10
  Pillow==10.4.0
11
  numpy==1.26.4
12
+ mediapipe==0.10.14
13
+ protobuf==4.25.3
14
 
15
  # ===== Gradio UI =====
16
  gradio==5.42.0
 
29
  tqdm==4.66.5
30
 
31
  # ===== Helpers / caching =====
32
+ huggingface_hub==0.33.5
33
  ffmpeg-python==0.2.0
34
  psutil==6.0.0
35
+ requests==2.32.3
36
  scikit-learn==1.5.1
37
 
38
  # ===== (Optional) Extras =====
ui.py CHANGED
@@ -1,6 +1,8 @@
1
- # ui.py
2
  """
3
- BackgroundFX Pro — Gradio UI, background generators, and data sources.
 
 
4
  """
5
 
6
  import io
@@ -14,13 +16,12 @@
14
  from PIL import Image
15
  import gradio as gr
16
 
17
- from pipeline import (
18
- process_video_gpu_optimized, stop_processing, processing_active,
19
- SAM2_ENABLED, MATANY_ENABLED, GPU_NAME, GPU_MEMORY
20
- )
21
-
22
  logger = logging.getLogger("ui")
23
-
 
 
 
 
24
 
25
  # ---- Background generators ----
26
  def create_gradient_background(gradient_type: str, width: int, height: int) -> Image.Image:
@@ -51,7 +52,6 @@ def create_gradient_background(gradient_type: str, width: int, height: int) -> I
51
  img[i, :] = [r, g, b]
52
  return Image.fromarray(img)
53
 
54
-
55
  def create_solid_color(color: str, width: int, height: int) -> Image.Image:
56
  color_map = {
57
  "white": (255, 255, 255),
@@ -66,22 +66,25 @@ def create_solid_color(color: str, width: int, height: int) -> Image.Image:
66
  rgb = color_map.get(color, (70, 130, 180))
67
  return Image.fromarray(np.full((height, width, 3), rgb, dtype=np.uint8))
68
 
69
-
70
  def generate_ai_background(prompt: str) -> Tuple[Optional[Image.Image], str]:
71
  try:
72
- if not prompt.strip():
73
  return None, "Please enter a prompt"
74
  models = [
75
  "black-forest-labs/FLUX.1-schnell",
76
  "stabilityai/stable-diffusion-xl-base-1.0",
77
- "runwayml/stable-diffusion-v1-5"
78
  ]
79
  enhanced_prompt = f"professional video background, {prompt}, high quality, 16:9, cinematic lighting, detailed"
 
 
80
  for model in models:
81
  try:
82
  url = f"https://api-inference.huggingface.co/models/{model}"
83
- headers = {"Authorization": f"Bearer {os.getenv('HUGGINGFACE_TOKEN', 'hf_placeholder')}"}
84
- payload = {"inputs": enhanced_prompt, "parameters": {"width": 1024, "height": 576, "num_inference_steps": 20, "guidance_scale": 7.5}}
 
 
85
  r = requests.post(url, headers=headers, json=payload, timeout=60, stream=True)
86
  if r.status_code == 200 and "image" in r.headers.get("content-type", "").lower():
87
  buf = io.BytesIO(r.content if r.raw is None else r.raw.read())
@@ -95,11 +98,10 @@ def generate_ai_background(prompt: str) -> Tuple[Optional[Image.Image], str]:
95
  logger.error(f"AI background error: {e}")
96
  return create_gradient_background("default", 1920, 1080), "Default due to error"
97
 
98
-
99
  # ---- MyAvatar API ----
100
  class MyAvatarAPI:
101
  def __init__(self):
102
- self.api_base = "https://app.myavatar.dk/api"
103
  self.videos_cache: List[Dict[str, Any]] = []
104
  self.last_refresh = 0
105
 
@@ -140,11 +142,20 @@ def get_video_url(self, selection: str) -> Optional[str]:
140
  logger.error(f"Parse selection failed: {e}")
141
  return None
142
 
143
-
144
  myavatar_api = MyAvatarAPI()
145
 
 
 
 
 
 
 
146
 
147
- # ---- UI ↔ Pipeline bridge: streaming handler ----
 
 
 
 
148
  def process_video_with_background_stoppable(
149
  input_video: Optional[str],
150
  myavatar_selection: str,
@@ -154,15 +165,12 @@ def process_video_with_background_stoppable(
154
  custom_background: Optional[str],
155
  ai_prompt: str
156
  ):
157
- # start
158
- from pipeline import processing_active as _active_ref # ensure we use the module global
159
- import pipeline # to toggle the flag
160
-
161
- pipeline.processing_active = True
162
  try:
163
- yield gr.update(visible=False), gr.update(visible=True), None, "Starting processing..."
 
164
 
165
- # resolve video
166
  video_path = None
167
  if input_video:
168
  video_path = input_video
@@ -173,16 +181,23 @@ def process_video_with_background_stoppable(
173
  r.raise_for_status()
174
  with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
175
  for chunk in r.iter_content(chunk_size=1 << 20):
 
 
 
176
  if chunk:
177
  tmp.write(chunk)
178
  video_path = tmp.name
179
 
 
 
 
 
180
  if not video_path:
181
  yield gr.update(visible=True), gr.update(visible=False), None, "No video provided"
182
  return
183
 
184
- # background
185
- yield gr.update(visible=False), gr.update(visible=True), None, "Generating background..."
186
  bg_img = None
187
  if background_type == "gradient":
188
  bg_img = create_gradient_background(gradient_type, 1920, 1080)
@@ -190,50 +205,68 @@ def process_video_with_background_stoppable(
190
  bg_img = create_solid_color(solid_color, 1920, 1080)
191
  elif background_type == "custom" and custom_background:
192
  try:
193
- from PIL import Image
194
  bg_img = Image.open(custom_background).convert("RGB")
195
  except Exception:
196
  bg_img = None
197
  elif background_type == "ai" and ai_prompt:
198
  bg_img, _ = generate_ai_background(ai_prompt)
199
 
 
 
 
 
200
  if bg_img is None:
201
  yield gr.update(visible=True), gr.update(visible=False), None, "No background generated"
202
  return
203
 
204
- # process
205
- yield gr.update(visible=False), gr.update(visible=True), None, "Processing video with GPU optimization..."
206
- bg_array = np.array(bg_img.resize((1280, 720), Image.Resampling.LANCZOS))
207
- with tempfile.NamedTemporaryFile(suffix="_processed.mp4", delete=False) as tmp_final:
208
- final_path = tmp_final.name
209
-
210
- out = process_video_gpu_optimized(video_path, bg_array, final_path)
 
 
 
 
 
 
 
 
 
211
 
212
- try:
213
- if video_path != input_video and video_path and os.path.exists(video_path):
214
- os.unlink(video_path)
215
- except Exception:
216
- pass
217
 
218
- if out and pipeline.processing_active:
219
- yield gr.update(visible=True), gr.update(visible=False), out, "Video processing completed successfully!"
220
  else:
221
- yield gr.update(visible=True), gr.update(visible=False), None, "Processing was stopped or failed"
222
 
223
  except Exception as e:
224
  logger.error(f"UI pipeline error: {e}")
225
  yield gr.update(visible=True), gr.update(visible=False), None, f"Processing error: {e}"
226
  finally:
227
- pipeline.processing_active = False
228
-
229
-
230
- def stop_processing_button():
231
- from pipeline import stop_processing
232
- stop_processing()
233
- return gr.update(visible=False), "Processing stopped by user"
234
-
235
 
236
  # ---- UI factory ----
 
 
 
 
 
 
 
 
 
 
237
  def create_interface():
238
  css = """
239
  .main-container { max-width: 1200px; margin: 0 auto; }
@@ -241,13 +274,12 @@ def create_interface():
241
  .gradient-preview { border: 2px solid #ddd; border-radius: 10px; }
242
  """
243
 
244
- with gr.Blocks(css=css, title="BackgroundFX Pro - GPU Optimized") as app:
245
- gr.Markdown("# BackgroundFX Pro - GPU Optimized\n### Professional Video Background Replacement with SAM2 + MatAnyone")
246
 
247
  with gr.Row():
248
- sam2_status = "Ready" if SAM2_ENABLED else "Disabled"
249
- matany_status = "Ready" if MATANY_ENABLED else "Disabled"
250
- gr.Markdown(f"**System Status:** Online | **GPU:** {GPU_NAME} | **SAM2:** {sam2_status} | **MatAnyone:** {matany_status}")
251
 
252
  with gr.Row():
253
  with gr.Column(scale=1):
@@ -277,19 +309,19 @@ def create_interface():
277
  ai_preview = gr.Image(label="AI Generated Background", height=150, visible=False)
278
 
279
  with gr.Row():
280
- process_btn = gr.Button("Process Video", variant="primary", size="lg")
281
- stop_btn = gr.Button("Stop Processing", variant="stop", size="lg", visible=False)
282
 
283
  with gr.Column(scale=1):
284
  gr.Markdown("## Results")
285
  result_video = gr.Video(label="Processed Video", height=400)
286
  status_output = gr.Textbox(label="Processing Status", lines=5, max_lines=10, elem_classes=["status-box"])
287
  gr.Markdown("""
288
- ### Processing Pipeline:
289
- 1. **SAM2 Segmentation** GPU-accelerated person detection
290
- 2. **MatAnyone Matting** temporal consistency
291
- 3. **GPU Compositing** real-time background replacement
292
- 4. **Memory Optimization** — chunked processing + OOM recovery
293
  """)
294
 
295
  # handlers
 
1
+ #!/usr/bin/env python3
2
  """
3
+ BackgroundFX Pro — Gradio UI, background generators, and data sources (Hardened)
4
+ - No top-level import of pipeline (lazy import in handlers)
5
+ - Compatible with pipeline.process()
6
  """
7
 
8
  import io
 
16
  from PIL import Image
17
  import gradio as gr
18
 
 
 
 
 
 
19
  logger = logging.getLogger("ui")
20
+ if not logger.handlers:
21
+ h = logging.StreamHandler()
22
+ h.setFormatter(logging.Formatter("[%(asctime)s] %(levelname)s: %(message)s"))
23
+ logger.addHandler(h)
24
+ logger.setLevel(logging.INFO)
25
 
26
  # ---- Background generators ----
27
  def create_gradient_background(gradient_type: str, width: int, height: int) -> Image.Image:
 
52
  img[i, :] = [r, g, b]
53
  return Image.fromarray(img)
54
 
 
55
  def create_solid_color(color: str, width: int, height: int) -> Image.Image:
56
  color_map = {
57
  "white": (255, 255, 255),
 
66
  rgb = color_map.get(color, (70, 130, 180))
67
  return Image.fromarray(np.full((height, width, 3), rgb, dtype=np.uint8))
68
 
 
69
  def generate_ai_background(prompt: str) -> Tuple[Optional[Image.Image], str]:
70
  try:
71
+ if not prompt or not prompt.strip():
72
  return None, "Please enter a prompt"
73
  models = [
74
  "black-forest-labs/FLUX.1-schnell",
75
  "stabilityai/stable-diffusion-xl-base-1.0",
76
+ "runwayml/stable-diffusion-v1-5",
77
  ]
78
  enhanced_prompt = f"professional video background, {prompt}, high quality, 16:9, cinematic lighting, detailed"
79
+ token = os.getenv("HUGGINGFACE_TOKEN", "")
80
+ headers = {"Authorization": f"Bearer {token}"} if token else {}
81
  for model in models:
82
  try:
83
  url = f"https://api-inference.huggingface.co/models/{model}"
84
+ payload = {
85
+ "inputs": enhanced_prompt,
86
+ "parameters": {"width": 1024, "height": 576, "num_inference_steps": 20, "guidance_scale": 7.5},
87
+ }
88
  r = requests.post(url, headers=headers, json=payload, timeout=60, stream=True)
89
  if r.status_code == 200 and "image" in r.headers.get("content-type", "").lower():
90
  buf = io.BytesIO(r.content if r.raw is None else r.raw.read())
 
98
  logger.error(f"AI background error: {e}")
99
  return create_gradient_background("default", 1920, 1080), "Default due to error"
100
 
 
101
  # ---- MyAvatar API ----
102
  class MyAvatarAPI:
103
  def __init__(self):
104
+ self.api_base = os.getenv("MYAVATAR_API_BASE", "https://app.myavatar.dk/api")
105
  self.videos_cache: List[Dict[str, Any]] = []
106
  self.last_refresh = 0
107
 
 
142
  logger.error(f"Parse selection failed: {e}")
143
  return None
144
 
 
145
  myavatar_api = MyAvatarAPI()
146
 
147
+ # ---- Minimal stop flag (request-scoped) ----
148
+ # We avoid pipeline globals; this just short-circuits the generator.
149
class Stopper:
    """Mutable stop flag shared between the processing and stop handlers."""

    def __init__(self):
        # False until the user presses "Stop Processing".
        self.stop = False

# Single request-scoped flag; handlers flip it, process resets it.
STOP = Stopper()
153
 
154
def stop_processing_button():
    """Gradio handler: request a stop, hide the stop button, report status."""
    STOP.stop = True
    hidden = gr.update(visible=False)
    return hidden, "Processing stopped by user"
157
+
158
+ # ---- UI ↔ Pipeline bridge ----
159
  def process_video_with_background_stoppable(
160
  input_video: Optional[str],
161
  myavatar_selection: str,
 
165
  custom_background: Optional[str],
166
  ai_prompt: str
167
  ):
168
+ import importlib
 
 
 
 
169
  try:
170
+ STOP.stop = False
171
+ yield gr.update(visible=False), gr.update(visible=True), None, "Starting…"
172
 
173
+ # Resolve video
174
  video_path = None
175
  if input_video:
176
  video_path = input_video
 
181
  r.raise_for_status()
182
  with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as tmp:
183
  for chunk in r.iter_content(chunk_size=1 << 20):
184
+ if STOP.stop:
185
+ yield gr.update(visible=True), gr.update(visible=False), None, "Stopped."
186
+ return
187
  if chunk:
188
  tmp.write(chunk)
189
  video_path = tmp.name
190
 
191
+ if STOP.stop:
192
+ yield gr.update(visible=True), gr.update(visible=False), None, "Stopped."
193
+ return
194
+
195
  if not video_path:
196
  yield gr.update(visible=True), gr.update(visible=False), None, "No video provided"
197
  return
198
 
199
+ # Background
200
+ yield gr.update(visible=False), gr.update(visible=True), None, "Preparing background"
201
  bg_img = None
202
  if background_type == "gradient":
203
  bg_img = create_gradient_background(gradient_type, 1920, 1080)
 
205
  bg_img = create_solid_color(solid_color, 1920, 1080)
206
  elif background_type == "custom" and custom_background:
207
  try:
 
208
  bg_img = Image.open(custom_background).convert("RGB")
209
  except Exception:
210
  bg_img = None
211
  elif background_type == "ai" and ai_prompt:
212
  bg_img, _ = generate_ai_background(ai_prompt)
213
 
214
+ if STOP.stop:
215
+ yield gr.update(visible=True), gr.update(visible=False), None, "Stopped."
216
+ return
217
+
218
  if bg_img is None:
219
  yield gr.update(visible=True), gr.update(visible=False), None, "No background generated"
220
  return
221
 
222
+ # Save background to a temp file for pipeline.process()
223
+ with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp_bg:
224
+ bg_img.save(tmp_bg.name, format="PNG")
225
+ bg_path = tmp_bg.name
226
+
227
+ # Run pipeline lazily
228
+ yield gr.update(visible=False), gr.update(visible=True), None, "Processing video…"
229
+ pipe = importlib.import_module("pipeline")
230
+ out_path, diag = pipe.process(
231
+ video_path=video_path,
232
+ bg_image_path=bg_path,
233
+ point_x=None,
234
+ point_y=None,
235
+ auto_box=True,
236
+ work_dir=None
237
+ )
238
 
239
+ if STOP.stop:
240
+ yield gr.update(visible=True), gr.update(visible=False), None, "Stopped."
241
+ return
 
 
242
 
243
+ if out_path:
244
+ yield gr.update(visible=True), gr.update(visible=False), out_path, "Video processing completed successfully!"
245
  else:
246
+ yield gr.update(visible=True), gr.update(visible=False), None, f"Processing failed: {diag.get('error','unknown error')}"
247
 
248
  except Exception as e:
249
  logger.error(f"UI pipeline error: {e}")
250
  yield gr.update(visible=True), gr.update(visible=False), None, f"Processing error: {e}"
251
  finally:
252
+ # Best-effort cleanup of any temp download
253
+ try:
254
+ if input_video is None and 'video_path' in locals() and video_path and os.path.exists(video_path):
255
+ os.unlink(video_path)
256
+ except Exception:
257
+ pass
 
 
258
 
259
  # ---- UI factory ----
260
+ def _system_status():
261
+ # Avoid early CUDA probing: only show torch version if available
262
+ try:
263
+ import torch
264
+ tver = getattr(torch, "__version__", "?")
265
+ cver = getattr(getattr(torch, "version", None), "cuda", None)
266
+ return f"torch {tver} (CUDA {cver})"
267
+ except Exception:
268
+ return "torch not available"
269
+
270
  def create_interface():
271
  css = """
272
  .main-container { max-width: 1200px; margin: 0 auto; }
 
274
  .gradient-preview { border: 2px solid #ddd; border-radius: 10px; }
275
  """
276
 
277
+ with gr.Blocks(css=css, title="BackgroundFX Pro") as app:
278
+ gr.Markdown("# BackgroundFX Pro SAM2 + MatAnyone (Hardened)")
279
 
280
  with gr.Row():
281
+ status = _system_status()
282
+ gr.Markdown(f"**System Status:** Online | **Runtime:** {status}")
 
283
 
284
  with gr.Row():
285
  with gr.Column(scale=1):
 
309
  ai_preview = gr.Image(label="AI Generated Background", height=150, visible=False)
310
 
311
  with gr.Row():
312
+ process_btn = gr.Button("Process Video", variant="primary")
313
+ stop_btn = gr.Button("Stop Processing", variant="stop", visible=False)
314
 
315
  with gr.Column(scale=1):
316
  gr.Markdown("## Results")
317
  result_video = gr.Video(label="Processed Video", height=400)
318
  status_output = gr.Textbox(label="Processing Status", lines=5, max_lines=10, elem_classes=["status-box"])
319
  gr.Markdown("""
320
+ ### Pipeline
321
+ 1. SAM2 Segmentation mask
322
+ 2. MatAnyone Matting FG + ALPHA
323
+ 3. Stage-A export (transparent WebM or checkerboard)
324
+ 4. Final compositing (H.264)
325
  """)
326
 
327
  # handlers