MogensR committed on
Commit
32853be
·
1 Parent(s): 8fd190a

Update utils/__init__.py

Browse files
Files changed (1) hide show
  1. utils/__init__.py +163 -27
utils/__init__.py CHANGED
@@ -2,13 +2,20 @@
2
  """
3
  BackgroundFX Pro - CSP-Safe Application Entry Point
4
  Now with: live background preview + sources: Preset / Upload / Gradient / AI Generate
 
 
 
5
  """
6
 
7
  import early_env # <<< must be FIRST
8
 
9
- import os, time
10
  from typing import Optional, Dict, Any, Callable, Tuple
11
 
 
 
 
 
12
  # 1) CSP-safe Gradio env
13
  os.environ['GRADIO_ALLOW_FLAGGING'] = 'never'
14
  os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'
@@ -45,14 +52,14 @@ def _patched_get_type(schema):
45
  from processing.video.video_processor import CoreVideoProcessor, ProcessorConfig
46
  from processing.audio.audio_processor import AudioProcessor
47
 
48
- # Background helpers
49
  from utils import PROFESSIONAL_BACKGROUNDS, validate_video_file, create_professional_background
50
  # Gradient helper (add to utils; fallback here for preview only if missing)
51
  try:
52
  from utils import create_gradient_background
53
  except Exception:
54
  def create_gradient_background(spec: Dict[str, Any], width: int, height: int):
55
- # Lightweight fallback preview (linear only)
56
  import numpy as np
57
  import cv2
58
  def _to_rgb(c):
@@ -122,6 +129,12 @@ def _np_to_pil(arr: np.ndarray) -> Image.Image:
122
  arr = arr.clip(0, 255).astype(np.uint8)
123
  return Image.fromarray(arr)
124
 
 
 
 
 
 
 
125
  # ---------- main app ----------
126
  class VideoBackgroundApp:
127
  def __init__(self):
@@ -132,6 +145,9 @@ def __init__(self):
132
  self.audio_proc = AudioProcessor()
133
  self.models_loaded = False
134
  self.core_processor: Optional[CoreVideoProcessor] = None
 
 
 
135
  logger.info("VideoBackgroundApp initialized (device=%s)", self.device_mgr.get_optimal_device())
136
 
137
  def load_models(self, progress_callback: Optional[Callable] = None) -> str:
@@ -194,29 +210,144 @@ def preview_gradient(self, gtype: str, color1: str, color2: str, angle: int) ->
194
  bg = create_gradient_background(spec, PREVIEW_W, PREVIEW_H)
195
  return _np_to_pil(bg)
196
 
197
- def ai_generate_background(self, prompt: str, seed: int, width: int, height: int) -> Tuple[Optional[Image.Image], Optional[str], str]:
 
198
  """
199
- Try generating a background with diffusers; save to /tmp and return (img, path, status).
 
200
  """
 
 
 
201
  try:
202
- from diffusers import StableDiffusionPipeline
203
  import torch
204
- model_id = os.environ.get("BGFX_T2I_MODEL", "stabilityai/stable-diffusion-2-1")
205
- dtype = torch.float16 if torch.cuda.is_available() else torch.float32
206
- device = "cuda" if torch.cuda.is_available() else "cpu"
207
- pipe = StableDiffusionPipeline.from_pretrained(model_id, torch_dtype=dtype).to(device)
208
- g = torch.Generator(device=device).manual_seed(int(seed)) if seed is not None else None
209
- if device == "cuda":
210
- with torch.autocast("cuda"):
211
- img = pipe(prompt, height=height, width=width, guidance_scale=7.0, num_inference_steps=25, generator=g).images[0]
 
 
 
 
 
 
 
 
 
212
  else:
213
- img = pipe(prompt, height=height, width=width, guidance_scale=7.0, num_inference_steps=25, generator=g).images[0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
214
  tmp_path = f"/tmp/ai_bg_{int(time.time())}.png"
215
  img.save(tmp_path)
216
- return img.resize((PREVIEW_W, PREVIEW_H), Image.LANCZOS), tmp_path, f"AI background generated ✓ ({os.path.basename(tmp_path)})"
 
217
  except Exception as e:
218
- logger.warning("AI generation unavailable: %s", e)
219
- return None, None, f"AI generation unavailable: {e}"
220
 
221
  # ---- PROCESS VIDEO ----
222
  def process_video(
@@ -233,14 +364,18 @@ def process_video(
233
  ):
234
  if not self.models_loaded:
235
  return None, "Models not loaded yet"
236
-
237
  if not video:
238
  return None, "Please upload a video first."
239
 
240
- logger.info("process_video called (video=%s, source=%s, preset=%s, file=%s, grad=%s, ai=%s)",
241
- video, bg_source, preset_key, getattr(custom_bg_file, "name", None) if custom_bg_file else None,
242
- {"type": grad_type, "c1": grad_color1, "c2": grad_color2, "angle": grad_angle},
243
- ai_bg_path)
 
 
 
 
 
244
 
245
  output_path = f"/tmp/output_{int(time.time())}.mp4"
246
 
@@ -273,7 +408,7 @@ def process_video(
273
  result = self.core_processor.process_video(
274
  input_path=video,
275
  output_path=output_path,
276
- bg_config=bg_cfg
277
  )
278
  logger.info("Core processing done → %s", output_path)
279
 
@@ -367,7 +502,7 @@ def on_source_toggle(src):
367
  )
368
 
369
  # ✅ Clear any previous AI image path when switching source (avoids stale AI background)
370
- def _clear_ai_state(_):
371
  return None
372
  bg_source.change(fn=_clear_ai_state, inputs=[bg_source], outputs=[ai_bg_path_state])
373
 
@@ -399,12 +534,13 @@ def on_source_preview(src, pkey, gt, c1, c2, ang):
399
  # AI generate
400
  def ai_generate(prompt, seed, size):
401
  try:
402
- w, h = map(int, size.split("x"))
403
  except Exception:
404
  w, h = PREVIEW_W, PREVIEW_H
405
  img, path, msg = app.ai_generate_background(
406
  prompt or "professional modern office background, neutral colors, depth of field",
407
- int(seed), w, h
 
408
  )
409
  return img, (path or None), msg
410
  ai_go.click(fn=ai_generate, inputs=[ai_prompt, ai_seed, ai_size], outputs=[bg_preview, ai_bg_path_state, ai_status])
 
2
  """
3
  BackgroundFX Pro - CSP-Safe Application Entry Point
4
  Now with: live background preview + sources: Preset / Upload / Gradient / AI Generate
5
+ - Lazy-loaded Diffusers pipeline (VRAM-aware: sd-turbo / sdxl-turbo / sd-2.1 CPU)
6
+ - Preview shows the exact background used
7
+ - Clears stale AI image when switching sources
8
  """
9
 
10
  import early_env # <<< must be FIRST
11
 
12
+ import os, time, math
13
  from typing import Optional, Dict, Any, Callable, Tuple
14
 
15
+ # Prefer a writable cache in constrained environments (e.g., HF Spaces)
16
+ os.environ.setdefault("HF_HOME", "/tmp/hf")
17
+ os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1")
18
+
19
  # 1) CSP-safe Gradio env
20
  os.environ['GRADIO_ALLOW_FLAGGING'] = 'never'
21
  os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'
 
52
  from processing.video.video_processor import CoreVideoProcessor, ProcessorConfig
53
  from processing.audio.audio_processor import AudioProcessor
54
 
55
+ # Background helpers (kept lightweight to avoid cycles)
56
  from utils import PROFESSIONAL_BACKGROUNDS, validate_video_file, create_professional_background
57
  # Gradient helper (add to utils; fallback here for preview only if missing)
58
  try:
59
  from utils import create_gradient_background
60
  except Exception:
61
  def create_gradient_background(spec: Dict[str, Any], width: int, height: int):
62
+ # Lightweight fallback (linear+rotate only)
63
  import numpy as np
64
  import cv2
65
  def _to_rgb(c):
 
129
  arr = arr.clip(0, 255).astype(np.uint8)
130
  return Image.fromarray(arr)
131
 
132
+ def _div8(n: int) -> int:
133
+ # Ensure sizes are multiples of 8 for SD/VAEs (min 256)
134
+ n = int(n)
135
+ if n < 256: n = 256
136
+ return int(math.floor(n / 8.0) * 8)
137
+
138
  # ---------- main app ----------
139
  class VideoBackgroundApp:
140
  def __init__(self):
 
145
  self.audio_proc = AudioProcessor()
146
  self.models_loaded = False
147
  self.core_processor: Optional[CoreVideoProcessor] = None
148
+ # Text-to-image cache
149
+ self.t2i_pipe = None
150
+ self.t2i_model_id = None
151
  logger.info("VideoBackgroundApp initialized (device=%s)", self.device_mgr.get_optimal_device())
152
 
153
  def load_models(self, progress_callback: Optional[Callable] = None) -> str:
 
210
  bg = create_gradient_background(spec, PREVIEW_W, PREVIEW_H)
211
  return _np_to_pil(bg)
212
 
213
+ # ---- AI BG: lazy-load + reuse pipe ----
214
+ def _ensure_t2i(self):
215
  """
216
+ Load a text-to-image pipeline once with memory-efficient settings.
217
+ Returns (pipe, model_id, msg).
218
  """
219
+ if self.t2i_pipe is not None:
220
+ return self.t2i_pipe, self.t2i_model_id, "AI generator ready"
221
+
222
  try:
 
223
  import torch
224
+ from diffusers import AutoPipelineForText2Image, StableDiffusionPipeline
225
+ except Exception as e:
226
+ return None, None, f"AI generation unavailable (missing deps): {e}"
227
+
228
+ token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
229
+ device = "cuda" if getattr(__import__("torch"), "cuda", None) and __import__("torch").cuda.is_available() else "cpu"
230
+
231
+ # Try to estimate VRAM to pick a model
232
+ vram_gb = None
233
+ try:
234
+ vram_gb = self.device_mgr.get_device_memory_gb()
235
+ except Exception:
236
+ pass
237
+
238
+ if device == "cuda":
239
+ if vram_gb and vram_gb >= 12:
240
+ model_id = os.environ.get("BGFX_T2I_MODEL", "stabilityai/sdxl-turbo")
241
  else:
242
+ model_id = os.environ.get("BGFX_T2I_MODEL", "stabilityai/sd-turbo")
243
+ else:
244
+ model_id = os.environ.get("BGFX_T2I_MODEL", "stabilityai/stable-diffusion-2-1")
245
+
246
+ logger.info("Loading text-to-image model: %s (device=%s, VRAM=%s GB)", model_id, device, vram_gb)
247
+ dtype = __import__("torch").float16 if device == "cuda" else __import__("torch").float32
248
+
249
+ pipe = None
250
+ try:
251
+ # Unified API for turbo/SDXL/SD
252
+ pipe = AutoPipelineForText2Image.from_pretrained(
253
+ model_id,
254
+ torch_dtype=dtype,
255
+ use_safetensors=True,
256
+ token=token,
257
+ )
258
+ except Exception as e1:
259
+ try:
260
+ pipe = StableDiffusionPipeline.from_pretrained(
261
+ model_id,
262
+ torch_dtype=dtype,
263
+ use_safetensors=True,
264
+ safety_checker=None,
265
+ feature_extractor=None,
266
+ use_auth_token=token,
267
+ )
268
+ except Exception as e2:
269
+ return None, None, f"AI model load failed: {e1} / {e2}"
270
+
271
+ # Memory/perf knobs
272
+ try: pipe.set_progress_bar_config(disable=True)
273
+ except Exception: pass
274
+ try: pipe.enable_attention_slicing()
275
+ except Exception: pass
276
+ try: pipe.enable_vae_slicing()
277
+ except Exception: pass
278
+
279
+ if device == "cuda":
280
+ try: pipe.enable_xformers_memory_efficient_attention()
281
+ except Exception: pass
282
+ pipe = pipe.to(device)
283
+ else:
284
+ try: pipe.enable_sequential_cpu_offload()
285
+ except Exception: pass
286
+
287
+ self.t2i_pipe = pipe
288
+ self.t2i_model_id = model_id
289
+ return pipe, model_id, f"AI model loaded: {model_id}"
290
+
291
+ def ai_generate_background(self, prompt: str, seed: int, width: int, height: int) -> Tuple[Optional[Image.Image], Optional[str], str]:
292
+ """
293
+ Generate a background and save to /tmp. Returns (preview_img, path, status).
294
+ """
295
+ pipe, model_id, status_msg = self._ensure_t2i()
296
+ if pipe is None:
297
+ logger.warning(status_msg)
298
+ return None, None, status_msg
299
+
300
+ # Sizes: multiples of 8, clamped to safe range
301
+ w = _div8(width or PREVIEW_W)
302
+ h = _div8(height or PREVIEW_H)
303
+ w = max(256, min(w, 1536))
304
+ h = max(256, min(h, 1536))
305
+
306
+ # Prompt defaults aimed at "office-like" backgrounds
307
+ prompt = (prompt or "professional modern office background, neutral colors, soft depth of field, clean, minimal, photorealistic")
308
+ negative = "text, watermark, logo, people, person, artifact, noisy, blurry"
309
+
310
+ try:
311
+ import torch
312
+ device = "cuda" if getattr(torch, "cuda", None) and torch.cuda.is_available() else "cpu"
313
+ try:
314
+ g = torch.Generator(device=device).manual_seed(int(seed)) if seed is not None else None
315
+ except Exception:
316
+ g = None
317
+
318
+ steps = 4 if ("turbo" in (self.t2i_model_id or "").lower()) else 25
319
+ guidance = 1.0 if ("turbo" in (self.t2i_model_id or "").lower()) else 7.0
320
+
321
+ with torch.inference_mode():
322
+ if device == "cuda":
323
+ with torch.autocast("cuda"):
324
+ out = pipe(
325
+ prompt=prompt,
326
+ negative_prompt=negative,
327
+ height=h,
328
+ width=w,
329
+ guidance_scale=guidance,
330
+ num_inference_steps=steps,
331
+ generator=g,
332
+ )
333
+ else:
334
+ out = pipe(
335
+ prompt=prompt,
336
+ negative_prompt=negative,
337
+ height=h,
338
+ width=w,
339
+ guidance_scale=guidance,
340
+ num_inference_steps=steps,
341
+ generator=g,
342
+ )
343
+ img = out.images[0]
344
  tmp_path = f"/tmp/ai_bg_{int(time.time())}.png"
345
  img.save(tmp_path)
346
+
347
+ return img.resize((PREVIEW_W, PREVIEW_H), Image.LANCZOS), tmp_path, f"{status_msg} • Generated {w}x{h}"
348
  except Exception as e:
349
+ logger.exception("AI generation error")
350
+ return None, None, f"AI generation failed: {e}"
351
 
352
  # ---- PROCESS VIDEO ----
353
  def process_video(
 
364
  ):
365
  if not self.models_loaded:
366
  return None, "Models not loaded yet"
 
367
  if not video:
368
  return None, "Please upload a video first."
369
 
370
+ logger.info(
371
+ "process_video called (video=%s, source=%s, preset=%s, file=%s, grad=%s, ai=%s)",
372
+ video,
373
+ bg_source,
374
+ preset_key,
375
+ getattr(custom_bg_file, "name", None) if custom_bg_file else None,
376
+ {"type": grad_type, "c1": grad_color1, "c2": grad_color2, "angle": grad_angle},
377
+ ai_bg_path,
378
+ )
379
 
380
  output_path = f"/tmp/output_{int(time.time())}.mp4"
381
 
 
408
  result = self.core_processor.process_video(
409
  input_path=video,
410
  output_path=output_path,
411
+ bg_config=bg_cfg,
412
  )
413
  logger.info("Core processing done → %s", output_path)
414
 
 
502
  )
503
 
504
  # ✅ Clear any previous AI image path when switching source (avoids stale AI background)
505
+ def _clear_ai_state(_):
506
  return None
507
  bg_source.change(fn=_clear_ai_state, inputs=[bg_source], outputs=[ai_bg_path_state])
508
 
 
534
  # AI generate
535
  def ai_generate(prompt, seed, size):
536
  try:
537
+ w, h = map(int, (size or "640x360").split("x"))
538
  except Exception:
539
  w, h = PREVIEW_W, PREVIEW_H
540
  img, path, msg = app.ai_generate_background(
541
  prompt or "professional modern office background, neutral colors, depth of field",
542
+ int(seed) if seed is not None else 42,
543
+ w, h
544
  )
545
  return img, (path or None), msg
546
  ai_go.click(fn=ai_generate, inputs=[ai_prompt, ai_seed, ai_size], outputs=[bg_preview, ai_bg_path_state, ai_status])