Spaces:

Luminia
/

CorridorKey

Running on Zero

App Files Community

Nekochu committed 17 days ago

Commit

c2d53e4

1 Parent(s): 16d8be4

professional output package (Comp+FG+Matte+Processed)

Browse files

Files changed (1) hide show

app.py +140 -295

app.py CHANGED Viewed

@@ -29,17 +29,16 @@ import gradio as gr
 import onnxruntime as ort
 # Workaround: Gradio cache_examples bug with None outputs.
-# CSVLogger.flag() writes "" for None, read_from_flag("") calls json.loads("") -> crash.
 _original_read_from_flag = gr.components.Component.read_from_flag
 def _patched_read_from_flag(self, payload):
     """Return None for missing or blank payloads, else defer to the original.

     A payload of ``None`` or a string that is empty after stripping
     whitespace short-circuits to ``None``; anything else is forwarded
     unchanged to the saved ``_original_read_from_flag``.
     """
     if payload is None:
         return None
     # Blank strings are treated the same as a missing payload.
     if isinstance(payload, str) and payload.strip() == "":
         return None
     return _original_read_from_flag(self, payload)
 gr.components.Component.read_from_flag = _patched_read_from_flag
 from huggingface_hub import hf_hub_download
 cv2.setNumThreads(2)
 logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
 logger = logging.getLogger(__name__)
@@ -48,55 +47,40 @@ logger = logging.getLogger(__name__)
 # ---------------------------------------------------------------------------
 BIREFNET_REPO = "onnx-community/BiRefNet_lite-ONNX"
 BIREFNET_FILE = "onnx/model.onnx"
 MODELS_DIR = os.path.join(os.path.dirname(__file__), "models")
 CORRIDORKEY_MODELS = {
     "1024": os.path.join(MODELS_DIR, "corridorkey_1024.onnx"),
     "2048": os.path.join(MODELS_DIR, "corridorkey_2048.onnx"),
 }
 IMAGENET_MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32).reshape(1, 1, 3)
 IMAGENET_STD = np.array([0.229, 0.224, 0.225], dtype=np.float32).reshape(1, 1, 3)
 MAX_DURATION_CPU = 5
 MAX_DURATION_GPU = 30
 MAX_FRAMES = 150
-# GPU auto-detect via ONNX Runtime (no torch dependency)
 HAS_CUDA = "CUDAExecutionProvider" in ort.get_available_providers()
 # ---------------------------------------------------------------------------
-# Color utilities (numpy-only, from CorridorKeyModule/core/color_utils.py)
 # ---------------------------------------------------------------------------
 def linear_to_srgb(x):
     """Apply the piecewise sRGB encoding curve to linear values.

     Negative inputs are clamped to 0 first. Values at or below 0.0031308
     use the linear segment (``x * 12.92``); larger values use the power
     segment (``1.055 * x ** (1 / 2.4) - 0.055``).
     """
     clamped = np.clip(x, 0.0, None)
     linear_segment = clamped * 12.92
     power_segment = 1.055 * np.power(clamped, 1.0 / 2.4) - 0.055
     return np.where(clamped <= 0.0031308, linear_segment, power_segment)
 def srgb_to_linear(x):
     """Apply the piecewise sRGB decoding curve to encoded values.

     Negative inputs are clamped to 0 first. Values at or below 0.04045
     use the linear segment (``x / 12.92``); larger values use the power
     segment (``((x + 0.055) / 1.055) ** 2.4``).
     """
     clamped = np.clip(x, 0.0, None)
     linear_segment = clamped / 12.92
     power_segment = np.power((clamped + 0.055) / 1.055, 2.4)
     return np.where(clamped <= 0.04045, linear_segment, power_segment)
 def composite_straight(fg, bg, alpha):
     """Blend a straight (non-premultiplied) foreground over a background.

     Computes ``fg * alpha + bg * (1 - alpha)``.
     """
     foreground_part = fg * alpha
     background_part = bg * (1.0 - alpha)
     return foreground_part + background_part
 def despill(image, green_limit_mode="average", strength=1.0):
     if strength <= 0.0:
         return image
     r, g, b = image[..., 0], image[..., 1], image[..., 2]
     limit = (r + b) / 2.0 if green_limit_mode == "average" else np.maximum(r, b)
-    spill_amount = np.maximum(g - limit, 0.0)
-    g_new = g - spill_amount
-    r_new = r + spill_amount * 0.5
-    b_new = b + spill_amount * 0.5
-    despilled = np.stack([r_new, g_new, b_new], axis=-1)
-    if strength < 1.0:
-        return image * (1.0 - strength) + despilled * strength
-    return despilled
 def clean_matte(alpha_np, area_threshold=300, dilation=15, blur_size=5):
     is_3d = alpha_np.ndim == 3
@@ -104,39 +88,30 @@ def clean_matte(alpha_np, area_threshold=300, dilation=15, blur_size=5):
         alpha_np = alpha_np[:, :, 0]
     mask_8u = (alpha_np > 0.5).astype(np.uint8) * 255
     num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(mask_8u, connectivity=8)
-    # Vectorized: find valid labels in one pass
     valid = np.zeros(num_labels, dtype=bool)
     valid[1:] = stats[1:, cv2.CC_STAT_AREA] >= area_threshold
     cleaned = (valid[labels].astype(np.uint8) * 255)
     if dilation > 0:
         k = int(dilation * 2 + 1)
-        kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (k, k))
-        cleaned = cv2.dilate(cleaned, kernel)
     if blur_size > 0:
         b = int(blur_size * 2 + 1)
         cleaned = cv2.GaussianBlur(cleaned, (b, b), 0)
-    safe_zone = cleaned.astype(np.float32) / 255.0
-    result = alpha_np * safe_zone
     return result[:, :, np.newaxis] if is_3d else result
 def create_checkerboard(w, h, checker_size=64, color1=0.15, color2=0.55):
-    x_tiles = np.arange(w) // checker_size
-    y_tiles = np.arange(h) // checker_size
-    xg, yg = np.meshgrid(x_tiles, y_tiles)
-    checker = ((xg + yg) % 2).astype(np.float32)
-    bg = np.where(checker == 0, color1, color2).astype(np.float32)
     return np.stack([bg, bg, bg], axis=-1)
 # ---------------------------------------------------------------------------
-# Fast classical green-screen mask (alternative to BiRefNet)
 # ---------------------------------------------------------------------------
 def fast_greenscreen_mask(frame_rgb_f32):
-    """Fast green-screen detection using corner sampling + HSV threshold.
-    Returns (mask_f32, confidence) or (None, 0.0) if not a green screen.
-    """
     h, w = frame_rgb_f32.shape[:2]
     ph, pw = max(int(h * 0.05), 4), max(int(w * 0.05), 4)
     corners = np.concatenate([
@@ -146,38 +121,25 @@ def fast_greenscreen_mask(frame_rgb_f32):
         frame_rgb_f32[-ph:, -pw:].reshape(-1, 3),
     ], axis=0)
     bg_color = np.median(corners, axis=0)
-    # Check if background is green-ish (G channel dominant)
     if not (bg_color[1] > bg_color[0] + 0.05 and bg_color[1] > bg_color[2] + 0.05):
         return None, 0.0
-    # HSV-based mask (more robust than RGB distance)
     frame_u8 = (np.clip(frame_rgb_f32, 0, 1) * 255).astype(np.uint8)
     hsv = cv2.cvtColor(frame_u8, cv2.COLOR_RGB2HSV)
-    # Green hue range in HSV
     green_mask = cv2.inRange(hsv, (35, 40, 40), (85, 255, 255))
-    # Invert: foreground = NOT green
     fg_mask = cv2.bitwise_not(green_mask)
-    # Morphological close to fill small holes
-    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
-    fg_mask = cv2.morphologyEx(fg_mask, cv2.MORPH_CLOSE, kernel)
     fg_mask = cv2.GaussianBlur(fg_mask, (5, 5), 0)
     mask_f32 = fg_mask.astype(np.float32) / 255.0
-    # Confidence: how bimodal is the mask (closer to 0/1 = better)
     confidence = 1.0 - 2.0 * np.mean(np.minimum(mask_f32, 1.0 - mask_f32))
     return mask_f32, confidence
 # ---------------------------------------------------------------------------
-# Model loading (lazy singletons)
 # ---------------------------------------------------------------------------
 _birefnet_session = None
 _corridorkey_sessions = {}
-def _ort_session_opts():
     opts = ort.SessionOptions()
     opts.intra_op_num_threads = 2
     opts.inter_op_num_threads = 1
@@ -186,17 +148,15 @@ def _ort_session_opts():
     opts.enable_mem_pattern = True
     return opts
 def get_birefnet():
     global _birefnet_session
     if _birefnet_session is None:
         logger.info("Downloading BiRefNet-Lite ONNX...")
         path = hf_hub_download(repo_id=BIREFNET_REPO, filename=BIREFNET_FILE)
         logger.info("Loading BiRefNet ONNX: %s", path)
-        _birefnet_session = ort.InferenceSession(path, _ort_session_opts(), providers=["CPUExecutionProvider"])
     return _birefnet_session
 def get_corridorkey(resolution="1024"):
     global _corridorkey_sessions
     if resolution not in _corridorkey_sessions:
@@ -204,62 +164,44 @@ def get_corridorkey(resolution="1024"):
         if not onnx_path or not os.path.exists(onnx_path):
             raise gr.Error(f"CorridorKey ONNX model for {resolution} not found.")
         logger.info("Loading CorridorKey ONNX (%s): %s", resolution, onnx_path)
-        _corridorkey_sessions[resolution] = ort.InferenceSession(onnx_path, _ort_session_opts(), providers=["CPUExecutionProvider"])
     return _corridorkey_sessions[resolution]
 # ---------------------------------------------------------------------------
 # Per-frame inference
 # ---------------------------------------------------------------------------
 def birefnet_frame(session, image_rgb_uint8):
-    """BiRefNet: RGB uint8 [H,W,3] -> float32 [H,W] mask 0-1."""
     h, w = image_rgb_uint8.shape[:2]
-    inp_info = session.get_inputs()[0]
-    res = (inp_info.shape[2], inp_info.shape[3])
     img = cv2.resize(image_rgb_uint8, res).astype(np.float32) / 255.0
-    img = (img - IMAGENET_MEAN) / IMAGENET_STD
-    img = img.transpose(2, 0, 1)[np.newaxis, :].astype(np.float32)
-    outputs = session.run(None, {inp_info.name: img})
-    pred = 1.0 / (1.0 + np.exp(-outputs[-1]))  # sigmoid
-    mask = cv2.resize(pred[0, 0], (w, h))
-    return (mask > 0.04).astype(np.float32)
 def corridorkey_frame(session, image_f32, mask_f32, img_size,
-                      despill_strength=0.5, auto_despeckle=True,
-                      despeckle_size=400):
-    """CorridorKey: image [H,W,3] float32 0-1 + mask [H,W] float32 0-1 -> dict."""
     h, w = image_f32.shape[:2]
-    img_resized = cv2.resize(image_f32, (img_size, img_size))
-    mask_resized = cv2.resize(mask_f32, (img_size, img_size))[:, :, np.newaxis]
-    img_norm = (img_resized - IMAGENET_MEAN) / IMAGENET_STD
-    inp = np.concatenate([img_norm, mask_resized], axis=-1)
     inp = inp.transpose(2, 0, 1)[np.newaxis, :].astype(np.float32)
     alpha_raw, fg_raw = session.run(None, {"input": inp})
     alpha = cv2.resize(alpha_raw[0].transpose(1, 2, 0), (w, h), interpolation=cv2.INTER_LANCZOS4)
     fg = cv2.resize(fg_raw[0].transpose(1, 2, 0), (w, h), interpolation=cv2.INTER_LANCZOS4)
     if alpha.ndim == 2:
         alpha = alpha[:, :, np.newaxis]
     if auto_despeckle:
         alpha = clean_matte(alpha, area_threshold=despeckle_size, dilation=25, blur_size=5)
     fg = despill(fg, green_limit_mode="average", strength=despill_strength)
     return {"alpha": alpha, "fg": fg}
 # ---------------------------------------------------------------------------
-# Video stitching via ffmpeg
 # ---------------------------------------------------------------------------
 def _stitch_ffmpeg(frame_dir, out_path, fps, pattern="%05d.png", pix_fmt="yuv420p",
                    codec="libx264", extra_args=None):
-    """Stitch PNG frames into video via ffmpeg subprocess."""
-    cmd = ["ffmpeg", "-y", "-framerate", str(fps),
-           "-i", os.path.join(frame_dir, pattern),
            "-c:v", codec, "-pix_fmt", pix_fmt]
     if extra_args:
         cmd.extend(extra_args)
@@ -271,36 +213,13 @@ def _stitch_ffmpeg(frame_dir, out_path, fps, pattern="%05d.png", pix_fmt="yuv420
         logger.warning("ffmpeg failed: %s", e)
         return False
-def _stitch_cv2_fallback(frame_dir, out_path, fps, w, h, grayscale=False):
-    """Fallback: stitch via OpenCV VideoWriter if ffmpeg unavailable."""
-    files = sorted([f for f in os.listdir(frame_dir) if f.endswith(".png")])
-    fourcc = cv2.VideoWriter_fourcc(*"mp4v")
-    writer = cv2.VideoWriter(out_path, fourcc, fps, (w, h))
-    if not writer.isOpened():
-        logger.warning("mp4v codec unavailable")
-        return False
-    for f in files:
-        img = cv2.imread(os.path.join(frame_dir, f),
-                         cv2.IMREAD_GRAYSCALE if grayscale else cv2.IMREAD_COLOR)
-        if img is None:
-            continue
-        if grayscale:
-            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
-        writer.write(img)
-    writer.release()
-    return True
 # ---------------------------------------------------------------------------
-# Video processing pipeline (single-pass, streaming)
 # ---------------------------------------------------------------------------
 def process_video(video_path, resolution, despill_val, mask_mode,
-                  auto_despeckle, despeckle_size, output_mode, progress=gr.Progress()):
     """Remove green screen background from video using CorridorKey AI matting.
-    Handles transparent objects (glass, water, cloth) that traditional chroma key cannot.
-    Returns composite video, downloadable file, and status message.
     """
     if video_path is None:
         raise gr.Error("Please upload a video.")
@@ -308,7 +227,6 @@ def process_video(video_path, resolution, despill_val, mask_mode,
     max_dur = MAX_DURATION_GPU if HAS_CUDA else MAX_DURATION_CPU
     img_size = int(resolution)
-    # Probe video
     cap = cv2.VideoCapture(video_path)
     fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
     total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
@@ -318,7 +236,6 @@ def process_video(video_path, resolution, despill_val, mask_mode,
     if total_frames == 0:
         raise gr.Error("Could not read video frames. Check file format.")
     duration = total_frames / fps
     if duration > max_dur:
         raise gr.Error(f"Video too long ({duration:.1f}s). Max {max_dur}s on {'GPU' if HAS_CUDA else 'free CPU'} tier.")
@@ -327,7 +244,6 @@ def process_video(video_path, resolution, despill_val, mask_mode,
     logger.info("Processing %d frames (%dx%d @ %.1f fps), resolution=%d, mask=%s",
                 frames_to_process, w, h, fps, img_size, mask_mode)
-    # Load models
     try:
         birefnet = None
         if mask_mode != "Fast (classical)":
@@ -339,42 +255,22 @@ def process_video(video_path, resolution, despill_val, mask_mode,
         raise gr.Error(f"Failed to load models: {e}")
     despill_strength = despill_val / 10.0
-    # Determine what outputs we need
-    need_comp = output_mode == "Composite on checkerboard (MP4)"
-    need_alpha = output_mode == "Alpha matte (MP4)"
-    need_rgba = output_mode in ("Transparent video (WebM)", "PNG sequence (ZIP)")
     tmpdir = tempfile.mkdtemp(prefix="ck_")
     try:
-        # Pre-compute checkerboard if needed
-        bg_lin = None
-        if need_comp:
-            bg_lin = srgb_to_linear(create_checkerboard(w, h))
-        # For PNG-based outputs, create dirs
-        rgba_dir = None
-        alpha_dir = None
-        comp_dir = None
-        if need_rgba:
-            rgba_dir = os.path.join(tmpdir, "rgba")
-            os.makedirs(rgba_dir, exist_ok=True)
-        if output_mode == "PNG sequence (ZIP)":
-            alpha_dir = os.path.join(tmpdir, "alphas")
-            os.makedirs(alpha_dir, exist_ok=True)
-        # For MP4 modes, write directly to VideoWriter via temp PNGs + ffmpeg
-        # (we still need PNGs as ffmpeg input, but only the needed type)
-        if need_comp:
-            comp_dir = os.path.join(tmpdir, "comp")
-            os.makedirs(comp_dir, exist_ok=True)
-        if need_alpha:
-            alpha_dir = os.path.join(tmpdir, "alphas")
-            os.makedirs(alpha_dir, exist_ok=True)
-        # Single-pass processing
         cap = cv2.VideoCapture(video_path)
         frame_times = []
         for i in range(frames_to_process):
             t0 = time.time()
@@ -385,16 +281,16 @@ def process_video(video_path, resolution, despill_val, mask_mode,
             frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
             frame_f32 = frame_rgb.astype(np.float32) / 255.0
-            # Coarse mask: fast classical or BiRefNet
             if mask_mode == "Fast (classical)":
-                mask, confidence = fast_greenscreen_mask(frame_f32)
                 if mask is None:
-                    raise gr.Error("Fast mask failed: video doesn't appear to have a green screen background. Try 'AI (BiRefNet)' mode.")
             elif mask_mode == "Hybrid (auto)":
-                mask, confidence = fast_greenscreen_mask(frame_f32)
-                if mask is None or confidence < 0.7:
                     mask = birefnet_frame(birefnet, frame_rgb)
-            else:  # "AI (BiRefNet)"
                 mask = birefnet_frame(birefnet, frame_rgb)
             # CorridorKey inference
@@ -402,92 +298,76 @@ def process_video(video_path, resolution, despill_val, mask_mode,
                                        despill_strength=despill_strength,
                                        auto_despeckle=auto_despeckle,
                                        despeckle_size=int(despeckle_size))
             alpha = result["alpha"]
             fg = result["fg"]
-            # Write only the output we need
-            if need_comp:
-                fg_lin = srgb_to_linear(fg)
-                comp = linear_to_srgb(composite_straight(fg_lin, bg_lin, alpha))
-                comp_uint8 = (np.clip(comp, 0, 1) * 255).astype(np.uint8)
-                cv2.imwrite(os.path.join(comp_dir, f"{i:05d}.png"), comp_uint8[:, :, ::-1])
-            if need_alpha or alpha_dir:
-                alpha_uint8 = (np.clip(alpha, 0, 1) * 255).astype(np.uint8)
-                if alpha_uint8.ndim == 3:
-                    alpha_uint8 = alpha_uint8[:, :, 0]
-                if alpha_dir:
-                    cv2.imwrite(os.path.join(alpha_dir, f"{i:05d}.png"), alpha_uint8)
-            if need_rgba:
-                fg_uint8 = (np.clip(fg, 0, 1) * 255).astype(np.uint8)
-                a_uint8 = (np.clip(alpha, 0, 1) * 255).astype(np.uint8)
-                if a_uint8.ndim == 3:
-                    a_uint8 = a_uint8[:, :, 0]
-                rgba = np.concatenate([fg_uint8[:, :, ::-1], a_uint8[:, :, np.newaxis]], axis=-1)
-                cv2.imwrite(os.path.join(rgba_dir, f"{i:05d}.png"), rgba)
             # Progress with ETA
             elapsed = time.time() - t0
             frame_times.append(elapsed)
-            avg_time = np.mean(frame_times[-5:]) if len(frame_times) >= 2 else elapsed
-            remaining = (frames_to_process - i - 1) * avg_time
             eta = f"{remaining/60:.1f}min" if remaining > 60 else f"{remaining:.0f}s"
             pct = 0.05 + 0.85 * (i + 1) / frames_to_process
             progress(pct, desc=f"Frame {i+1}/{frames_to_process} ({elapsed:.1f}s) | ~{eta} left")
         cap.release()
-        # Assemble output
-        progress(0.92, desc="Stitching video...")
-        output_video = None
-        output_file = None
-        if need_comp:
-            out_path = os.path.join(tmpdir, "composite.mp4")
-            ok = _stitch_ffmpeg(comp_dir, out_path, fps, extra_args=["-crf", "18"])
-            if not ok:
-                ok = _stitch_cv2_fallback(comp_dir, out_path, fps, w, h)
-            if not ok:
-                raise gr.Error("Video encoding failed. No suitable codec found.")
-            output_video = out_path
-            output_file = out_path
-        elif need_alpha:
-            out_path = os.path.join(tmpdir, "alpha_matte.mp4")
-            ok = _stitch_ffmpeg(alpha_dir, out_path, fps, extra_args=["-crf", "18"])
-            if not ok:
-                ok = _stitch_cv2_fallback(alpha_dir, out_path, fps, w, h, grayscale=True)
-            if not ok:
-                raise gr.Error("Video encoding failed. No suitable codec found.")
-            output_video = out_path
-            output_file = out_path
-        elif output_mode == "Transparent video (WebM)":
-            out_path = os.path.join(tmpdir, "transparent.webm")
-            ok = _stitch_ffmpeg(rgba_dir, out_path, fps,
-                                codec="libvpx-vp9", pix_fmt="yuva420p",
-                                extra_args=["-crf", "30", "-b:v", "0"])
-            if not ok:
-                raise gr.Error("WebM encoding failed. ffmpeg with libvpx-vp9 required.")
-            output_video = out_path
-            output_file = out_path
-        elif output_mode == "PNG sequence (ZIP)":
-            zip_path = os.path.join(tmpdir, "rgba_sequence.zip")
-            with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_STORED) as zf:
-                for f in sorted(os.listdir(rgba_dir)):
-                    zf.write(os.path.join(rgba_dir, f), f"rgba/{f}")
-                if alpha_dir:
-                    for f in sorted(os.listdir(alpha_dir)):
-                        zf.write(os.path.join(alpha_dir, f), f"alpha/{f}")
-            output_file = zip_path
         progress(1.0, desc="Done!")
         avg = np.mean(frame_times) if frame_times else 0
-        status = f"Processed {len(frame_times)} frames ({w}x{h}) at {img_size}px | {avg:.1f}s/frame avg"
-        return output_video, output_file, status
     except gr.Error:
         raise
@@ -495,8 +375,7 @@ def process_video(video_path, resolution, despill_val, mask_mode,
         logger.exception("Processing failed")
         raise gr.Error(f"Processing failed: {e}")
     finally:
-        # Cleanup intermediate dirs (keep output files in tmpdir root)
-        for d in ["comp", "alphas", "rgba"]:
             p = os.path.join(tmpdir, d)
             if os.path.isdir(p):
                 shutil.rmtree(p, ignore_errors=True)
@@ -506,10 +385,8 @@ def process_video(video_path, resolution, despill_val, mask_mode,
 # ---------------------------------------------------------------------------
 # Gradio UI
 # ---------------------------------------------------------------------------
-def process_example(video_path, resolution, despill, mask_mode, despeckle, despeckle_size, output_mode):
-    return process_video(video_path, resolution, despill, mask_mode, despeckle, despeckle_size, output_mode)
 if HAS_CUDA:
     DESCRIPTION = "# CorridorKey Green Screen Matting\nRemove green backgrounds from video. Based on [CorridorKey](https://www.youtube.com/watch?v=3Ploi723hg4) by Corridor Digital. GPU mode: max {max_dur}s / {max_frames} frames.".format(max_dur=MAX_DURATION_GPU, max_frames=MAX_FRAMES)
@@ -522,60 +399,44 @@ with gr.Blocks(title="CorridorKey") as demo:
     with gr.Row():
         with gr.Column(scale=1):
             input_video = gr.Video(label="Upload Green Screen Video")
             with gr.Accordion("Settings", open=True):
                 resolution = gr.Radio(
-                    choices=["1024", "2048"],
-                    value="1024",
                     label="Processing Resolution",
-                    info="1024 = balanced (~8s/frame CPU), 2048 = max quality (trained resolution, fast on GPU)"
                 )
                 mask_mode = gr.Radio(
                     choices=["Hybrid (auto)", "AI (BiRefNet)", "Fast (classical)"],
-                    value="Hybrid (auto)",
-                    label="Mask Mode",
-                    info="Hybrid = fast green detection + AI fallback. Fast = classical only (~0.01s). AI = always use BiRefNet (~13s/frame)"
                 )
                 despill_slider = gr.Slider(
-                    0, 10, value=5, step=1,
-                    label="Despill Strength",
-                    info="Remove green reflections from subject (0=off, 10=max)"
                 )
                 despeckle_check = gr.Checkbox(
-                    value=True,
-                    label="Auto Despeckle",
-                    info="Remove small disconnected artifacts (tracking markers, noise)"
                 )
                 despeckle_size = gr.Number(
-                    value=400, precision=0,
-                    label="Despeckle Size",
-                    info="Minimum pixel area to keep (smaller = more aggressive cleanup)"
                 )
-            output_mode = gr.Dropdown(
-                choices=[
-                    "Composite on checkerboard (MP4)",
-                    "Alpha matte (MP4)",
-                    "Transparent video (WebM)",
-                    "PNG sequence (ZIP)",
-                ],
-                value="Composite on checkerboard (MP4)",
-                label="Output Format"
-            )
             process_btn = gr.Button("Process Video", variant="primary", size="lg")
         with gr.Column(scale=1):
-            output_video = gr.Video(label="Result Preview")
-            output_file = gr.File(label="Download Result")
             status_text = gr.Textbox(label="Status", interactive=False)
     gr.Examples(
         examples=[
-            ["examples/corridor_greenscreen_demo.mp4", "1024", 5, "Hybrid (auto)", True, 400, "Composite on checkerboard (MP4)"],
         ],
-        inputs=[input_video, resolution, despill_slider, mask_mode, despeckle_check, despeckle_size, output_mode],
-        outputs=[output_video, output_file, status_text],
         fn=process_example,
         cache_examples=True,
         cache_mode="lazy",
@@ -584,62 +445,46 @@ with gr.Blocks(title="CorridorKey") as demo:
     process_btn.click(
         fn=process_video,
-        inputs=[input_video, resolution, despill_slider, mask_mode, despeckle_check, despeckle_size, output_mode],
-        outputs=[output_video, output_file, status_text],
     )
 # ---------------------------------------------------------------------------
 # CLI mode
 # ---------------------------------------------------------------------------
 def cli_main():
-    """CLI mode: python app.py --input video.mp4 [options]"""
     import argparse
     parser = argparse.ArgumentParser(description="CorridorKey Green Screen Matting")
-    parser.add_argument("--input", required=True, help="Input video path")
-    parser.add_argument("--output", default="output", help="Output directory")
-    parser.add_argument("--device", default="auto", choices=["auto", "cpu", "cuda"],
-                        help="Force device (auto=detect GPU/CPU)")
-    parser.add_argument("--resolution", default="1024", choices=["1024", "2048"],
-                        help="Model resolution (1024=fast, 2048=max quality)")
     parser.add_argument("--mask-mode", default="Hybrid (auto)",
                         choices=["Hybrid (auto)", "AI (BiRefNet)", "Fast (classical)"])
-    parser.add_argument("--despill", type=int, default=5, help="Despill strength 0-10")
     parser.add_argument("--no-despeckle", action="store_true")
     parser.add_argument("--despeckle-size", type=int, default=400)
-    parser.add_argument("--format", default="Composite on checkerboard (MP4)",
-                        choices=["Composite on checkerboard (MP4)", "Alpha matte (MP4)",
-                                 "Transparent video (WebM)", "PNG sequence (ZIP)"])
     args = parser.parse_args()
     global HAS_CUDA
-    if args.device == "cpu":
-        HAS_CUDA = False
-    elif args.device == "cuda":
-        HAS_CUDA = True
     print(f"Device: {'CUDA' if HAS_CUDA else 'CPU'}")
     class CLIProgress:
         def __call__(self, val, desc=""):
-            if desc:
-                print(f"  [{val:.0%}] {desc}")
-    video, file, status = process_video(
         args.input, args.resolution, args.despill, args.mask_mode,
-        not args.no_despeckle, args.despeckle_size, args.format,
-        progress=CLIProgress()
     )
     print(f"\n{status}")
-    if video:
-        os.makedirs(args.output, exist_ok=True)
-        dst = os.path.join(args.output, os.path.basename(video))
-        shutil.copy2(video, dst)
-        print(f"Output: {dst}")
-    if file:
-        os.makedirs(args.output, exist_ok=True)
-        dst = os.path.join(args.output, os.path.basename(file))
-        shutil.copy2(file, dst)
         print(f"Output: {dst}")

 import onnxruntime as ort
 # Workaround: Gradio cache_examples bug with None outputs.
 _original_read_from_flag = gr.components.Component.read_from_flag
 def _patched_read_from_flag(self, payload):
     """Return None for missing or blank payloads, else defer to the original.

     A payload of ``None`` or a string that is empty after stripping
     whitespace short-circuits to ``None``; anything else is forwarded
     unchanged to the saved ``_original_read_from_flag``.
     """
     if payload is None:
         return None
     # Blank strings are treated the same as a missing payload.
     if isinstance(payload, str) and payload.strip() == "":
         return None
     return _original_read_from_flag(self, payload)
 gr.components.Component.read_from_flag = _patched_read_from_flag
 from huggingface_hub import hf_hub_download
 cv2.setNumThreads(2)
 logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
 logger = logging.getLogger(__name__)
 # ---------------------------------------------------------------------------
 BIREFNET_REPO = "onnx-community/BiRefNet_lite-ONNX"
 BIREFNET_FILE = "onnx/model.onnx"
 MODELS_DIR = os.path.join(os.path.dirname(__file__), "models")
 CORRIDORKEY_MODELS = {
     "1024": os.path.join(MODELS_DIR, "corridorkey_1024.onnx"),
     "2048": os.path.join(MODELS_DIR, "corridorkey_2048.onnx"),
 }
 IMAGENET_MEAN = np.array([0.485, 0.456, 0.406], dtype=np.float32).reshape(1, 1, 3)
 IMAGENET_STD = np.array([0.229, 0.224, 0.225], dtype=np.float32).reshape(1, 1, 3)
 MAX_DURATION_CPU = 5
 MAX_DURATION_GPU = 30
 MAX_FRAMES = 150
 HAS_CUDA = "CUDAExecutionProvider" in ort.get_available_providers()
 # ---------------------------------------------------------------------------
+# Color utilities (numpy-only)
 # ---------------------------------------------------------------------------
 def linear_to_srgb(x):
     """Apply the piecewise sRGB encoding curve to linear values.

     Negative inputs are clamped to 0 first. Values at or below 0.0031308
     use the linear segment (``x * 12.92``); larger values use the power
     segment (``1.055 * x ** (1 / 2.4) - 0.055``).
     """
     clamped = np.clip(x, 0.0, None)
     linear_segment = clamped * 12.92
     power_segment = 1.055 * np.power(clamped, 1.0 / 2.4) - 0.055
     return np.where(clamped <= 0.0031308, linear_segment, power_segment)
 def srgb_to_linear(x):
     """Apply the piecewise sRGB decoding curve to encoded values.

     Negative inputs are clamped to 0 first. Values at or below 0.04045
     use the linear segment (``x / 12.92``); larger values use the power
     segment (``((x + 0.055) / 1.055) ** 2.4``).
     """
     clamped = np.clip(x, 0.0, None)
     linear_segment = clamped / 12.92
     power_segment = np.power((clamped + 0.055) / 1.055, 2.4)
     return np.where(clamped <= 0.04045, linear_segment, power_segment)
 def composite_straight(fg, bg, alpha):
     """Blend a straight (non-premultiplied) foreground over a background.

     Computes ``fg * alpha + bg * (1 - alpha)``.
     """
     foreground_part = fg * alpha
     background_part = bg * (1.0 - alpha)
     return foreground_part + background_part
 def despill(image, green_limit_mode="average", strength=1.0):
     if strength <= 0.0:
         return image
     r, g, b = image[..., 0], image[..., 1], image[..., 2]
     limit = (r + b) / 2.0 if green_limit_mode == "average" else np.maximum(r, b)
+    spill = np.maximum(g - limit, 0.0)
+    despilled = np.stack([r + spill * 0.5, g - spill, b + spill * 0.5], axis=-1)
+    return image * (1.0 - strength) + despilled * strength if strength < 1.0 else despilled
 def clean_matte(alpha_np, area_threshold=300, dilation=15, blur_size=5):
     is_3d = alpha_np.ndim == 3
         alpha_np = alpha_np[:, :, 0]
     mask_8u = (alpha_np > 0.5).astype(np.uint8) * 255
     num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(mask_8u, connectivity=8)
     valid = np.zeros(num_labels, dtype=bool)
     valid[1:] = stats[1:, cv2.CC_STAT_AREA] >= area_threshold
     cleaned = (valid[labels].astype(np.uint8) * 255)
     if dilation > 0:
         k = int(dilation * 2 + 1)
+        cleaned = cv2.dilate(cleaned, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (k, k)))
     if blur_size > 0:
         b = int(blur_size * 2 + 1)
         cleaned = cv2.GaussianBlur(cleaned, (b, b), 0)
+    result = alpha_np * (cleaned.astype(np.float32) / 255.0)
     return result[:, :, np.newaxis] if is_3d else result
 def create_checkerboard(w, h, checker_size=64, color1=0.15, color2=0.55):
+    xg, yg = np.meshgrid(np.arange(w) // checker_size, np.arange(h) // checker_size)
+    bg = np.where(((xg + yg) % 2) == 0, color1, color2).astype(np.float32)
     return np.stack([bg, bg, bg], axis=-1)
+def premultiply(fg, alpha):
+    return fg * alpha
 # ---------------------------------------------------------------------------
+# Fast classical green-screen mask
 # ---------------------------------------------------------------------------
 def fast_greenscreen_mask(frame_rgb_f32):
     h, w = frame_rgb_f32.shape[:2]
     ph, pw = max(int(h * 0.05), 4), max(int(w * 0.05), 4)
     corners = np.concatenate([
         frame_rgb_f32[-ph:, -pw:].reshape(-1, 3),
     ], axis=0)
     bg_color = np.median(corners, axis=0)
     if not (bg_color[1] > bg_color[0] + 0.05 and bg_color[1] > bg_color[2] + 0.05):
         return None, 0.0
     frame_u8 = (np.clip(frame_rgb_f32, 0, 1) * 255).astype(np.uint8)
     hsv = cv2.cvtColor(frame_u8, cv2.COLOR_RGB2HSV)
     green_mask = cv2.inRange(hsv, (35, 40, 40), (85, 255, 255))
     fg_mask = cv2.bitwise_not(green_mask)
+    fg_mask = cv2.morphologyEx(fg_mask, cv2.MORPH_CLOSE, cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5)))
     fg_mask = cv2.GaussianBlur(fg_mask, (5, 5), 0)
     mask_f32 = fg_mask.astype(np.float32) / 255.0
     confidence = 1.0 - 2.0 * np.mean(np.minimum(mask_f32, 1.0 - mask_f32))
     return mask_f32, confidence
 # ---------------------------------------------------------------------------
+# Model loading
 # ---------------------------------------------------------------------------
 _birefnet_session = None
 _corridorkey_sessions = {}
+def _ort_opts():
     opts = ort.SessionOptions()
     opts.intra_op_num_threads = 2
     opts.inter_op_num_threads = 1
     opts.enable_mem_pattern = True
     return opts
 def get_birefnet():
     """Return the shared BiRefNet-Lite ONNX session, creating it on first use.

     On the first call the model file is fetched with ``hf_hub_download`` and
     loaded on the CPU execution provider; later calls reuse the cached
     module-level session.
     """
     global _birefnet_session
     if _birefnet_session is not None:
         return _birefnet_session
     logger.info("Downloading BiRefNet-Lite ONNX...")
     model_path = hf_hub_download(repo_id=BIREFNET_REPO, filename=BIREFNET_FILE)
     logger.info("Loading BiRefNet ONNX: %s", model_path)
     _birefnet_session = ort.InferenceSession(
         model_path,
         _ort_opts(),
         providers=["CPUExecutionProvider"],
     )
     return _birefnet_session
 def get_corridorkey(resolution="1024"):
     """Return the cached CorridorKey ONNX session for ``resolution``.

     Args:
         resolution: Key into ``CORRIDORKEY_MODELS`` ("1024" or "2048"). An
             int is accepted and converted to its string form.

     Returns:
         The ``ort.InferenceSession`` for that resolution, loaded on the CPU
         execution provider on first use and reused afterwards.

     Raises:
         gr.Error: If no model path is registered for the resolution or the
             file does not exist on disk.
     """
     key = str(resolution)
     if key not in _corridorkey_sessions:
         # Resolve the model file from the registry; .get() yields None for an
         # unknown resolution so the check below reports it cleanly.
         onnx_path = CORRIDORKEY_MODELS.get(key)
         if not onnx_path or not os.path.exists(onnx_path):
             raise gr.Error(f"CorridorKey ONNX model for {resolution} not found.")
         logger.info("Loading CorridorKey ONNX (%s): %s", resolution, onnx_path)
         # The cache dict is mutated, never rebound, so no `global` is needed.
         _corridorkey_sessions[key] = ort.InferenceSession(
             onnx_path, _ort_opts(), providers=["CPUExecutionProvider"]
         )
     return _corridorkey_sessions[key]
 # ---------------------------------------------------------------------------
 # Per-frame inference
 # ---------------------------------------------------------------------------
 def birefnet_frame(session, image_rgb_uint8):
     """Run BiRefNet on one RGB frame and return a binary foreground mask.

     Args:
         session: ONNX Runtime session whose first input is the NCHW image.
         image_rgb_uint8: H x W x 3 RGB frame with values in 0..255.

     Returns:
         float32 array of shape (H, W) holding 1.0 where the sigmoid of the
         model's last output exceeds 0.04 and 0.0 elsewhere.
     """
     h, w = image_rgb_uint8.shape[:2]
     inp = session.get_inputs()[0]
     # The tensor fed to the model is NCHW (see the transpose below), so
     # shape[2] is height and shape[3] is width. cv2.resize takes dsize as
     # (width, height); passing (H, W) only worked for square model inputs.
     # NOTE(review): assumes the model declares static integer spatial dims.
     in_h, in_w = inp.shape[2], inp.shape[3]
     img = cv2.resize(image_rgb_uint8, (in_w, in_h)).astype(np.float32) / 255.0
     img = ((img - IMAGENET_MEAN) / IMAGENET_STD).transpose(2, 0, 1)[np.newaxis, :].astype(np.float32)
     # The last output is treated as logits and squashed with a sigmoid.
     logits = session.run(None, {inp.name: img})[-1]
     pred = 1.0 / (1.0 + np.exp(-logits))
     # Resize back to the frame size, then threshold to a hard 0/1 mask.
     return (cv2.resize(pred[0, 0], (w, h)) > 0.04).astype(np.float32)
 def corridorkey_frame(session, image_f32, mask_f32, img_size,
                       despill_strength=0.5, auto_despeckle=True, despeckle_size=400):
     """Run CorridorKey on one frame and return its matte and foreground.

     The frame and coarse mask are resized to ``img_size`` x ``img_size``, the
     frame is ImageNet-normalised, and both are stacked into a 4-channel NCHW
     tensor. The two model outputs are resized back to the frame size; the
     alpha is optionally despeckled and the foreground is despilled.

     Returns:
         dict with "alpha" (H x W x 1) and "fg" (foreground after despill).
     """
     h, w = image_f32.shape[:2]
     model_dsize = (img_size, img_size)

     normalized = (cv2.resize(image_f32, model_dsize) - IMAGENET_MEAN) / IMAGENET_STD
     coarse = cv2.resize(mask_f32, model_dsize)[:, :, np.newaxis]
     batch = np.concatenate([normalized, coarse], axis=-1)
     batch = batch.transpose(2, 0, 1)[np.newaxis, :].astype(np.float32)

     alpha_raw, fg_raw = session.run(None, {"input": batch})

     def _to_frame_size(chw):
         # CHW -> HWC, then Lanczos-resize to the original frame size.
         return cv2.resize(chw.transpose(1, 2, 0), (w, h), interpolation=cv2.INTER_LANCZOS4)

     alpha = _to_frame_size(alpha_raw[0])
     fg = _to_frame_size(fg_raw[0])

     # cv2.resize drops a trailing singleton channel; restore it.
     if alpha.ndim == 2:
         alpha = alpha[:, :, np.newaxis]
     if auto_despeckle:
         alpha = clean_matte(alpha, area_threshold=despeckle_size, dilation=25, blur_size=5)
     fg = despill(fg, green_limit_mode="average", strength=despill_strength)
     return {"alpha": alpha, "fg": fg}
 # ---------------------------------------------------------------------------
+# Video stitching
 # ---------------------------------------------------------------------------
 def _stitch_ffmpeg(frame_dir, out_path, fps, pattern="%05d.png", pix_fmt="yuv420p",
                    codec="libx264", extra_args=None):
     # Builds an ffmpeg command that encodes the numbered frames in `frame_dir`
     # (matched by `pattern`) at `fps` with the given codec and pixel format.
     # `extra_args`, when given, is appended to the command. Returns False on
     # failure.
+    cmd = ["ffmpeg", "-y", "-framerate", str(fps), "-i", os.path.join(frame_dir, pattern),
            "-c:v", codec, "-pix_fmt", pix_fmt]
     if extra_args:
         cmd.extend(extra_args)
     # NOTE(review): lines appear to be missing from this view between the
     # extend above and the warning below. `out_path` is never used, no ffmpeg
     # invocation is shown, and `e` is not bound. Presumably a try/except
     # around the actual run was elided; confirm against the full file.
         logger.warning("ffmpeg failed: %s", e)
         return False
 # ---------------------------------------------------------------------------
+# Main pipeline: generates ALL professional outputs
 # ---------------------------------------------------------------------------
 def process_video(video_path, resolution, despill_val, mask_mode,
+                  auto_despeckle, despeckle_size, progress=gr.Progress()):
     """Remove green screen background from video using CorridorKey AI matting.
+    Returns: comp_video, matte_video, download_zip, status
     """
     if video_path is None:
         raise gr.Error("Please upload a video.")
     # The duration cap depends on whether ONNX Runtime reported a CUDA provider.
     max_dur = MAX_DURATION_GPU if HAS_CUDA else MAX_DURATION_CPU
     img_size = int(resolution)
     cap = cv2.VideoCapture(video_path)
     # `or 30.0` substitutes a default when OpenCV reports an FPS of 0.
     fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
     total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
     if total_frames == 0:
         raise gr.Error("Could not read video frames. Check file format.")
     duration = total_frames / fps
     if duration > max_dur:
         raise gr.Error(f"Video too long ({duration:.1f}s). Max {max_dur}s on {'GPU' if HAS_CUDA else 'free CPU'} tier.")
     # NOTE(review): `w`, `h` and `frames_to_process` are used below but not
     # assigned in the visible lines, and this first capture is not visibly
     # released. Lines were presumably elided from this view; confirm.
     logger.info("Processing %d frames (%dx%d @ %.1f fps), resolution=%d, mask=%s",
                 frames_to_process, w, h, fps, img_size, mask_mode)
     try:
         birefnet = None
         if mask_mode != "Fast (classical)":
     # NOTE(review): the body of this `if` (model loading) and the `except`
     # clause that binds `e` are not shown in this view.
         raise gr.Error(f"Failed to load models: {e}")
     # The UI slider is 0..10; scale it to a 0.0..1.0 strength.
     despill_strength = despill_val / 10.0
     tmpdir = tempfile.mkdtemp(prefix="ck_")
     try:
+        # Output dirs matching original CorridorKey structure
+        comp_dir = os.path.join(tmpdir, "Comp")
+        fg_dir = os.path.join(tmpdir, "FG")
+        matte_dir = os.path.join(tmpdir, "Matte")
+        processed_dir = os.path.join(tmpdir, "Processed")
+        for d in [comp_dir, fg_dir, matte_dir, processed_dir]:
+            os.makedirs(d, exist_ok=True)
         # Checkerboard background, converted to linear light once and reused
         # for every frame's composite.
+        bg_lin = srgb_to_linear(create_checkerboard(w, h))
         cap = cv2.VideoCapture(video_path)
         frame_times = []
+        total_start = time.time()
         for i in range(frames_to_process):
             t0 = time.time()
             # NOTE(review): the read that produces `frame_bgr` is not shown
             # in this view.
             frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
             frame_f32 = frame_rgb.astype(np.float32) / 255.0
+            # Coarse mask
             if mask_mode == "Fast (classical)":
+                mask, _ = fast_greenscreen_mask(frame_f32)
                 if mask is None:
+                    raise gr.Error("Fast mask failed: no green screen detected. Try 'AI (BiRefNet)' mode.")
             elif mask_mode == "Hybrid (auto)":
                 # Use the classical mask unless it is missing or its
                 # confidence is below 0.7; otherwise fall back to BiRefNet.
+                mask, conf = fast_greenscreen_mask(frame_f32)
+                if mask is None or conf < 0.7:
                     mask = birefnet_frame(birefnet, frame_rgb)
+            else:
                 mask = birefnet_frame(birefnet, frame_rgb)
             # CorridorKey inference
             # NOTE(review): the start of the call that assigns `result`
             # (presumably corridorkey_frame(...)) is not shown in this view.
                                        despill_strength=despill_strength,
                                        auto_despeckle=auto_despeckle,
                                        despeckle_size=int(despeckle_size))
             alpha = result["alpha"]
             fg = result["fg"]
+            # Ensure alpha is [H,W,1] and get 2D version
+            if alpha.ndim == 2:
+                alpha = alpha[:, :, np.newaxis]
+            alpha_2d = alpha[:, :, 0]
+            # -- Comp: composite on checkerboard (sRGB PNG) --
             # Compositing is done in linear light, then converted back to
             # sRGB. `[:, :, ::-1]` reverses the channel order (RGB -> BGR)
             # before cv2.imwrite.
+            fg_lin = srgb_to_linear(fg)
+            comp = linear_to_srgb(composite_straight(fg_lin, bg_lin, alpha))
+            cv2.imwrite(os.path.join(comp_dir, f"{i:05d}.png"),
+                        (np.clip(comp, 0, 1) * 255).astype(np.uint8)[:, :, ::-1])
+            # -- FG: straight foreground, 100% opaque (sRGB PNG) --
+            cv2.imwrite(os.path.join(fg_dir, f"{i:05d}.png"),
+                        (np.clip(fg, 0, 1) * 255).astype(np.uint8)[:, :, ::-1])
+            # -- Matte: alpha channel (grayscale PNG) --
+            cv2.imwrite(os.path.join(matte_dir, f"{i:05d}.png"),
+                        (np.clip(alpha_2d, 0, 1) * 255).astype(np.uint8))
+            # -- Processed: premultiplied RGBA (PNG with transparency) --
             # Premultiplication happens in linear light; the result is
             # encoded to sRGB and written as a 4-channel image with the colour
             # channels reversed and alpha last.
+            fg_premul_lin = premultiply(fg_lin, alpha)
+            fg_premul_srgb = linear_to_srgb(fg_premul_lin)
+            fg_premul_u8 = (np.clip(fg_premul_srgb, 0, 1) * 255).astype(np.uint8)
+            alpha_u8 = (np.clip(alpha_2d, 0, 1) * 255).astype(np.uint8)
+            rgba = np.concatenate([fg_premul_u8[:, :, ::-1], alpha_u8[:, :, np.newaxis]], axis=-1)
+            cv2.imwrite(os.path.join(processed_dir, f"{i:05d}.png"), rgba)
             # Progress with ETA
             elapsed = time.time() - t0
             frame_times.append(elapsed)
             # ETA uses the mean of the last five frame times once at least
             # two frames have been timed.
+            avg_t = np.mean(frame_times[-5:]) if len(frame_times) >= 2 else elapsed
+            remaining = (frames_to_process - i - 1) * avg_t
             eta = f"{remaining/60:.1f}min" if remaining > 60 else f"{remaining:.0f}s"
             # The frame loop occupies the 5%..90% span of the progress bar.
             pct = 0.05 + 0.85 * (i + 1) / frames_to_process
             progress(pct, desc=f"Frame {i+1}/{frames_to_process} ({elapsed:.1f}s) | ~{eta} left")
         cap.release()
+        total_elapsed = time.time() - total_start
+        total_min = total_elapsed / 60
+        # Stitch preview videos
+        progress(0.92, desc="Stitching videos...")
+        comp_video = os.path.join(tmpdir, "comp_preview.mp4")
+        matte_video = os.path.join(tmpdir, "matte_preview.mp4")
         # Return values are ignored here; existence of the output files is
         # checked when building the result tuple below.
+        _stitch_ffmpeg(comp_dir, comp_video, fps, extra_args=["-crf", "18"])
+        _stitch_ffmpeg(matte_dir, matte_video, fps, extra_args=["-crf", "18"])
+        # Package full professional ZIP
+        progress(0.96, desc="Packaging ZIP...")
+        zip_path = os.path.join(tmpdir, "CorridorKey_Output.zip")
         # ZIP_STORED archives the files without compression.
+        with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_STORED) as zf:
+            for folder in ["Comp", "FG", "Matte", "Processed"]:
+                src = os.path.join(tmpdir, folder)
+                for f in sorted(os.listdir(src)):
+                    zf.write(os.path.join(src, f), f"Output/{folder}/{f}")
         progress(1.0, desc="Done!")
+        n = len(frame_times)
         avg = np.mean(frame_times) if frame_times else 0
+        status = f"Processed {n} frames in {total_min:.1f}min ({w}x{h}) at {img_size}px | {avg:.1f}s/frame"
+        return (
+            comp_video if os.path.exists(comp_video) else None,
+            matte_video if os.path.exists(matte_video) else None,
+            zip_path,
+            status,
+        )
     except gr.Error:
         raise
     # NOTE(review): an `except ... as e:` line appears to be missing from this
     # view between the re-raise above and the logging below.
         logger.exception("Processing failed")
         raise gr.Error(f"Processing failed: {e}")
     finally:
         # Only the per-frame directories are removed. `tmpdir` itself, which
         # holds the preview videos and the ZIP, is left in place so the
         # returned paths remain valid.
+        for d in ["Comp", "FG", "Matte", "Processed"]:
             p = os.path.join(tmpdir, d)
             if os.path.isdir(p):
                 shutil.rmtree(p, ignore_errors=True)
 # ---------------------------------------------------------------------------
 # Gradio UI
 # ---------------------------------------------------------------------------
def process_example(video_path, resolution, despill, mask_mode, despeckle, despeckle_size):
    """Forward the example inputs to ``process_video`` unchanged.

    This is the callable handed to ``gr.Examples``; it passes the six inputs
    through positionally and returns whatever ``process_video`` returns.
    """
    example_args = (video_path, resolution, despill, mask_mode, despeckle, despeckle_size)
    return process_video(*example_args)
 # Page description for the GPU tier, followed by the UI layout.
 # NOTE(review): the indentation below suggests lines are missing from this
 # view (an `else:` branch for the CPU description and the enclosing Blocks
 # context, presumably); confirm against the full file.
 if HAS_CUDA:
     DESCRIPTION = "# CorridorKey Green Screen Matting\nRemove green backgrounds from video. Based on [CorridorKey](https://www.youtube.com/watch?v=3Ploi723hg4) by Corridor Digital. GPU mode: max {max_dur}s / {max_frames} frames.".format(max_dur=MAX_DURATION_GPU, max_frames=MAX_FRAMES)
     with gr.Row():
         # Left column: input video, settings and the run button.
         with gr.Column(scale=1):
             input_video = gr.Video(label="Upload Green Screen Video")
             with gr.Accordion("Settings", open=True):
                 resolution = gr.Radio(
+                    choices=["1024", "2048"], value="1024",
                     label="Processing Resolution",
+                    info="1024 = balanced (~8s/frame CPU), 2048 = max quality (fast on GPU)"
                 )
                 mask_mode = gr.Radio(
                     choices=["Hybrid (auto)", "AI (BiRefNet)", "Fast (classical)"],
+                    value="Hybrid (auto)", label="Mask Mode",
+                    info="Hybrid = fast green detection + AI fallback. Fast = classical only. AI = always BiRefNet"
                 )
                 # Slider value is divided by 10 inside process_video.
                 despill_slider = gr.Slider(
+                    0, 10, value=5, step=1, label="Despill Strength",
+                    info="Remove green reflections (0=off, 10=max)"
                 )
                 despeckle_check = gr.Checkbox(
+                    value=True, label="Auto Despeckle",
+                    info="Remove small disconnected artifacts"
                 )
                 despeckle_size = gr.Number(
+                    value=400, precision=0, label="Despeckle Size",
+                    info="Min pixel area to keep"
                 )
             process_btn = gr.Button("Process Video", variant="primary", size="lg")
         # Right column: the four outputs returned by process_video.
         with gr.Column(scale=1):
+            with gr.Row():
+                comp_video = gr.Video(label="Composite Preview")
+                matte_video = gr.Video(label="Alpha Matte")
+            download_zip = gr.File(label="Download Full Package (Comp + FG + Matte + Processed)")
             status_text = gr.Textbox(label="Status", interactive=False)
     # Bundled example; caching is enabled in "lazy" mode.
     gr.Examples(
         examples=[
+            ["examples/corridor_greenscreen_demo.mp4", "1024", 5, "Hybrid (auto)", True, 400],
         ],
+        inputs=[input_video, resolution, despill_slider, mask_mode, despeckle_check, despeckle_size],
+        outputs=[comp_video, matte_video, download_zip, status_text],
         fn=process_example,
         cache_examples=True,
         cache_mode="lazy",
     # NOTE(review): the closing parenthesis of gr.Examples is not visible in
     # this view before the click handler below.
     process_btn.click(
         fn=process_video,
+        inputs=[input_video, resolution, despill_slider, mask_mode, despeckle_check, despeckle_size],
+        outputs=[comp_video, matte_video, download_zip, status_text],
     )
 # ---------------------------------------------------------------------------
 # CLI mode
 # ---------------------------------------------------------------------------
 def cli_main():
     # Command-line entry point: parse arguments, run process_video with a
     # console progress reporter, and copy the resulting ZIP into --output.
     import argparse
     parser = argparse.ArgumentParser(description="CorridorKey Green Screen Matting")
+    parser.add_argument("--input", required=True)
+    parser.add_argument("--output", default="output")
+    parser.add_argument("--device", default="auto", choices=["auto", "cpu", "cuda"])
+    parser.add_argument("--resolution", default="1024", choices=["1024", "2048"])
     parser.add_argument("--mask-mode", default="Hybrid (auto)",
                         choices=["Hybrid (auto)", "AI (BiRefNet)", "Fast (classical)"])
+    parser.add_argument("--despill", type=int, default=5)
     parser.add_argument("--no-despeckle", action="store_true")
     parser.add_argument("--despeckle-size", type=int, default=400)
     args = parser.parse_args()
     # "auto" keeps the module-level detection; "cpu"/"cuda" override the flag.
     # NOTE(review): in the visible code the ONNX sessions are always created
     # with CPUExecutionProvider, so this flag seems to affect only the
     # duration limit in process_video. Confirm.
     global HAS_CUDA
+    if args.device == "cpu": HAS_CUDA = False
+    elif args.device == "cuda": HAS_CUDA = True
     print(f"Device: {'CUDA' if HAS_CUDA else 'CPU'}")
     # Callable passed as `progress`; prints the percentage and description.
     class CLIProgress:
         def __call__(self, val, desc=""):
+            if desc: print(f"  [{val:.0%}] {desc}")
     # `comp` and `matte` are not used in the visible lines; only the ZIP is copied.
+    comp, matte, zipf, status = process_video(
         args.input, args.resolution, args.despill, args.mask_mode,
+        not args.no_despeckle, args.despeckle_size, progress=CLIProgress()
     )
     print(f"\n{status}")
+    os.makedirs(args.output, exist_ok=True)
+    if zipf:
+        dst = os.path.join(args.output, os.path.basename(zipf))
+        shutil.copy2(zipf, dst)
         print(f"Output: {dst}")