Spaces:

nycu-cplab
/

3AM

Sleeping

App Files Community

nycu-cplab committed on Jan 2

Commit

9d941d0

1 Parent(s): 03d1a10

ui improve

Browse files

Files changed (1) hide show

app.py +177 -52

app.py CHANGED Viewed

@@ -2,6 +2,7 @@ import spaces
 import subprocess
 import sys, os
 from pathlib import Path
 ''' loading modules '''
 ROOT = Path(__file__).resolve().parent
@@ -173,7 +174,9 @@ def create_video_from_masks(frames, masks_dict, output_path="output_tracking.mp4
     if not frames:
         logger.warning("No frames to create video.")
         return None
     h, w = np.array(frames[0]).shape[:2]
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
     out = cv2.VideoWriter(output_path, fourcc, fps, (w, h))
@@ -195,7 +198,34 @@ def create_video_from_masks(frames, masks_dict, output_path="output_tracking.mp4
 # --- GPU Wrapped Functions ---
-@spaces.GPU
 def process_video_and_features(video_path, interval):
     """Load video, subsample frames, get views, MUSt3R features, SAM2 inputs."""
     logger.info(f"Starting GPU process: Video feature extraction (Interval: {interval})")
@@ -251,7 +281,17 @@ def generate_frame_mask(image_tensor, points, labels, original_size):
         logger.error(f"Error during mask generation: {e}")
         raise e
-@spaces.GPU
 def run_tracking(sam2_input_images, must3r_feats, must3r_outputs, start_idx, first_frame_mask):
     """Track the mask across the video."""
     logger.info(f"Starting tracking from frame index {start_idx}...")
@@ -289,6 +329,10 @@ def on_video_upload(video_path, interval):
         logger.error(f"Failed to process video: {e}")
         raise gr.Error(f"Processing failed: {str(e)}")
     # Initialize state
     state = {
         "pil_imgs": pil_imgs,
@@ -301,7 +345,11 @@ def on_video_upload(video_path, interval):
         "current_points": [],
         "current_labels": [],
         "current_mask": None,
-        "frame_idx": 0
     }
     first_frame = pil_imgs[0]
@@ -427,7 +475,11 @@ def on_track_click(state):
             first_frame_mask
         )
-        output_path = create_video_from_masks(state["pil_imgs"], tracked_masks_dict)
         return output_path
     except Exception as e:
         logger.error(f"Tracking failed in UI callback: {e}")
@@ -451,101 +503,173 @@ description = """
     <p>Upload a video, geometric features are extracted automatically. Select a frame, click to annotate objects, and track them in 3D-consistent space.</p>
 </div>
 """
 with gr.Blocks(title="3AM: 3egment Anything") as app:
     gr.HTML(description)
     app_state = gr.State()
     with gr.Row():
         with gr.Column(scale=1):
-            with gr.Group():
-                # Added height limit to video input
-                video_input = gr.Video(
-                    label="Upload Video",
-                    sources=["upload"],
-                    height=512
-                )
-                interval_slider = gr.Slider(
-                    label="Frame Interval (Applied to entire pipeline)",
-                    minimum=1,
-                    maximum=30,
-                    step=1,
-                    value=1,
-                    info="Process every N-th frame. Higher values = faster processing but lower temporal resolution."
-                )
-            process_status = gr.Textbox(label="Status", value="Waiting for upload...", interactive=False)
         with gr.Column(scale=2):
-            # Added height limit to image display
             img_display = gr.Image(
-                label="Annotate Frame",
-                interactive=True,
                 height=512
             )
-            frame_slider = gr.Slider(label="Select Frame", minimum=0, maximum=100, step=1, value=0)
             with gr.Row():
                 mode_radio = gr.Radio(
-                    choices=["Positive Point", "Negative Point", "Box Top-Left", "Box Bottom-Right"],
                     value="Positive Point",
                     label="Annotation Mode"
                 )
                 with gr.Column():
-                    gen_mask_btn = gr.Button("Generate Mask", variant="primary")
-                    reset_btn = gr.Button("Reset Annotations")
     with gr.Row():
-        track_btn = gr.Button("Start Tracking", variant="primary", scale=1)
     with gr.Row():
-        # Added height limit to video output
         video_output = gr.Video(
-            label="Tracking Output",
-            autoplay=True,
             height=512
         )
-    # --- Events ---
     video_input.upload(
-        fn=lambda: "Processing video (MUSt3R + SAM2)...",
-        outputs=process_status
     ).then(
         fn=on_video_upload,
         inputs=[video_input, interval_slider],
         outputs=[img_display, app_state, frame_slider, img_display]
     ).then(
-        fn=lambda: "Ready to annotate.",
-        outputs=process_status
     )
     frame_slider.change(
         fn=on_slider_change,
         inputs=[app_state, frame_slider],
         outputs=[img_display]
     )
-    # 1. Click on image -> Draw point (no mask gen)
     img_display.select(
         fn=on_image_click,
         inputs=[app_state, mode_radio],
         outputs=[img_display]
     )
-    # 2. Click Generate -> Check box consistency & Gen Mask
     gen_mask_btn.click(
         fn=on_generate_mask_click,
         inputs=[app_state],
         outputs=[img_display]
     )
     reset_btn.click(
         fn=reset_annotations,
         inputs=[app_state],
         outputs=[img_display]
     )
     track_btn.click(
         fn=lambda: "Tracking in progress...",
         outputs=process_status
@@ -558,6 +682,7 @@ with gr.Blocks(title="3AM: 3egment Anything") as app:
         outputs=process_status
     )
 if __name__ == "__main__":
     logger.info("Starting Gradio app...")
     app.launch()

 import subprocess
 import sys, os
 from pathlib import Path
+import math
 ''' loading modules '''
 ROOT = Path(__file__).resolve().parent
     if not frames:
         logger.warning("No frames to create video.")
         return None
+    fps = float(fps)
+    if not (fps > 0.0):
+        fps = 24.0
     h, w = np.array(frames[0]).shape[:2]
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
     out = cv2.VideoWriter(output_path, fourcc, fps, (w, h))
 # --- GPU Wrapped Functions ---
+def estimate_video_fps(video_path: str) -> float:
+    cap = cv2.VideoCapture(video_path)
+    fps = float(cap.get(cv2.CAP_PROP_FPS)) or 0.0
+    cap.release()
+    # Robust fallback if metadata is missing
+    return fps if fps > 0.0 else 24.0
+MAX_GPU_SECONDS = 600  # e.g., 10 minutes
+def clamp_duration(sec: int) -> int:
+    return int(min(MAX_GPU_SECONDS, max(1, sec)))
+def estimate_total_frames(video_path: str) -> int:
+    cap = cv2.VideoCapture(video_path)
+    n = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) or 0
+    cap.release()
+    return max(1, n)
+def get_duration_must3r_features(video_path, interval):
+    # interval is applied to the entire pipeline, so actual processed frames ~= ceil(total / interval)
+    total = estimate_total_frames(video_path)
+    interval = max(1, int(interval))
+    processed = math.ceil(total / interval)
+    # Tune this coefficient based on your observed runtime on ZeroGPU
+    sec_per_frame = 2
+    return clamp_duration(int(processed * sec_per_frame))
+@spaces.GPU(duration=get_duration_must3r_features)
 def process_video_and_features(video_path, interval):
     """Load video, subsample frames, get views, MUSt3R features, SAM2 inputs."""
     logger.info(f"Starting GPU process: Video feature extraction (Interval: {interval})")
         logger.error(f"Error during mask generation: {e}")
         raise e
+def get_duration_tracking(sam2_input_images, must3r_feats, must3r_outputs, start_idx, first_frame_mask):
+    # sam2_input_images is already subsampled, so this is the true number of frames to track
+    try:
+        n = int(getattr(sam2_input_images, "shape")[0])
+    except Exception:
+        n = 100  # fallback if something unexpected is passed
+    sec_per_frame = 2
+    return clamp_duration(int(n * sec_per_frame))
+@spaces.GPU(duration=get_duration_tracking)
 def run_tracking(sam2_input_images, must3r_feats, must3r_outputs, start_idx, first_frame_mask):
     """Track the mask across the video."""
     logger.info(f"Starting tracking from frame index {start_idx}...")
         logger.error(f"Failed to process video: {e}")
         raise gr.Error(f"Processing failed: {str(e)}")
+    fps_in = estimate_video_fps(video_path)
+    interval_i = max(1, int(interval))
+    fps_out = max(1.0, fps_in / interval_i)
     # Initialize state
     state = {
         "pil_imgs": pil_imgs,
         "current_points": [],
         "current_labels": [],
         "current_mask": None,
+        "frame_idx": 0,
+        "video_path": video_path,
+        "interval": interval_i,
+        "fps_in": fps_in,
+        "fps_out": fps_out
     }
     first_frame = pil_imgs[0]
             first_frame_mask
         )
+        output_path = create_video_from_masks(
+            state["pil_imgs"],
+            tracked_masks_dict,
+            fps=state.get("fps_out", 24.0),
+        )
         return output_path
     except Exception as e:
         logger.error(f"Tracking failed in UI callback: {e}")
     <p>Upload a video, geometric features are extracted automatically. Select a frame, click to annotate objects, and track them in 3D-consistent space.</p>
 </div>
 """
 with gr.Blocks(title="3AM: 3egment Anything") as app:
     gr.HTML(description)
+    gr.Markdown(
+        """
+# 3AM: 3egment Anything
+**Workflow**
+1) Upload video
+2) Adjust frame interval → Load frames
+3) Annotate & generate mask
+4) Track through the video
+"""
+    )
     app_state = gr.State()
     with gr.Row():
         with gr.Column(scale=1):
+            gr.Markdown("## Step 1 — Upload video")
+            video_input = gr.Video(
+                label="Upload Video",
+                sources=["upload"],
+                height=512
+            )
+            gr.Markdown("## Step 2 — Set interval, then load frames")
+            interval_slider = gr.Slider(
+                label="Frame Interval",
+                minimum=1,
+                maximum=30,
+                step=1,
+                value=1,
+                info="Default ≈ total_frames / 100"
+            )
+            load_btn = gr.Button(
+                "Load Frames",
+                variant="primary"
+            )
+            process_status = gr.Textbox(
+                label="Status",
+                value="1) Upload a video.",
+                interactive=False
+            )
         with gr.Column(scale=2):
+            gr.Markdown("## Step 3 — Annotate frame & generate mask")
             img_display = gr.Image(
+                label="Annotate Frame",
+                interactive=True,
                 height=512
             )
+            frame_slider = gr.Slider(
+                label="Select Frame",
+                minimum=0,
+                maximum=100,
+                step=1,
+                value=0
+            )
             with gr.Row():
                 mode_radio = gr.Radio(
+                    choices=[
+                        "Positive Point",
+                        "Negative Point",
+                        "Box Top-Left",
+                        "Box Bottom-Right",
+                    ],
                     value="Positive Point",
                     label="Annotation Mode"
                 )
                 with gr.Column():
+                    gen_mask_btn = gr.Button(
+                        "Generate Mask",
+                        variant="primary",
+                        interactive=False
+                    )
+                    reset_btn = gr.Button(
+                        "Reset Annotations",
+                        interactive=False
+                    )
+    gr.Markdown("## Step 4 — Track through the video")
     with gr.Row():
+        track_btn = gr.Button(
+            "Start Tracking",
+            variant="primary",
+            scale=1,
+            interactive=False
+        )
     with gr.Row():
         video_output = gr.Video(
+            label="Tracking Output",
+            autoplay=True,
             height=512
         )
+    # ------------------------------------------------
+    # Events
+    # ------------------------------------------------
+    # Upload: only read metadata & set default interval
+    def on_video_uploaded(video_path):
+        n_frames = estimate_total_frames(video_path)
+        default_interval = max(1, n_frames // 100)
+        return (
+            gr.update(value=default_interval, maximum=min(30, n_frames)),
+            f"Video uploaded ({n_frames} frames). "
+            "2) Adjust interval, then click 'Load Frames'."
+        )
     video_input.upload(
+        fn=on_video_uploaded,
+        inputs=video_input,
+        outputs=[interval_slider, process_status]
+    )
+    # Load frames: heavy compute happens here
+    load_btn.click(
+        fn=lambda: (
+            "Loading frames...",
+            gr.update(interactive=False),
+            gr.update(interactive=False),
+            gr.update(interactive=False),
+        ),
+        outputs=[process_status, gen_mask_btn, reset_btn, track_btn]
     ).then(
         fn=on_video_upload,
         inputs=[video_input, interval_slider],
         outputs=[img_display, app_state, frame_slider, img_display]
     ).then(
+        fn=lambda: (
+            "Ready. 3) Annotate and generate mask.",
+            gr.update(interactive=True),
+            gr.update(interactive=True),
+            gr.update(interactive=True),
+        ),
+        outputs=[process_status, gen_mask_btn, reset_btn, track_btn]
     )
     frame_slider.change(
         fn=on_slider_change,
         inputs=[app_state, frame_slider],
         outputs=[img_display]
     )
     img_display.select(
         fn=on_image_click,
         inputs=[app_state, mode_radio],
         outputs=[img_display]
     )
     gen_mask_btn.click(
         fn=on_generate_mask_click,
         inputs=[app_state],
         outputs=[img_display]
     )
     reset_btn.click(
         fn=reset_annotations,
         inputs=[app_state],
         outputs=[img_display]
     )
     track_btn.click(
         fn=lambda: "Tracking in progress...",
         outputs=process_status
         outputs=process_status
     )
 if __name__ == "__main__":
     logger.info("Starting Gradio app...")
     app.launch()