app update
Browse files
app.py
CHANGED
|
@@ -68,34 +68,53 @@ def initialize_models():
|
|
| 68 |
print("All models initialized successfully!")
|
| 69 |
|
| 70 |
|
| 71 |
-
def extract_frames_from_video(video_path, max_frames=
|
| 72 |
"""
|
| 73 |
Extract frames from video file
|
| 74 |
|
| 75 |
Args:
|
| 76 |
video_path: Path to video file
|
| 77 |
-
max_frames: Maximum number of frames to extract
|
| 78 |
|
| 79 |
Returns:
|
| 80 |
frames: List of numpy arrays (H,W,3), uint8 RGB
|
| 81 |
-
|
| 82 |
"""
|
| 83 |
cap = cv2.VideoCapture(video_path)
|
| 84 |
-
|
|
|
|
| 85 |
|
| 86 |
-
frames
|
| 87 |
-
|
|
|
|
| 88 |
ret, frame = cap.read()
|
| 89 |
if not ret:
|
| 90 |
break
|
| 91 |
# Convert BGR to RGB
|
| 92 |
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
| 93 |
-
|
| 94 |
|
| 95 |
cap.release()
|
| 96 |
-
print(f"Extracted {len(frames)} frames from video (FPS: {fps})")
|
| 97 |
|
| 98 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 99 |
|
| 100 |
|
| 101 |
def get_prompt(click_state, click_input):
|
|
@@ -123,17 +142,21 @@ def get_prompt(click_state, click_input):
|
|
| 123 |
return click_state
|
| 124 |
|
| 125 |
|
| 126 |
-
def load_video(video_input, video_state):
|
| 127 |
"""
|
| 128 |
Load video and extract first frame for mask generation
|
| 129 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
if video_input is None:
|
| 131 |
return video_state, None, \
|
| 132 |
gr.update(visible=False), gr.update(visible=False), \
|
| 133 |
gr.update(visible=False), gr.update(visible=False)
|
| 134 |
|
| 135 |
-
# Extract frames
|
| 136 |
-
frames, fps = extract_frames_from_video(video_input, max_frames=
|
| 137 |
|
| 138 |
if len(frames) == 0:
|
| 139 |
return video_state, None, \
|
|
@@ -359,6 +382,9 @@ def run_videomama_with_sam2(video_state, click_state):
|
|
| 359 |
|
| 360 |
status_msg = f"✓ Complete! Generated {len(output_frames)} frames."
|
| 361 |
|
|
|
|
|
|
|
|
|
|
| 362 |
return video_state, str(output_video_path), str(mask_video_path), str(greenscreen_path), status_msg
|
| 363 |
|
| 364 |
|
|
@@ -382,6 +408,42 @@ def save_video(frames, output_path, fps):
|
|
| 382 |
print(f"Saved video to {output_path}")
|
| 383 |
|
| 384 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 385 |
def restart():
|
| 386 |
"""Reset all states"""
|
| 387 |
return None, [[], []], None, \
|
|
@@ -431,6 +493,14 @@ with gr.Blocks(title="VideoMaMa Demo") as demo:
|
|
| 431 |
with gr.Column(scale=1):
|
| 432 |
gr.Markdown("### Step 1: Upload Video")
|
| 433 |
video_input = gr.Video(label="Input Video")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 434 |
load_button = gr.Button("📁 Load Video", variant="primary")
|
| 435 |
|
| 436 |
gr.Markdown("### Step 2: Mark Object")
|
|
@@ -466,7 +536,7 @@ with gr.Blocks(title="VideoMaMa Demo") as demo:
|
|
| 466 |
# Event handlers
|
| 467 |
load_button.click(
|
| 468 |
fn=load_video,
|
| 469 |
-
inputs=[video_input, video_state],
|
| 470 |
outputs=[video_state, first_frame_display,
|
| 471 |
point_prompt, clear_button, run_button, status_text]
|
| 472 |
)
|
|
@@ -511,6 +581,9 @@ if __name__ == "__main__":
|
|
| 511 |
print("VideoMaMa Interactive Demo")
|
| 512 |
print("=" * 60)
|
| 513 |
|
|
|
|
|
|
|
|
|
|
| 514 |
# Models will be initialized on first use (lazy loading for ZeroGPU)
|
| 515 |
# initialize_models()
|
| 516 |
|
|
|
|
| 68 |
print("All models initialized successfully!")
|
| 69 |
|
| 70 |
|
| 71 |
+
def extract_frames_from_video(video_path, max_frames=24):
    """
    Extract frames from a video file, downsampling to at most max_frames.

    Args:
        video_path: Path to a video file readable by cv2.VideoCapture.
        max_frames: Maximum number of frames to extract (default: 24).

    Returns:
        frames: List of numpy arrays (H, W, 3), uint8 RGB.
        adjusted_fps: FPS to use when re-encoding the sampled frames so the
            output video plays back at the original speed.
    """
    cap = cv2.VideoCapture(video_path)
    original_fps = cap.get(cv2.CAP_PROP_FPS)

    # Read all frames first
    all_frames = []
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        # Convert BGR (OpenCV default) to RGB
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        all_frames.append(frame_rgb)

    cap.release()

    # Some containers report 0 FPS; fall back to a sane default so the
    # proportional adjustment below never produces a 0-FPS output video.
    if not original_fps or original_fps <= 0:
        original_fps = 30.0

    # If video has more frames than max_frames, sample evenly spaced frames.
    # Uniform spacing (rather than random sampling) is required for the
    # proportional FPS adjustment below to actually preserve playback speed,
    # and it is deterministic between runs.
    if len(all_frames) > max_frames:
        print(f"Video has {len(all_frames)} frames, sampling {max_frames} evenly spaced frames...")
        sampled_indices = np.linspace(0, len(all_frames) - 1, max_frames).astype(int).tolist()
        frames = [all_frames[i] for i in sampled_indices]
        print(f"Sampled frame indices: {sampled_indices}")

        # If we kept N of M frames, play them back at N/M of the original FPS
        # so total duration stays the same.
        adjusted_fps = original_fps * (len(frames) / len(all_frames))
    else:
        frames = all_frames
        adjusted_fps = original_fps
        print(f"Video has {len(frames)} frames (≤ {max_frames}), using all frames")

    print(f"Using {len(frames)} frames from video (Original FPS: {original_fps:.2f}, Adjusted FPS: {adjusted_fps:.2f})")

    return frames, adjusted_fps
|
| 118 |
|
| 119 |
|
| 120 |
def get_prompt(click_state, click_input):
|
|
|
|
| 142 |
return click_state
|
| 143 |
|
| 144 |
|
| 145 |
+
def load_video(video_input, video_state, num_frames):
|
| 146 |
"""
|
| 147 |
Load video and extract first frame for mask generation
|
| 148 |
"""
|
| 149 |
+
# Clean up old output files if they exist
|
| 150 |
+
if video_state is not None and "output_paths" in video_state:
|
| 151 |
+
cleanup_old_videos(video_state["output_paths"])
|
| 152 |
+
|
| 153 |
if video_input is None:
|
| 154 |
return video_state, None, \
|
| 155 |
gr.update(visible=False), gr.update(visible=False), \
|
| 156 |
gr.update(visible=False), gr.update(visible=False)
|
| 157 |
|
| 158 |
+
# Extract frames with user-specified number
|
| 159 |
+
frames, fps = extract_frames_from_video(video_input, max_frames=num_frames)
|
| 160 |
|
| 161 |
if len(frames) == 0:
|
| 162 |
return video_state, None, \
|
|
|
|
| 382 |
|
| 383 |
status_msg = f"✓ Complete! Generated {len(output_frames)} frames."
|
| 384 |
|
| 385 |
+
# Store paths for cleanup later
|
| 386 |
+
video_state["output_paths"] = [str(output_video_path), str(mask_video_path), str(greenscreen_path)]
|
| 387 |
+
|
| 388 |
return video_state, str(output_video_path), str(mask_video_path), str(greenscreen_path), status_msg
|
| 389 |
|
| 390 |
|
|
|
|
| 408 |
print(f"Saved video to {output_path}")
|
| 409 |
|
| 410 |
|
| 411 |
+
def cleanup_old_videos(video_paths):
    """Delete previously generated output videos to free storage space."""
    if video_paths is None:
        return

    for video_path in video_paths:
        try:
            if not os.path.exists(video_path):
                continue
            os.remove(video_path)
            print(f"Cleaned up: {video_path}")
        except Exception as err:
            print(f"Failed to remove {video_path}: {err}")
|
| 423 |
+
|
| 424 |
+
|
| 425 |
+
def cleanup_old_outputs(max_age_minutes=30, output_dir="outputs", pattern="*.mp4"):
    """
    Remove output files older than max_age_minutes to prevent storage overflow.

    Intended to run periodically (e.g. on startup) to clean up abandoned files.

    Args:
        max_age_minutes: Files last modified more than this many minutes ago
            are deleted (default: 30).
        output_dir: Directory to scan (default: "outputs"). No-op when the
            directory does not exist.
        pattern: Glob pattern of files eligible for deletion (default: "*.mp4").
    """
    output_dir = Path(output_dir)
    if not output_dir.exists():
        return

    current_time = time.time()
    max_age_seconds = max_age_minutes * 60

    for file_path in output_dir.glob(pattern):
        try:
            file_age = current_time - file_path.stat().st_mtime
            if file_age > max_age_seconds:
                file_path.unlink()
                print(f"Cleaned up old file: {file_path} (age: {file_age/60:.1f} minutes)")
        except Exception as e:
            # Best-effort cleanup: a file vanishing mid-scan or a permission
            # error must not abort the sweep.
            print(f"Failed to clean up {file_path}: {e}")
|
| 445 |
+
|
| 446 |
+
|
| 447 |
def restart():
|
| 448 |
"""Reset all states"""
|
| 449 |
return None, [[], []], None, \
|
|
|
|
| 493 |
with gr.Column(scale=1):
|
| 494 |
gr.Markdown("### Step 1: Upload Video")
|
| 495 |
video_input = gr.Video(label="Input Video")
|
| 496 |
+
num_frames_slider = gr.Slider(
|
| 497 |
+
minimum=1,
|
| 498 |
+
maximum=50,
|
| 499 |
+
value=24,
|
| 500 |
+
step=1,
|
| 501 |
+
label="Number of Frames",
|
| 502 |
+
info="More frames = better quality but slower processing"
|
| 503 |
+
)
|
| 504 |
load_button = gr.Button("📁 Load Video", variant="primary")
|
| 505 |
|
| 506 |
gr.Markdown("### Step 2: Mark Object")
|
|
|
|
| 536 |
# Event handlers
|
| 537 |
load_button.click(
|
| 538 |
fn=load_video,
|
| 539 |
+
inputs=[video_input, video_state, num_frames_slider],
|
| 540 |
outputs=[video_state, first_frame_display,
|
| 541 |
point_prompt, clear_button, run_button, status_text]
|
| 542 |
)
|
|
|
|
| 581 |
print("VideoMaMa Interactive Demo")
|
| 582 |
print("=" * 60)
|
| 583 |
|
| 584 |
+
# Clean up old output files on startup
|
| 585 |
+
cleanup_old_outputs(max_age_minutes=30)
|
| 586 |
+
|
| 587 |
# Models will be initialized on first use (lazy loading for ZeroGPU)
|
| 588 |
# initialize_models()
|
| 589 |
|