Update frame interval handling and frame_per slider to reflect exported image interval in video processing
app.py CHANGED
@@ -38,6 +38,23 @@ def clean(Seg_Tracker):
     torch.cuda.empty_cache()
     return None, ({}, {}), None, None, 0, None, None, None, 0
 
+def change_video(input_video):
+    if input_video is None:
+        return 0, 0
+    cap = cv2.VideoCapture(input_video)
+    fps = cap.get(cv2.CAP_PROP_FPS)
+    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
+    cap.release()
+    scale_slider = gr.Slider.update(minimum=1.0,
+                                    maximum=fps,
+                                    step=1.0,
+                                    value=fps,)
+    frame_per = gr.Slider.update(minimum= 0.0,
+                                 maximum= total_frames / fps,
+                                 step=1.0/fps,
+                                 value=0.0,)
+    return scale_slider, frame_per
+
 def get_meta_from_video(Seg_Tracker, input_video, scale_slider, checkpoint):
 
     output_dir = '/tmp/output_frames'
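The new change_video handler re-ranges both sliders from the uploaded clip itself: scale_slider becomes a target export rate in whole fps (1 up to the source frame rate, defaulting to every frame), and frame_per becomes a position in seconds across the clip's duration, stepping one source frame at a time. (gr.Slider.update is the Gradio 3.x way to update component properties from an event handler, consistent with the concurrency_count and enable_queue calls at the bottom of the file.) As a sketch, for a hypothetical 24 fps, 480-frame upload the handler would compute:

# Illustrative values only: a hypothetical 24 fps, 480-frame (20 s) clip.
fps, total_frames = 24.0, 480
print(fps)                  # scale_slider range: 1.0..24.0, default 24.0
print(total_frames / fps)   # frame_per maximum: 20.0 seconds
print(1.0 / fps)            # frame_per step: one source frame, ~0.0417 s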
@@ -49,10 +66,10 @@ def get_meta_from_video(Seg_Tracker, input_video, scale_slider, checkpoint):
     if input_video is None:
         return None, ({}, {}), None, None, 0, None, None, None, 0
     cap = cv2.VideoCapture(input_video)
+    fps = cap.get(cv2.CAP_PROP_FPS)
     total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
     cap.release()
-
-    frame_interval = max(1, total_frames // output_frames)
+    frame_interval = max(1, int(fps // scale_slider))
     print(f"frame_interval: {frame_interval}")
     try:
         ffmpeg.input(input_video, hwaccel='cuda').output(
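This hunk is the core of the fix. The old interval came from a fixed output budget (total_frames // output_frames), so roughly the same number of images was exported regardless of clip length or frame rate. The new interval comes from the requested export rate: skip fps // scale_slider source frames per exported image. A minimal sketch of the new arithmetic, with illustrative numbers rather than anything from app.py:

# Illustrative: a 30 fps, 900-frame (30 s) clip, user asks for ~10 exported fps.
fps, total_frames = 30.0, 900
scale_slider = 10.0
frame_interval = max(1, int(fps // scale_slider))   # 3: keep every 3rd frame
num_exported = total_frames // frame_interval       # roughly 300 images on disk
print(frame_interval, num_exported)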
@@ -99,7 +116,11 @@ def get_meta_from_video(Seg_Tracker, input_video, scale_slider, checkpoint):
     image_predictor = SAM2ImagePredictor(sam2_model)
     inference_state = predictor.init_state(video_path=output_dir)
     predictor.reset_state(inference_state)
-
+    frame_per = gr.Slider.update(minimum= 0.0,
+                                 maximum= total_frames / fps,
+                                 step=frame_interval / fps,
+                                 value=0.0,)
+    return (predictor, inference_state, image_predictor), ({}, {}), first_frame_rgb, first_frame_rgb, frame_per, None, None, None, 0
 
 def mask2bbox(mask):
     if len(np.where(mask > 0)[0]) == 0:
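Giving frame_per a step of frame_interval / fps, the timespan covered by one exported image, means every slider position lands exactly on an image that exists on disk; this is what the commit title means by reflecting the exported image interval. Continuing the numbers from the sketch above (how a chosen time maps back to an exported image is an assumption about downstream code not shown in this diff):

# Continuing the sketch: frame_interval = 3 at 30 fps.
fps, total_frames, frame_interval = 30.0, 900, 3
step = frame_interval / fps                 # 0.1 s between selectable positions
duration = total_frames / fps               # slider maximum: 30.0 s
chosen_time = 12.4                          # a position the user might pick
exported_idx = round(chosen_time / step)    # presumably exported image 124
print(step, duration, exported_idx)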
@@ -142,7 +163,7 @@ def draw_rect(image, bbox, obj_id):
     rgb_color = tuple(map(int, (color[:3] * 255).astype(np.uint8)))
     inv_color = tuple(map(int, (255 - color[:3] * 255).astype(np.uint8)))
     x0, y0, x1, y1 = bbox
-    image_with_rect = cv2.rectangle(image.copy(), (x0, y0), (x1, y1),
+    image_with_rect = cv2.rectangle(image.copy(), (x0, y0), (x1, y1), rgb_color, thickness=2)
     return image_with_rect
 
 def sam_click(Seg_Tracker, frame_num, point_mode, click_stack, ann_obj_id, evt: gr.SelectData):
@@ -432,7 +453,7 @@ def seg_track_app():
         with gr.Row():
             checkpoint = gr.Dropdown(label="Model Size", choices=["tiny", "small", "base-plus", "large"], value="tiny")
             scale_slider = gr.Slider(
-                label="Downsampe Frame Rate",
+                label="Downsample Frame Rate (fps)",
                 minimum=0.0,
                 maximum=1.0,
                 step=0.25,
@@ -464,7 +485,7 @@ def seg_track_app():
         with gr.Row():
             with gr.Column():
                 frame_per = gr.Slider(
-                    label = "
+                    label = "Time (seconds)",
                     minimum= 0.0,
                     maximum= 100.0,
                     step=0.01,
@@ -611,6 +632,12 @@ def seg_track_app():
                 Seg_Tracker, input_first_frame, drawing_board, last_draw
             ]
         )
+
+        input_video.change(
+            fn=change_video,
+            inputs=[input_video],
+            outputs=[scale_slider, frame_per]
+        )
 
     app.queue(concurrency_count=1)
     app.launch(debug=True, enable_queue=True, share=False)
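The new input_video.change listener ties it together: selecting or replacing a video immediately re-ranges both sliders via change_video, so by the time get_meta_from_video runs the extraction, scale_slider already expresses an export rate in fps rather than the old 0..1 fraction, and frame_per spans the clip's real duration.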
|