File size: 3,926 Bytes
a4f8a15
e0354f7
1eecd17
e0354f7
eee8ee3
e0354f7
fd0d56e
e0354f7
 
fd0d56e
e0354f7
 
 
eee8ee3
e0354f7
 
 
 
0479145
e0354f7
0479145
e0354f7
0479145
e0354f7
 
c49015b
e0354f7
 
c49015b
e0354f7
c49015b
e0354f7
0479145
e0354f7
0479145
 
e0354f7
 
0479145
e0354f7
 
 
0479145
 
e0354f7
68c6b17
e0354f7
6c0d568
 
e0354f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4d3675e
e0354f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2ac24da
 
e0354f7
 
 
 
6c0d568
e8b186a
e0354f7
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import os
import tempfile

import cv2
import gradio as gr
import numpy as np
import spaces
from PIL import Image

from inference.seg import process_image_or_video
from config import SAPIENS_LITE_MODELS_PATH

def update_model_choices(task):
    """Build a refreshed model-version dropdown for the selected task.

    Looks up the model versions registered for ``task`` (case-insensitive)
    and pre-selects the first one; when the task has no registered models
    the dropdown is returned with no value.
    """
    available = list(SAPIENS_LITE_MODELS_PATH[task.lower()].keys())
    default = available[0] if available else None
    return gr.Dropdown(choices=available, value=default)

@spaces.GPU(duration=120)
def process_image(input_image, task, version):
    """Run the selected task/model on a single image and return the result.

    Gradio may deliver the image as a numpy array; normalize to a PIL
    image before handing it to the inference pipeline.
    """
    if isinstance(input_image, np.ndarray):
        input_image = Image.fromarray(input_image)
    return process_image_or_video(input_image, task=task.lower(), version=version)

def process_video(input_video, task, version):
    """Process a video frame by frame and return the path to the result.

    Parameters
    ----------
    input_video : str
        Path to the source video file.
    task : str
        Task name ("seg", "pose", "depth", "normal"); lower-cased before use.
    version : str
        Model version key passed through to the inference pipeline.

    Returns
    -------
    str
        Path to the written mp4 file.

    Raises
    ------
    ValueError
        If the source video cannot be opened.
    """
    cap = cv2.VideoCapture(input_video)
    if not cap.isOpened():
        cap.release()
        raise ValueError(f"Could not open video: {input_video}")

    # Unique output path per invocation — the original wrote a fixed
    # 'output_video.mp4' in the CWD, so concurrent runs clobbered each other.
    fd, output_path = tempfile.mkstemp(suffix=".mp4")
    os.close(fd)

    # Some containers report 0 fps; fall back to a sane default so the
    # writer produces a playable file.
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    writer = cv2.VideoWriter(
        output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height)
    )
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # OpenCV decodes BGR; the inference pipeline expects RGB.
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            processed = process_image_or_video(
                frame_rgb, task=task.lower(), version=version
            )

            if processed is not None:
                # NOTE(review): assumes the processed frame matches the
                # input (width, height) — VideoWriter silently drops
                # mismatched frames; confirm against inference.seg.
                writer.write(cv2.cvtColor(np.array(processed), cv2.COLOR_RGB2BGR))
    finally:
        # Release both handles even if inference raises mid-video,
        # so the partial output file is finalized and not left locked.
        cap.release()
        writer.release()

    return output_path

# --- UI definition ----------------------------------------------------------
# Two parallel tabs (Image / Video), each with the same task radio and
# model-version dropdown, wired to process_image / process_video above.
with gr.Blocks() as demo:
    gr.Markdown("# Sapiens Arena 🤸🏽‍♂️ - WIP devmode")
    with gr.Tabs():
        with gr.TabItem('Image'):
            with gr.Row():
                with gr.Column():
                    # type="pil" hands process_image a PIL.Image (it also
                    # tolerates numpy arrays defensively).
                    input_image = gr.Image(label="Input Image", type="pil")
                    select_task_image = gr.Radio(
                        ["seg", "pose", "depth", "normal"], 
                        label="Task", 
                        info="Choose the task to perform",
                        value="seg"
                    )
                    # Initial choices mirror the default task ("seg");
                    # kept in sync via the .change handler below.
                    model_name_image = gr.Dropdown(
                        label="Model Version",
                        choices=list(SAPIENS_LITE_MODELS_PATH["seg"].keys()),
                        value="sapiens_0.3b",
                    )
                with gr.Column():
                    result_image = gr.Image(label="Result")
                    run_button_image = gr.Button("Run")
        
        with gr.TabItem('Video'):
            with gr.Row():
                with gr.Column():
                    input_video = gr.Video(label="Input Video")
                    select_task_video = gr.Radio(
                        ["seg", "pose", "depth", "normal"], 
                        label="Task", 
                        info="Choose the task to perform",
                        value="seg"
                    )
                    model_name_video = gr.Dropdown(
                        label="Model Version",
                        choices=list(SAPIENS_LITE_MODELS_PATH["seg"].keys()),
                        value="sapiens_0.3b",
                    )
                with gr.Column():
                    result_video = gr.Video(label="Result")
                    run_button_video = gr.Button("Run")

    # Repopulate the model dropdown whenever the task selection changes.
    select_task_image.change(fn=update_model_choices, inputs=select_task_image, outputs=model_name_image)
    select_task_video.change(fn=update_model_choices, inputs=select_task_video, outputs=model_name_video)

    run_button_image.click(
        fn=process_image,
        inputs=[input_image, select_task_image, model_name_image],
        outputs=[result_image],
    )

    run_button_video.click(
        fn=process_video,
        inputs=[input_video, select_task_video, model_name_video],
        outputs=[result_video],
    )

if __name__ == "__main__":
    # share=True exposes a temporary public Gradio URL in addition to the
    # local server.
    demo.launch(share=True)