| import gradio as gr |
| from loadimg import load_img |
| import spaces |
| from transformers import AutoModelForImageSegmentation |
| import torch |
| from torchvision import transforms |
| import moviepy.editor as mp |
| from pydub import AudioSegment |
| from PIL import Image |
| import numpy as np |
| import os |
| import tempfile |
| import uuid |
|
|
# Allow TF32 on matmul for a speed boost on Ampere+ GPUs at slightly
# reduced float32 precision. (The original `["high", "highest"][0]` is
# just an obfuscated "high".)
torch.set_float32_matmul_precision("high")


# BiRefNet segmentation model; trust_remote_code is required because the
# architecture lives in the model repo rather than in transformers itself.
# NOTE(review): assumes a CUDA device is available — `.to("cuda")` raises otherwise.
birefnet = AutoModelForImageSegmentation.from_pretrained(
    "ZhengPeng7/BiRefNet", trust_remote_code=True
)
birefnet.to("cuda")

# Preprocessing expected by BiRefNet: fixed 1024x1024 input normalized
# with the standard ImageNet mean/std.
transform_image = transforms.Compose(
    [
        transforms.Resize((1024, 1024)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]
)
|
|
|
|
@spaces.GPU
def fn(vid, fps, color):
    """Replace the background of every frame in a video with a solid color.

    Args:
        vid: Path to the input video file.
        fps: Frame rate at which to sample frames and encode the output.
        color: Background color as a hex string, e.g. "#00FF00".

    Returns:
        Path to the processed .mp4 file (written under ./temp).
    """
    video = mp.VideoFileClip(vid)
    try:
        # May be None for silent clips; set_audio(None) is harmless.
        audio = video.audio

        processed_frames = []
        for frame in video.iter_frames(fps=fps):
            pil_image = Image.fromarray(frame)
            processed_image = process(pil_image, color)
            # Force 3-channel RGB: libx264 cannot encode RGBA frames, and
            # process() may hand back an image with an alpha channel.
            processed_frames.append(np.array(processed_image.convert("RGB")))

        processed_video = mp.ImageSequenceClip(processed_frames, fps=fps)
        processed_video = processed_video.set_audio(audio)

        # Write to a uniquely named file so concurrent requests don't collide.
        temp_dir = "temp"
        os.makedirs(temp_dir, exist_ok=True)
        unique_filename = str(uuid.uuid4()) + ".mp4"
        temp_filepath = os.path.join(temp_dir, unique_filename)
        processed_video.write_videofile(temp_filepath, codec="libx264")
    finally:
        # Release the ffmpeg reader handles even if processing fails
        # (the original leaked them on every call).
        video.close()

    return temp_filepath
|
|
|
|
def process(image, color_hex):
    """Segment the foreground with BiRefNet and composite it over a color.

    Args:
        image: Input PIL image (RGB).
        color_hex: Background color as "#RRGGBB" (leading '#' optional).

    Returns:
        RGB PIL image: the detected foreground over the solid background.
    """
    image_size = image.size
    input_images = transform_image(image).unsqueeze(0).to("cuda")

    # The model's last output is the final refinement stage; sigmoid maps
    # logits to a [0, 1] soft matte.
    with torch.no_grad():
        preds = birefnet(input_images)[-1].sigmoid().cpu()
    pred = preds[0].squeeze()
    pred_pil = transforms.ToPILImage()(pred)
    # Resize the 1024x1024 matte back to the original resolution.
    mask = pred_pil.resize(image_size)

    # Parse "#RRGGBB" robustly (the original hard-coded offsets 1/3/5 and
    # broke on a missing '#').
    h = color_hex.lstrip("#")
    color_rgb = tuple(int(h[i : i + 2], 16) for i in (0, 2, 4))

    # Build the background in RGB, not RGBA: the color is fully opaque, the
    # original mixed an RGB foreground with an RGBA background in
    # Image.composite (modes must match), and an RGB result is what the
    # libx264 video encode downstream needs.
    background = Image.new("RGB", image_size, color_rgb)

    # Paste the foreground over the background using the soft matte as alpha.
    return Image.composite(image, background, mask)
|
|
|
|
def process_file(f, color="#00FF00"):
    """Run background replacement on one image file; save the result as PNG.

    Args:
        f: Path to the input image.
        color: Background color as "#RRGGBB" hex (default green).

    Returns:
        Path of the saved PNG (input path with its extension swapped).
    """
    output_path = f.rsplit(".", 1)[0] + ".png"
    image = load_img(f, output_type="pil").convert("RGB")
    composited = process(image, color)
    composited.save(output_path)
    return output_path
|
|
|
|
with gr.Blocks() as demo:
    # Widgets: source video, processed output, sampling FPS, and the
    # replacement background color.
    source_video = gr.Video(label="birefnet")
    result_video = gr.Video()
    fps_input = gr.Slider(minimum=1, maximum=60, step=1, value=12, label="Output FPS")
    bg_color = gr.ColorPicker(label="Background Color", value="#00FF00")
    run_button = gr.Button("Process Video")

    # Wire the button to the video-processing entry point.
    run_button.click(
        fn,
        inputs=[source_video, fps_input, bg_color],
        outputs=result_video,
    )


if __name__ == "__main__":
    demo.launch(show_error=True)