"""Video Background Remover & Changer Pro.

Gradio app that replaces the background of a video with a solid color,
a still image, or another video, using BiRefNet for foreground
segmentation (full model for quality, lite model for speed).
"""

import gradio as gr
from loadimg import load_img
import spaces
from transformers import AutoModelForImageSegmentation
import torch
from torchvision import transforms
from pydub import AudioSegment
from PIL import Image
import numpy as np
import os
import tempfile
import uuid
import time
from concurrent.futures import ThreadPoolExecutor
from moviepy import VideoFileClip, vfx, concatenate_videoclips, ImageSequenceClip

torch.set_float32_matmul_precision("medium")

device = "cuda" if torch.cuda.is_available() else "cpu"

# Load both BiRefNet models: the full model for best quality and the
# lite variant for "Fast Mode".
birefnet = AutoModelForImageSegmentation.from_pretrained(
    "ZhengPeng7/BiRefNet", trust_remote_code=True
)
birefnet.to(device)
birefnet_lite = AutoModelForImageSegmentation.from_pretrained(
    "ZhengPeng7/BiRefNet_lite", trust_remote_code=True
)
birefnet_lite.to(device)

# ═══════════════════════════════════════════════════════════
# 🔧 High-quality settings — segmentation input resolution
#    (upgraded from 768 to 1024)
# ═══════════════════════════════════════════════════════════
HIGH_QUALITY_SIZE = 1024

# Preprocessing for BiRefNet: resize to the model's square input,
# then normalize with ImageNet statistics.
transform_image = transforms.Compose([
    transforms.Resize((HIGH_QUALITY_SIZE, HIGH_QUALITY_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

# ═══════════════════════════════════════════════════════════
# 🔘 NEUMORPHISM CSS styling
# ═══════════════════════════════════════════════════════════
NEUMORPHISM_CSS = """
/* 🎨 Core color palette */
:root {
    --neu-bg: #e0e5ec;
    --neu-shadow-dark: #a3b1c6;
    --neu-shadow-light: #ffffff;
    --neu-text: #4a5568;
    --neu-text-dark: #2d3748;
    --neu-accent: #667eea;
    --neu-accent-light: #7c91f0;
    --neu-success: #48bb78;
    --neu-warning: #ed8936;
}

/* 📦 Page background */
body, .gradio-container {
    background: linear-gradient(145deg, #e2e8ec, #d8dde4) !important;
    min-height: 100vh;
}

.gradio-container {
    max-width: 1400px !important;
    margin: 0 auto !important;
    padding: 30px !important;
}

/* 🔲 Raised effect on the main container */
.main, .contain {
    background: var(--neu-bg) !important;
    border-radius: 30px !important;
    box-shadow: 12px 12px 24px var(--neu-shadow-dark), -12px -12px 24px var(--neu-shadow-light) !important;
    padding: 25px !important;
}

/* 📝 Title styling */
h1, .markdown h1 {
    color: var(--neu-text-dark) !important;
    text-shadow: 3px 3px 6px var(--neu-shadow-light), -2px -2px 4px rgba(0,0,0,0.08) !important;
    font-weight: 800 !important;
    letter-spacing: -0.5px !important;
}

h3, .markdown h3 {
    color: var(--neu-text) !important;
    font-weight: 600 !important;
}

/* 🎬 Inset effect for video/image components */
.video-container, .image-container,
[data-testid="video"], [data-testid="image"],
.upload-container, .svelte-1uvlhfp {
    background: var(--neu-bg) !important;
    border-radius: 20px !important;
    box-shadow: inset 8px 8px 16px var(--neu-shadow-dark), inset -8px -8px 16px var(--neu-shadow-light) !important;
    border: none !important;
    padding: 15px !important;
    transition: all 0.3s ease !important;
}

/* 🔘 Neumorphic buttons */
button, .gr-button, button.primary, button.secondary,
.gr-button-primary, .gr-button-secondary {
    background: linear-gradient(145deg, #e8edf4, #d4d9e0) !important;
    border: none !important;
    border-radius: 50px !important;
    padding: 18px 45px !important;
    color: var(--neu-text-dark) !important;
    font-weight: 700 !important;
    font-size: 16px !important;
    box-shadow: 10px 10px 20px var(--neu-shadow-dark), -10px -10px 20px var(--neu-shadow-light) !important;
    transition: all 0.25s ease !important;
    cursor: pointer !important;
}

button:hover, .gr-button:hover {
    background: linear-gradient(145deg, #ecf1f8, #d8dde4) !important;
    box-shadow: 6px 6px 12px var(--neu-shadow-dark), -6px -6px 12px var(--neu-shadow-light) !important;
    transform: translateY(-2px) !important;
}

button:active, .gr-button:active {
    box-shadow: inset 6px 6px 12px var(--neu-shadow-dark), inset -6px -6px 12px var(--neu-shadow-light) !important;
    transform: translateY(0) !important;
}

/* 🎚️ Slider styling */
input[type="range"] {
    background: var(--neu-bg) !important;
    border-radius: 15px !important;
    box-shadow: inset 4px 4px 8px var(--neu-shadow-dark), inset -4px -4px 8px var(--neu-shadow-light) !important;
    height: 12px !important;
}

input[type="range"]::-webkit-slider-thumb {
    background: linear-gradient(145deg, #f0f5fa, #d4d9e0) !important;
    border-radius: 50% !important;
    width: 28px !important;
    height: 28px !important;
    box-shadow: 6px 6px 12px var(--neu-shadow-dark), -6px -6px 12px var(--neu-shadow-light) !important;
    cursor: pointer !important;
}

/* 🔘 Radio buttons & checkboxes */
.gr-radio, .gr-checkbox, input[type="radio"], input[type="checkbox"] {
    background: var(--neu-bg) !important;
    border-radius: 12px !important;
    box-shadow: inset 4px 4px 8px var(--neu-shadow-dark), inset -4px -4px 8px var(--neu-shadow-light) !important;
    border: none !important;
}

.gr-radio-label, .gr-checkbox-label {
    color: var(--neu-text) !important;
    font-weight: 600 !important;
}

/* Radio/checkbox group containers */
.gr-radio-group, .gr-checkbox-group, .radio-group, .checkbox-group {
    background: var(--neu-bg) !important;
    border-radius: 20px !important;
    padding: 15px 20px !important;
    box-shadow: 8px 8px 16px var(--neu-shadow-dark), -8px -8px 16px var(--neu-shadow-light) !important;
}

/* 🎨 Color picker */
input[type="color"] {
    background: var(--neu-bg) !important;
    border-radius: 50% !important;
    width: 60px !important;
    height: 60px !important;
    box-shadow: 8px 8px 16px var(--neu-shadow-dark), -8px -8px 16px var(--neu-shadow-light) !important;
    border: none !important;
    cursor: pointer !important;
    padding: 8px !important;
}

/* 📊 Row containers */
.gr-row, .row {
    background: transparent !important;
    gap: 25px !important;
}

/* 📝 Inset effect for text boxes */
textarea, input[type="text"], .gr-textbox {
    background: var(--neu-bg) !important;
    border-radius: 15px !important;
    box-shadow: inset 6px 6px 12px var(--neu-shadow-dark), inset -6px -6px 12px var(--neu-shadow-light) !important;
    border: none !important;
    padding: 15px 20px !important;
    color: var(--neu-text-dark) !important;
    font-weight: 500 !important;
}

textarea:focus, input[type="text"]:focus {
    outline: none !important;
    box-shadow: inset 8px 8px 16px var(--neu-shadow-dark), inset -8px -8px 16px var(--neu-shadow-light), 0 0 0 3px rgba(102, 126, 234, 0.3) !important;
}

/* 🏷️ Label styling */
label, .gr-label {
    color: var(--neu-text-dark) !important;
    font-weight: 700 !important;
    font-size: 14px !important;
    text-transform: uppercase !important;
    letter-spacing: 0.5px !important;
    margin-bottom: 10px !important;
}

/* 📦 Block containers */
.gr-block, .block {
    background: var(--neu-bg) !important;
    border-radius: 25px !important;
    box-shadow: 10px 10px 20px var(--neu-shadow-dark), -10px -10px 20px var(--neu-shadow-light) !important;
    padding: 20px !important;
    margin: 15px 0 !important;
}

/* 🔲 Remove panel dividers */
.gr-panel, .panel {
    border: none !important;
    background: transparent !important;
}

/* ℹ️ Info text */
.gr-info, .info {
    color: var(--neu-text) !important;
    background: var(--neu-bg) !important;
    border-radius: 12px !important;
    padding: 12px 18px !important;
    box-shadow: inset 4px 4px 8px var(--neu-shadow-dark), inset -4px -4px 8px var(--neu-shadow-light) !important;
}

/* 🎯 Examples section */
.gr-examples, .examples {
    background: var(--neu-bg) !important;
    border-radius: 20px !important;
    padding: 20px !important;
    box-shadow: 8px 8px 16px var(--neu-shadow-dark), -8px -8px 16px var(--neu-shadow-light) !important;
}

/* 🌟 Stronger hover effect */
.gr-block:hover {
    box-shadow: 12px 12px 24px var(--neu-shadow-dark), -12px -12px 24px var(--neu-shadow-light) !important;
}

/* 📱 Responsive adjustments */
@media (max-width: 768px) {
    .gradio-container { padding: 15px !important; }
    button, .gr-button { padding: 14px 30px !important; font-size: 14px !important; }
    .gr-block { border-radius: 18px !important; padding: 15px !important; }
}

/* ✨ Animation */
@keyframes neuPulse {
    0%, 100% { box-shadow: 10px 10px 20px var(--neu-shadow-dark), -10px -10px 20px var(--neu-shadow-light); }
    50% { box-shadow: 14px 14px 28px var(--neu-shadow-dark), -14px -14px 28px var(--neu-shadow-light); }
}

.processing { animation: neuPulse 1.5s ease-in-out infinite !important; }

/* 🎨 Progress display */
.progress-bar {
    background: var(--neu-bg) !important;
    border-radius: 10px !important;
    box-shadow: inset 4px 4px 8px var(--neu-shadow-dark), inset -4px -4px 8px var(--neu-shadow-light) !important;
    overflow: hidden !important;
}

.progress-bar-fill {
    background: linear-gradient(90deg, var(--neu-accent), var(--neu-accent-light)) !important;
    border-radius: 10px !important;
}
"""


def process_frame(frame, bg_type, bg, fast_mode, bg_frame_index, background_frames, color):
    """Replace the background of a single video frame.

    Args:
        frame: HxWx3 uint8 numpy array (one frame from moviepy).
        bg_type: "Color", "Image", or "Video" (anything else = passthrough).
        bg: background image path (used when bg_type == "Image").
        fast_mode: if True, segment with BiRefNet_lite instead of BiRefNet.
        bg_frame_index: index of the background frame for this output frame.
        background_frames: list of background frames, or None when unused.
        color: hex color string (used when bg_type == "Color").

    Returns:
        (processed_frame_array, next_bg_frame_index). On any error the
        original frame is returned unchanged so one bad frame does not
        abort the whole video (best-effort by design).
    """
    try:
        pil_image = Image.fromarray(frame)
        if bg_type == "Color":
            processed_image = process(pil_image, color, fast_mode)
        elif bg_type == "Image":
            processed_image = process(pil_image, bg, fast_mode)
        elif bg_type == "Video":
            # Wrap around with modulo instead of crashing with IndexError
            # when the output has more frames than the background clip
            # provides (fps resampling / loop-count rounding can cause this).
            background_frame = background_frames[bg_frame_index % len(background_frames)]
            bg_frame_index += 1
            background_image = Image.fromarray(background_frame)
            processed_image = process(pil_image, background_image, fast_mode)
        else:
            processed_image = pil_image
        return np.array(processed_image), bg_frame_index
    except Exception as e:
        print(f"Error processing frame: {e}")
        return frame, bg_frame_index


@spaces.GPU
def fn(vid, bg_type="Color", bg_image=None, bg_video=None, color="#00FF00", fps=0,
       video_handling="slow_down", fast_mode=False, max_workers=16):
    """Process a whole video, replacing its background frame by frame.

    Generator wired to three Gradio outputs (stream_image, out_video,
    time_textbox): it first toggles component visibility, then streams a
    live frame preview with progress text, and finally yields the path of
    the encoded result video.

    Args:
        vid: path of the input video.
        bg_type: "Color" | "Image" | "Video".
        bg_image: background image path (for bg_type == "Image").
        bg_video: background video path (for bg_type == "Video").
        color: hex background color (for bg_type == "Color").
        fps: output fps; 0 keeps the input video's fps.
        video_handling: "slow_down" (stretch background to cover the input)
            or "loop" (repeat the background clip).
        fast_mode: use the lite segmentation model.
        max_workers: thread pool size for per-frame processing.
    """
    try:
        start_time = time.time()
        video = VideoFileClip(vid)
        if fps == 0:
            fps = video.fps
        audio = video.audio
        frames = list(video.iter_frames(fps=fps))
        processed_frames = []
        yield gr.update(visible=True), gr.update(visible=False), "🚀 Processing started... Elapsed time: 0 seconds"

        if bg_type == "Video":
            background_video = VideoFileClip(bg_video)
            if background_video.duration < video.duration:
                if video_handling == "slow_down":
                    # moviepy >= 2.0 applies effects via with_effects();
                    # the v1 `.fx(vfx.speedx, ...)` API used here before no
                    # longer exists (and its factor was inverted — it sped a
                    # short background up instead of slowing it down).
                    # final_duration stretches the background to exactly
                    # cover the foreground clip.
                    background_video = background_video.with_effects(
                        [vfx.MultiplySpeed(final_duration=video.duration)]
                    )
                else:  # "loop": tile enough copies to cover the input length
                    background_video = concatenate_videoclips(
                        [background_video] * int(video.duration / background_video.duration + 1)
                    )
            background_frames = list(background_video.iter_frames(fps=fps))
        else:
            background_frames = None

        bg_frame_index = 0

        # Fan out frame processing; each future gets a precomputed
        # background index (bg_frame_index + i) so results are order-stable.
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            futures = [
                executor.submit(process_frame, frames[i], bg_type, bg_image, fast_mode,
                                bg_frame_index + i, background_frames, color)
                for i in range(len(frames))
            ]
            for i, future in enumerate(futures):
                result, _ = future.result()
                processed_frames.append(result)
                elapsed_time = time.time() - start_time
                progress_pct = ((i + 1) / len(frames)) * 100
                yield result, None, f"⚡ Processing frame {i+1}/{len(frames)} ({progress_pct:.1f}%)... Elapsed: {elapsed_time:.2f}s"

        processed_video = ImageSequenceClip(processed_frames, fps=fps)
        processed_video = processed_video.with_audio(audio)

        # Only the unique path is needed; the file is (re)written below.
        with tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) as temp_file:
            temp_filepath = temp_file.name

        # High-quality video encoding settings
        processed_video.write_videofile(
            temp_filepath,
            codec="libx264",
            bitrate="8000k",          # increased bitrate
            audio_codec="aac",
            audio_bitrate="192k",
            preset="slow",            # better compression quality
            ffmpeg_params=["-crf", "18"],  # high-quality CRF value
        )

        elapsed_time = time.time() - start_time
        # First restore visibility, then deliver the final values.
        yield gr.update(visible=False), gr.update(visible=True), f"✅ Processing complete! Total time: {elapsed_time:.2f} seconds"
        yield processed_frames[-1], temp_filepath, f"✅ Processing complete! Total time: {elapsed_time:.2f} seconds"

    except Exception as e:
        print(f"Error: {e}")
        elapsed_time = time.time() - start_time
        yield gr.update(visible=False), gr.update(visible=True), f"❌ Error: {e}. Elapsed: {elapsed_time:.2f}s"
        # The video slot must get None here — a bare error string is not a
        # valid filepath for gr.Video and would trigger a second failure.
        yield None, None, f"❌ Error: {e}. Elapsed: {elapsed_time:.2f}s"


def process(image, bg, fast_mode=False):
    """Segment the foreground of `image` and composite it over `bg`.

    Args:
        image: PIL image (one video frame).
        bg: hex color string ("#RRGGBB"), a PIL image, or an image filepath.
        fast_mode: if True, use BiRefNet_lite; otherwise the full model.

    Returns:
        PIL image with the original foreground over the new background.
    """
    image_size = image.size
    input_images = transform_image(image).unsqueeze(0).to(device)
    model = birefnet_lite if fast_mode else birefnet
    with torch.no_grad():
        preds = model(input_images)[-1].sigmoid().cpu()
    pred = preds[0].squeeze()
    pred_pil = transforms.ToPILImage()(pred)
    # LANCZOS: high-quality resampling back to the frame's resolution.
    mask = pred_pil.resize(image_size, Image.LANCZOS)

    if isinstance(bg, str) and bg.startswith("#"):
        color_rgb = tuple(int(bg[i:i + 2], 16) for i in (1, 3, 5))
        background = Image.new("RGBA", image_size, color_rgb + (255,))
    elif isinstance(bg, Image.Image):
        background = bg.convert("RGBA").resize(image_size, Image.LANCZOS)
    else:
        background = Image.open(bg).convert("RGBA").resize(image_size, Image.LANCZOS)

    image = Image.composite(image, background, mask)
    return image


# ═══════════════════════════════════════════════════════════
# 🎨 GRADIO UI with Neumorphism
# ═══════════════════════════════════════════════════════════
with gr.Blocks(
    css=NEUMORPHISM_CSS,
    title="🎬 Video Background Remover Pro",
    theme=gr.themes.Soft(
        primary_hue="slate",
        secondary_hue="blue",
        neutral_hue="slate",
        font=gr.themes.GoogleFont("Inter"),
    ),
) as demo:
    gr.Markdown("""
    # 🎬 Video Background Remover & Changer Pro
    ### ✨ AI-powered background replacement with high-quality processing
    **Features:** Color, Image, or Video backgrounds • High-resolution processing (1024px) • Fast & Quality modes
    """)

    with gr.Row():
        in_video = gr.Video(
            label="📥 Input Video",
            interactive=True,
            height=400,
        )
        stream_image = gr.Image(
            label="⚡ Live Preview",
            visible=False,
            height=400,
        )
        out_video = gr.Video(
            label="📤 Output Video",
            height=400,
        )

    submit_button = gr.Button(
        "🚀 Change Background",
        interactive=True,
        variant="primary",
        size="lg",
    )

    with gr.Row():
        with gr.Column(scale=1):
            bg_type = gr.Radio(
                ["Color", "Image", "Video"],
                label="🎨 Background Type",
                value="Color",
                interactive=True,
            )
            color_picker = gr.ColorPicker(
                label="🎨 Background Color",
                value="#00FF00",
                visible=True,
                interactive=True,
            )
            bg_image = gr.Image(
                label="🖼️ Background Image",
                type="filepath",
                visible=False,
                interactive=True,
            )
            bg_video = gr.Video(
                label="🎬 Background Video",
                visible=False,
                interactive=True,
            )
            with gr.Column(visible=False) as video_handling_options:
                video_handling_radio = gr.Radio(
                    ["slow_down", "loop"],
                    label="🔄 Video Sync Mode",
                    value="slow_down",
                    interactive=True,
                )

        with gr.Column(scale=1):
            gr.Markdown("### ⚙️ Quality Settings")
            fps_slider = gr.Slider(
                minimum=0,
                maximum=120,  # increased from 60 to 120
                step=1,
                value=0,
                label="🎞️ Output FPS (0 = Original)",
                interactive=True,
            )
            fast_mode_checkbox = gr.Checkbox(
                label="⚡ Fast Mode (BiRefNet_lite) - Uncheck for highest quality",
                value=False,  # default changed to False (high-quality mode)
                interactive=True,
            )
            max_workers_slider = gr.Slider(
                minimum=1,
                maximum=64,  # increased from 32 to 64
                step=1,
                value=16,    # increased from 10 to 16
                label="🔧 Parallel Workers",
                info="Higher = Faster (requires more VRAM)",
                interactive=True,
            )

    time_textbox = gr.Textbox(
        label="📊 Processing Status",
        interactive=False,
        placeholder="Status will appear here...",
    )

    def update_visibility(bg_type):
        """Show only the controls relevant to the selected background type."""
        if bg_type == "Color":
            return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)
        elif bg_type == "Image":
            return gr.update(visible=False), gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)
        elif bg_type == "Video":
            return gr.update(visible=False), gr.update(visible=False), gr.update(visible=True), gr.update(visible=True)
        else:
            return gr.update(visible=False), gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)

    bg_type.change(
        update_visibility,
        inputs=bg_type,
        outputs=[color_picker, bg_image, bg_video, video_handling_options],
    )

    examples = gr.Examples(
        [
            ["rickroll-2sec.mp4", "Video", None, "background.mp4"],
            ["rickroll-2sec.mp4", "Image", "images.webp", None],
            ["rickroll-2sec.mp4", "Color", None, None],
        ],
        inputs=[in_video, bg_type, bg_image, bg_video],
        outputs=[stream_image, out_video, time_textbox],
        fn=fn,
        cache_examples=True,
        cache_mode="eager",
    )

    submit_button.click(
        fn,
        inputs=[in_video, bg_type, bg_image, bg_video, color_picker, fps_slider,
                video_handling_radio, fast_mode_checkbox, max_workers_slider],
        outputs=[stream_image, out_video, time_textbox],
    )

    gr.Markdown("""
    ---
    ### 📋 Tips for Best Results
    - **High Quality Mode**: Uncheck 'Fast Mode' for best edge detection
    - **4K Videos**: Use higher worker count (32-64) for faster processing
    - **Green Screen**: Use `#00FF00` for classic chroma key compatibility
    """)

if __name__ == "__main__":
    demo.launch(show_error=True)