import gradio as gr
import numpy as np
from PIL import Image

# Labels of the segmentation methods offered in the UI. They are shared by the
# radio selector and the processing callback so the two cannot drift apart.
_METHOD_CLICK = "Click Points"
_METHOD_BRUSH = "Brush/Draw"
_METHOD_TEXT = "Text Prompt"


def create_enhanced_ui():
    """Build the VideoMaMa segmentation UI.

    The left column holds the video upload, the segmentation-method selector,
    an optional text prompt, an image canvas and the "Process Video" button.
    The right column holds the resulting video and a mask preview.

    Returns:
        The assembled ``gr.Blocks`` application. It is not launched here.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# VideoMaMa - Enhanced Segmentation")

        with gr.Row():
            with gr.Column():
                video_input = gr.Video(label="Upload Video")

                # Segmentation method selector
                seg_method = gr.Radio(
                    [_METHOD_CLICK, _METHOD_BRUSH, _METHOD_TEXT],
                    label="Segmentation Method",
                    value=_METHOD_CLICK,
                )

                # Text prompt input; hidden until "Text Prompt" is selected.
                text_prompt = gr.Textbox(
                    label="Text Prompt",
                    placeholder="e.g., 'person', 'piano', 'cat'",
                    visible=False,
                )

                # Image canvas used for clicking / drawing on the object.
                # NOTE(review): `tool`, `brush_radius` and `brush_color` look
                # like the Gradio 3.x `Image` API - confirm against the
                # installed Gradio version.
                image_editor = gr.Image(
                    label="Select/Draw Object",
                    tool="sketch",  # Brush tool
                    brush_radius=15,
                    brush_color="#FF0000",
                )

                process_btn = gr.Button("Process Video", variant="primary")

            with gr.Column():
                output_video = gr.Video(label="Result")
                mask_preview = gr.Image(label="Mask Preview")

        def update_visibility(method):
            """Show the text prompt box only for the "Text Prompt" method."""
            return gr.update(visible=(method == _METHOD_TEXT))

        seg_method.change(
            update_visibility,
            inputs=[seg_method],
            outputs=[text_prompt],
        )

        # NOTE(review): two output components are wired here, so the callback
        # must return two values - confirm that `videomama_pipeline.process`
        # returns a (video, mask preview) pair.
        process_btn.click(
            process_video_enhanced,
            inputs=[video_input, seg_method, text_prompt, image_editor],
            outputs=[output_video, mask_preview],
        )

    return demo


def process_video_enhanced(video, method, text_prompt, image_data):
    """Run the VideoMaMa pipeline using the selected segmentation method.

    Args:
        video: Value of the video upload component; ``None`` when nothing
            has been uploaded.
        method: One of the segmentation method labels offered by the UI.
        text_prompt: Free-text object description; only used for the
            "Text Prompt" method.
        image_data: Value of the image canvas; only used for the
            "Click Points" method.

    Returns:
        Whatever ``videomama_pipeline.process(video, points)`` returns.

    Raises:
        gr.Error: If no video was uploaded, if the "Text Prompt" method is
            selected with a blank prompt, or if "Brush/Draw" is selected
            (that path is not wired into the pipeline yet).
    """
    if video is None:
        raise gr.Error("Please upload a video before processing.")

    if method == _METHOD_TEXT:
        if not text_prompt or not text_prompt.strip():
            raise gr.Error(
                "Please enter a text prompt describing the object to segment."
            )
        # Use Grounding DINO + SAM2
        points = text_to_points(text_prompt, video)
    elif method == _METHOD_BRUSH:
        # The previous implementation built a mask here but then fell through
        # to `videomama_pipeline.process(video, points)` with `points` never
        # assigned, which raised UnboundLocalError. Fail with a clear,
        # user-visible message instead.
        # TODO: feed `image_data_to_mask(image_data)` into the pipeline once
        # it exposes a mask-based entry point.
        raise gr.Error(
            "Brush/Draw segmentation is not supported by the processing "
            "pipeline yet. Please use Click Points or Text Prompt."
        )
    else:
        # Use click points (original method)
        points = extract_points_from_clicks(image_data)

    # Process with VideoMaMa (existing pipeline)
    return videomama_pipeline.process(video, points)