import gradio as gr
import numpy as np
from PIL import Image

def create_enhanced_ui():
    """Assemble the Gradio Blocks app for the enhanced segmentation demo.

    The left column collects the inputs: a video upload, a radio that picks
    the segmentation method, a text prompt box that starts hidden, a
    sketch-enabled image and the "Process Video" button. The right column
    shows the result video and a mask preview.

    Returns:
        The constructed ``gr.Blocks`` object. It is not launched here.
    """
    text_method = "Text Prompt"
    default_method = "Click Points"

    # Callback for the radio: the prompt box is only relevant (and therefore
    # only visible) while the text-based method is the active selection.
    def update_visibility(method):
        show_prompt = method == text_method
        return gr.update(visible=show_prompt)

    with gr.Blocks() as app:
        gr.Markdown(value="# VideoMaMa - Enhanced Segmentation")

        with gr.Row():
            # Left column: everything the user provides.
            with gr.Column():
                video_in = gr.Video(label="Upload Video")

                method_radio = gr.Radio(
                    choices=[default_method, "Brush/Draw", text_method],
                    value=default_method,
                    label="Segmentation Method",
                )

                # Hidden until the radio switches to the text method.
                prompt_box = gr.Textbox(
                    visible=False,
                    label="Text Prompt",
                    placeholder="e.g., 'person', 'piano', 'cat'",
                )

                # Image with the sketch (brush) tool enabled, red strokes.
                sketch_canvas = gr.Image(
                    label="Select/Draw Object",
                    tool="sketch",
                    brush_radius=15,
                    brush_color="#FF0000",
                )

                run_button = gr.Button(value="Process Video", variant="primary")

            # Right column: what the pipeline produces.
            with gr.Column():
                result_video = gr.Video(label="Result")
                mask_image = gr.Image(label="Mask Preview")

        # Event wiring, registered in the same order as before.
        method_radio.change(
            fn=update_visibility,
            inputs=[method_radio],
            outputs=[prompt_box],
        )

        run_button.click(
            fn=process_video_enhanced,
            inputs=[video_in, method_radio, prompt_box, sketch_canvas],
            outputs=[result_video, mask_image],
        )

    return app

def process_video_enhanced(video, method, text_prompt, image_data):
    """Run the VideoMaMa pipeline with the chosen segmentation method.

    Args:
        video: Value of the uploaded-video input, passed to the pipeline
            unchanged.
        method: Selected segmentation method label. "Text Prompt" and
            "Brush/Draw" are handled explicitly; any other value is treated
            as click-point selection.
        text_prompt: Free-text object description; only read when ``method``
            is "Text Prompt".
        image_data: Value of the sketch-enabled image input; read by the
            "Brush/Draw" and click-point paths.

    Returns:
        Whatever ``videomama_pipeline.process`` returns. The UI binds it to
        two outputs (result video, mask preview), so it is presumably a
        2-tuple -- confirm against the pipeline.
    """
    if method == "Brush/Draw":
        # The drawn mask is the prompt here; there are no points. Previously
        # this branch fell through to ``process(video, points)`` with
        # ``points`` never assigned, raising UnboundLocalError and silently
        # discarding the mask.
        mask = image_data_to_mask(image_data)
        # NOTE(review): assumes videomama_pipeline.process accepts a
        # ``mask=`` keyword and tolerates ``points=None`` -- confirm against
        # the pipeline signature, or convert the mask to point prompts here.
        return videomama_pipeline.process(video, None, mask=mask)

    if method == "Text Prompt":
        # Text is turned into point prompts (Grounding DINO + SAM2).
        points = text_to_points(text_prompt, video)
    else:
        # Click points (original method); also the fallback for any
        # unrecognised method value, as before.
        points = extract_points_from_clicks(image_data)

    # Process with VideoMaMa (existing points-based pipeline call).
    return videomama_pipeline.process(video, points)