"""Gradio demo for visualizing generative inference on visual illusions.

Runs a web UI that lets the user upload an image (or pick an illusion
stimulus), choose a model / inference method, tune parameters, and view
both the final inferred image and the intermediate optimization steps.
"""

import gradio as gr
import torch
import numpy as np
from PIL import Image

try:
    from spaces import GPU
except ImportError:
    # Define a no-op decorator if running locally (outside HF Spaces).
    def GPU(func):
        return func

import os
import argparse

from inference import GenerativeInferenceModel, get_inference_configs

# Pick the default port BEFORE parsing args so `--port` can override it.
# On Hugging Face Spaces (detected via SPACE_ID) honor the platform's PORT
# env var; locally fall back to 8861.  NOTE(fix): the original computed this
# value but never used it — the server always ran on the argparse default.
if "SPACE_ID" in os.environ:
    default_port = int(os.environ.get("PORT", 7860))
else:
    default_port = 8861  # Local default port

# Parse command line arguments
parser = argparse.ArgumentParser(description='Run Generative Inference Demo')
parser.add_argument('--port', type=int, default=default_port,
                    help='Port to run the server on')
args = parser.parse_args()

# Create model directories if they don't exist
os.makedirs("models", exist_ok=True)
os.makedirs("stimuli", exist_ok=True)

# Initialize model once at module load; reused across requests.
model = GenerativeInferenceModel()


def _tensor_to_pil(tensor):
    """Convert a CHW float tensor in [0, 1] to an RGB PIL image."""
    array = (tensor.permute(1, 2, 0).cpu().numpy() * 255).astype(np.uint8)
    return Image.fromarray(array)


@GPU
def run_inference(image, model_type, inference_type, eps_value, num_iterations,
                  step_size, initial_noise=0.05, step_noise=0.01,
                  model_layer="all"):
    """Run generative inference on ``image`` and return the results.

    Args:
        image: Input PIL image from the Gradio component.
        model_type: Model identifier, e.g. ``"robust_resnet50"``.
        inference_type: ``"IncreaseConfidence"`` or ``"ReverseDiffusion"``.
        eps_value: Perturbation budget (epsilon); coerced to float.
        num_iterations: Number of optimization iterations; coerced to int.
        step_size: Optimization step size; coerced to float.
        initial_noise: ReverseDiffusion only — initial noise ratio.
        step_noise: ReverseDiffusion only — per-step noise ratio.
        model_layer: ReverseDiffusion only — feature layer name or ``"all"``.

    Returns:
        Tuple of (final PIL image, list of per-step PIL images).
    """
    eps = float(eps_value)

    # Load inference configuration based on the selected type.
    config = get_inference_configs(inference_type=inference_type, eps=eps,
                                   n_itr=int(num_iterations),
                                   step_size=float(step_size))

    # Handle ReverseDiffusion specific parameters.
    if inference_type == "ReverseDiffusion":
        config['initial_inference_noise_ratio'] = float(initial_noise)
        config['diffusion_noise_ratio'] = float(step_noise)
        config['top_layer'] = model_layer

    # Run generative inference.
    result = model.inference(image, model_type, config)

    # Extract results; the model may return either the old tuple format
    # (output_image, all_steps) or the new dict format.
    if isinstance(result, tuple):
        output_image, all_steps = result
    else:
        output_image = result['final_image']
        all_steps = result['steps']

    # Convert every intermediate step and the final output to PIL images.
    frames = [_tensor_to_pil(step_image) for step_image in all_steps]
    final_image = _tensor_to_pil(output_image)

    # Return the final inferred image and the animation frames directly.
    return final_image, frames


# Define the interface
with gr.Blocks(title="Generative Inference Demo") as demo:
    gr.Markdown("# Generative Inference Demo")
    gr.Markdown("This demo showcases how neural networks can perceive visual illusions through generative inference.")

    with gr.Row():
        with gr.Column(scale=1):
            # Inputs
            image_input = gr.Image(label="Upload Image or Select an Illusion", type="pil")

            with gr.Row():
                model_choice = gr.Dropdown(
                    choices=["robust_resnet50", "standard_resnet50"],
                    value="robust_resnet50",
                    label="Model"
                )
                inference_type = gr.Dropdown(
                    choices=["IncreaseConfidence", "ReverseDiffusion"],
                    value="IncreaseConfidence",
                    label="Inference Method"
                )

            with gr.Row():
                eps_slider = gr.Slider(minimum=0.0, maximum=50.0, value=0.5, step=0.1,
                                       label="Epsilon (Perturbation Size)")
                iterations_slider = gr.Slider(minimum=1, maximum=500, value=50, step=1,
                                              label="Number of Iterations")
                step_size_slider = gr.Slider(minimum=0.0, maximum=10.0, value=1.0, step=0.1,
                                             label="Step Size")

            # Additional parameters for ReverseDiffusion that appear conditionally
            with gr.Row(visible=False) as diffusion_params:
                initial_noise_slider = gr.Slider(minimum=0.0, maximum=0.5, value=0.05, step=0.01,
                                                 label="Initial Noise Ratio")
                step_noise_slider = gr.Slider(minimum=0.0, maximum=0.2, value=0.01, step=0.01,
                                              label="Per-Step Noise Ratio")

            with gr.Row(visible=False) as layer_params:
                layer_choice = gr.Dropdown(
                    choices=["all", "conv1", "bn1", "relu", "maxpool",
                             "layer1", "layer2", "layer3", "layer4", "avgpool"],
                    value="all",
                    label="Model Layer"
                )

            # Show/hide the ReverseDiffusion-only parameter rows.
            def toggle_params(inference):
                """Return visibility updates for the diffusion/layer rows."""
                if inference == "ReverseDiffusion":
                    return gr.update(visible=True), gr.update(visible=True)
                else:
                    return gr.update(visible=False), gr.update(visible=False)

            inference_type.change(toggle_params, [inference_type],
                                  [diffusion_params, layer_params])

            run_button = gr.Button("Run Inference")

        with gr.Column(scale=2):
            # Outputs
            output_image = gr.Image(label="Final Inferred Image")
            output_frames = gr.Gallery(label="Inference Steps", columns=4, rows=2)

    # Set up example images with default parameters for all inputs
    examples = [
        # IncreaseConfidence examples
        [os.path.join("stimuli", "Kanizsa_square.jpg"), "robust_resnet50", "IncreaseConfidence", 0.5, 50, 1.0, 0.05, 0.01, "all"],
        [os.path.join("stimuli", "face_vase.png"), "robust_resnet50", "IncreaseConfidence", 0.5, 50, 1.0, 0.05, 0.01, "all"],
        [os.path.join("stimuli", "figure_ground.png"), "robust_resnet50", "IncreaseConfidence", 0.7, 100, 1.0, 0.05, 0.01, "all"],
        # ReverseDiffusion examples with different layers and noise values
        [os.path.join("stimuli", "Neon_Color_Circle.jpg"), "robust_resnet50", "ReverseDiffusion", 0.3, 80, 0.8, 0.05, 0.01, "all"],
        [os.path.join("stimuli", "Kanizsa_square.jpg"), "robust_resnet50", "ReverseDiffusion", 0.5, 50, 0.8, 0.1, 0.02, "layer4"],  # Using layer4 (high-level features)
        [os.path.join("stimuli", "face_vase.png"), "robust_resnet50", "ReverseDiffusion", 0.4, 60, 0.8, 0.15, 0.03, "layer1"]  # Using layer1 (lower-level features)
    ]

    gr.Examples(examples=examples, inputs=[
        image_input, model_choice, inference_type, eps_slider,
        iterations_slider, step_size_slider, initial_noise_slider,
        step_noise_slider, layer_choice
    ])

    # Set up event handler
    run_button.click(
        fn=run_inference,
        inputs=[
            image_input, model_choice, inference_type, eps_slider,
            iterations_slider, step_size_slider, initial_noise_slider,
            step_noise_slider, layer_choice
        ],
        outputs=[output_image, output_frames]
    )

    # Include a description of the technique
    gr.Markdown("""
    ## About Generative Inference

    Generative inference is a technique that reveals how neural networks perceive visual stimuli. This demo offers two methods:

    ### 1. IncreaseConfidence
    Optimizes the input to increase the network's confidence in its least confident predictions. This reveals how the network perceives contours, figure-ground separation, and other visual phenomena similar to human perception.

    ### 2. ReverseDiffusion
    Starts with a noisy version of the image and guides the optimization to match features of the noisy image. This approach can reveal different aspects of visual processing and is inspired by diffusion models.

    When using ReverseDiffusion, additional parameters become available:
    - **Initial Noise Ratio**: Controls the amount of noise added to the image at the beginning
    - **Per-Step Noise Ratio**: Controls the amount of noise added at each optimization step
    - **Model Layer**: Select a specific layer of the ResNet50 model to extract features from:
      - `all`: Use the full model (default)
      - `conv1`: First convolutional layer
      - `bn1`: First batch normalization layer
      - `relu`: First ReLU activation
      - `maxpool`: Max pooling layer
      - `layer1`: First residual block
      - `layer2`: Second residual block
      - `layer3`: Third residual block
      - `layer4`: Fourth residual block
      - `avgpool`: Average pooling layer

    Different layers capture different levels of abstraction - earlier layers represent low-level features like edges and textures, while later layers represent higher-level features and object parts.

    This demo allows you to:
    1. Upload your own images or select from example images
    2. Choose between inference methods (IncreaseConfidence or ReverseDiffusion)
    3. Select between robust or standard ResNet50 models
    4. Adjust parameters like perturbation size (epsilon) and number of iterations
    5. For ReverseDiffusion, fine-tune noise levels and select specific model layers
    6. Visualize how the perception emerges over time
    """)

# Launch the demo with specific settings
if __name__ == "__main__":
    print(f"Starting server on port {args.port}")
    # Simplified launch parameters
    demo.launch(
        server_name="0.0.0.0",  # Listen on all interfaces
        server_port=args.port,  # CLI --port, defaulting per environment
        share=False,
        debug=True
    )