Spaces: Running on Zero
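Below is the complete `app.py` for this Space: a Gradio demo of generative inference on visual-illusion images with a ResNet50 backbone, offering two inference methods (IncreaseConfidence and ReverseDiffusion). The file is written so that the same code runs on a ZeroGPU Space and on a local machine.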
```python
import gradio as gr
import torch
import numpy as np
from PIL import Image

try:
    from spaces import GPU
except ImportError:
    # Define a no-op decorator if running locally without the `spaces` package
    def GPU(func):
        return func
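# "Running on Zero": on ZeroGPU Spaces, functions that need a GPU are marked
# with the `spaces.GPU` decorator and dispatched to a GPU worker on demand;
# the no-op fallback above keeps this file runnable where `spaces` is absent.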
import os
import argparse

from inference import GenerativeInferenceModel, get_inference_configs
# Check whether we are running on Hugging Face Spaces (the SPACE_ID environment
# variable is set there) and choose the default port accordingly
if "SPACE_ID" in os.environ:
    default_port = int(os.environ.get("PORT", 7860))  # Use the provided PORT or fall back to 7860
else:
    default_port = 8861  # Local default port

# Parse command line arguments
parser = argparse.ArgumentParser(description='Run Generative Inference Demo')
parser.add_argument('--port', type=int, default=default_port,
                    help='Port to run the server on')
args = parser.parse_args()

# Create model directories if they don't exist
os.makedirs("models", exist_ok=True)
os.makedirs("stimuli", exist_ok=True)

# Initialize the model once at import time so it is shared across requests
model = GenerativeInferenceModel()
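# NOTE (assumed interface): `run_inference` below expects `model.inference(...)`
# to return either a (final_image, steps) tuple or a dict with 'final_image'
# and 'steps' keys, where every image is a (C, H, W) float tensor in [0, 1].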
@GPU  # On ZeroGPU Spaces this runs on a GPU worker; locally it is a no-op (see fallback above)
def run_inference(image, model_type, inference_type, eps_value, num_iterations,
                  step_size, initial_noise=0.05, step_noise=0.01, model_layer="all"):
    # Convert eps to float
    eps = float(eps_value)

    # Load the inference configuration for the selected method
    config = get_inference_configs(inference_type=inference_type, eps=eps,
                                   n_itr=int(num_iterations), step_size=float(step_size))

    # Handle ReverseDiffusion-specific parameters
    if inference_type == "ReverseDiffusion":
        config['initial_inference_noise_ratio'] = float(initial_noise)
        config['diffusion_noise_ratio'] = float(step_noise)
        config['top_layer'] = model_layer

    # Run generative inference
    result = model.inference(image, model_type, config)

    # Extract results based on the return type
    if isinstance(result, tuple):
        # Old format: (output_image, all_steps)
        output_image, all_steps = result
    else:
        # New format: a dictionary
        output_image = result['final_image']
        all_steps = result['steps']

    # Convert each intermediate step tensor to a PIL image for the animation
    frames = []
    for step_image in all_steps:
        step_pil = Image.fromarray((step_image.permute(1, 2, 0).cpu().numpy() * 255).astype(np.uint8))
        frames.append(step_pil)

    # Convert the final output image to PIL
    final_image = Image.fromarray((output_image.permute(1, 2, 0).cpu().numpy() * 255).astype(np.uint8))

    # Return the final inferred image and the animation frames
    return final_image, frames
# Define the interface
with gr.Blocks(title="Generative Inference Demo") as demo:
    gr.Markdown("# Generative Inference Demo")
    gr.Markdown("This demo showcases how neural networks can perceive visual illusions through generative inference.")

    with gr.Row():
        with gr.Column(scale=1):
            # Inputs
            image_input = gr.Image(label="Upload Image or Select an Illusion", type="pil")

            with gr.Row():
                model_choice = gr.Dropdown(
                    choices=["robust_resnet50", "standard_resnet50"],
                    value="robust_resnet50",
                    label="Model"
                )
                inference_type = gr.Dropdown(
                    choices=["IncreaseConfidence", "ReverseDiffusion"],
                    value="IncreaseConfidence",
                    label="Inference Method"
                )

            with gr.Row():
                eps_slider = gr.Slider(minimum=0.0, maximum=50.0, value=0.5, step=0.1, label="Epsilon (Perturbation Size)")
                iterations_slider = gr.Slider(minimum=1, maximum=500, value=50, step=1, label="Number of Iterations")
                step_size_slider = gr.Slider(minimum=0.0, maximum=10.0, value=1.0, step=0.1, label="Step Size")

            # Additional parameters for ReverseDiffusion, shown conditionally
            with gr.Row(visible=False) as diffusion_params:
                initial_noise_slider = gr.Slider(minimum=0.0, maximum=0.5, value=0.05, step=0.01,
                                                 label="Initial Noise Ratio")
                step_noise_slider = gr.Slider(minimum=0.0, maximum=0.2, value=0.01, step=0.01,
                                              label="Per-Step Noise Ratio")

            with gr.Row(visible=False) as layer_params:
                layer_choice = gr.Dropdown(
                    choices=["all", "conv1", "bn1", "relu", "maxpool", "layer1", "layer2", "layer3", "layer4", "avgpool"],
                    value="all",
                    label="Model Layer"
                )

            # Show/hide the extra parameter rows based on the inference type
            def toggle_params(inference):
                if inference == "ReverseDiffusion":
                    return gr.update(visible=True), gr.update(visible=True)
                else:
                    return gr.update(visible=False), gr.update(visible=False)

            inference_type.change(toggle_params, [inference_type], [diffusion_params, layer_params])

            run_button = gr.Button("Run Inference")

        with gr.Column(scale=2):
            # Outputs
            output_image = gr.Image(label="Final Inferred Image")
            output_frames = gr.Gallery(label="Inference Steps", columns=4, rows=2)
    # Example images with default parameter values for every input
    examples = [
        # IncreaseConfidence examples
        [os.path.join("stimuli", "Kanizsa_square.jpg"), "robust_resnet50", "IncreaseConfidence",
         0.5, 50, 1.0, 0.05, 0.01, "all"],
        [os.path.join("stimuli", "face_vase.png"), "robust_resnet50", "IncreaseConfidence",
         0.5, 50, 1.0, 0.05, 0.01, "all"],
        [os.path.join("stimuli", "figure_ground.png"), "robust_resnet50", "IncreaseConfidence",
         0.7, 100, 1.0, 0.05, 0.01, "all"],
        # ReverseDiffusion examples with different layers and noise values
        [os.path.join("stimuli", "Neon_Color_Circle.jpg"), "robust_resnet50", "ReverseDiffusion",
         0.3, 80, 0.8, 0.05, 0.01, "all"],
        [os.path.join("stimuli", "Kanizsa_square.jpg"), "robust_resnet50", "ReverseDiffusion",
         0.5, 50, 0.8, 0.1, 0.02, "layer4"],   # layer4: high-level features
        [os.path.join("stimuli", "face_vase.png"), "robust_resnet50", "ReverseDiffusion",
         0.4, 60, 0.8, 0.15, 0.03, "layer1"],  # layer1: lower-level features
    ]
    gr.Examples(examples=examples, inputs=[
        image_input, model_choice, inference_type,
        eps_slider, iterations_slider, step_size_slider,
        initial_noise_slider, step_noise_slider, layer_choice
    ])
    # Wire the button to the inference function
    run_button.click(
        fn=run_inference,
        inputs=[
            image_input, model_choice, inference_type,
            eps_slider, iterations_slider, step_size_slider,
            initial_noise_slider, step_noise_slider, layer_choice
        ],
        outputs=[output_image, output_frames]
    )
    # Description of the technique
    gr.Markdown("""
    ## About Generative Inference

    Generative inference is a technique that reveals how neural networks perceive visual stimuli. This demo offers two methods:

    ### 1. IncreaseConfidence
    Optimizes the input to increase the network's confidence in its least confident predictions. This reveals how the
    network perceives contours, figure-ground separation, and other visual phenomena in ways similar to human perception.

    ### 2. ReverseDiffusion
    Starts with a noisy version of the image and guides the optimization to match features of the noisy image.
    This approach can reveal different aspects of visual processing and is inspired by diffusion models.

    When using ReverseDiffusion, additional parameters become available:

    - **Initial Noise Ratio**: Controls the amount of noise added to the image at the beginning
    - **Per-Step Noise Ratio**: Controls the amount of noise added at each optimization step
    - **Model Layer**: Select a specific layer of the ResNet50 model to extract features from:
      - `all`: Use the full model (default)
      - `conv1`: First convolutional layer
      - `bn1`: First batch normalization layer
      - `relu`: First ReLU activation
      - `maxpool`: Max pooling layer
      - `layer1`: First residual block
      - `layer2`: Second residual block
      - `layer3`: Third residual block
      - `layer4`: Fourth residual block
      - `avgpool`: Average pooling layer

    Different layers capture different levels of abstraction: earlier layers represent low-level features
    like edges and textures, while later layers represent higher-level features and object parts.

    This demo allows you to:

    1. Upload your own images or select from the example images
    2. Choose between inference methods (IncreaseConfidence or ReverseDiffusion)
    3. Select between robust and standard ResNet50 models
    4. Adjust parameters like perturbation size (epsilon) and number of iterations
    5. For ReverseDiffusion, fine-tune noise levels and select specific model layers
    6. Visualize how the perception emerges over time
    """)
# Launch the demo
if __name__ == "__main__":
    print(f"Starting server on port {args.port}")
    demo.launch(
        server_name="0.0.0.0",   # Listen on all interfaces
        server_port=args.port,   # Port from the command line (or the default above)
        share=False,
        debug=True
    )
```
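Locally, `python app.py` serves on port 8861 by default (pass `--port` to override); on Spaces, the `PORT` environment variable takes precedence. For longer-running inference, ZeroGPU's decorator also accepts a duration hint in seconds. A minimal sketch, assuming the `spaces` package is installed; `gpu_matmul` is a hypothetical stand-in workload, not part of this demo:

```python
import spaces
import torch

@spaces.GPU(duration=120)  # hint that one call may need up to ~120 s of GPU time
def gpu_matmul(n: int = 1024) -> float:
    # Hypothetical toy workload: multiply two random matrices on the GPU
    a = torch.randn(n, n, device="cuda")
    b = torch.randn(n, n, device="cuda")
    return (a @ b).mean().item()
```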