# Page-capture banner (not program code): "Spaces: Sleeping"
import gradio as gr
import numpy as np
from scipy.special import softmax

from train import TrainingLoop
# Module-level state shared by the Gradio callbacks below.
# `train` is replaced by a TrainingLoop in create_training_loop();
# `frames` and `attributions` are filled by generate_output().
train = None
frames = None
attributions = None
# Lunar Lander state features, listed in index order; the mapping below keys
# each name by its position (0-7).
_FEATURE_NAMES = (
    "X-coordinate",
    "Y-coordinate",
    "Linear velocity in the X-axis",
    "Linear velocity in the Y-axis",
    "Angle",
    "Angular velocity",
    "Left leg touched the floor",
    "Right leg touched the floor",
)
LUNAR_LANDER_FEATURES = dict(enumerate(_FEATURE_NAMES))
def create_training_loop(env_spec):
    """Build a TrainingLoop for ``env_spec`` and store it in the global ``train``.

    The loop's agent is created right away, and the spec of the loop's
    environment is returned to the caller.
    """
    global train
    loop = TrainingLoop(env_spec=env_spec)
    # Publish the loop before creating the agent, matching the original order.
    train = loop
    loop.create_agent()
    return loop.env.spec
def display_softmax(inputs):
    """Turn raw attribution scores into named softmax weights.

    Args:
        inputs: Array-like of attribution scores, one per entry of
            ``LUNAR_LANDER_FEATURES``. Any extra axes (for example a leading
            batch axis of size one, shape ``(1, 8)``) are flattened first.

    Returns:
        A dict mapping each feature name to its softmax weight as a Python
        float. Names and scores are paired positionally; anything beyond the
        shorter of the two sequences is ignored.
    """
    # Flatten so that zip() below walks individual scores, never whole rows;
    # a 2-D input would otherwise make float(row) raise.
    scores = np.asarray(inputs).ravel()
    weights = softmax(scores)
    return {
        name: float(weight)
        for name, weight in zip(LUNAR_LANDER_FEATURES.values(), weights)
    }
def generate_output(num_iterations, option):
    """Run the explanation pass and store its frames and attributions.

    Args:
        num_iterations: Forwarded to ``train.explain_trained`` as
            ``num_iterations``.
        option: Forwarded to ``train.explain_trained`` as ``option``.

    Raises:
        gr.Error: If no training loop has been created yet (``train`` is still
            ``None``), so the user gets a readable message instead of an
            ``AttributeError``.

    Side effects:
        Rebinds the module-level ``frames`` and ``attributions`` and updates
        ``slider.maximum`` to the last valid frame index.
    """
    global frames, attributions
    if train is None:
        raise gr.Error(
            "Initialize the environment before generating attributions."
        )
    frames, attributions = train.explain_trained(
        num_iterations=num_iterations,
        option=option,
    )
    # The slider selects an index into `frames`, so its upper bound is the
    # last index, not the length; never go below the slider minimum of 0.
    # NOTE(review): assigning to the component attribute may not refresh an
    # already-rendered slider -- confirm against the Gradio version in use.
    slider.maximum = max(len(frames) - 1, 0)
def get_frame_and_attribution(slider_value):
    """Return the frame and named attribution weights for one timestep.

    Args:
        slider_value: Requested frame index. It is clamped into
            ``[0, len(frames) - 1]`` and coerced to ``int`` before indexing.

    Returns:
        ``(frame, attribution)`` where ``attribution`` is the dict produced by
        ``display_softmax``. Returns ``(None, None)`` when no frames have been
        generated yet, so a call made before generation does not raise.
    """
    # The results interface is live, so this can run before generate_output()
    # has stored anything.
    if frames is None or len(frames) == 0:
        return None, None

    last_index = len(frames) - 1
    index = int(min(max(slider_value, 0), last_index))

    frame = frames[index]
    print(f"Frame shape: {frame.shape}")
    attribution = display_softmax(attributions[index])
    return frame, attribution
def _slider_range_update():
    """Return a Gradio update that fits the frame slider to the stored frames."""
    if frames is None or len(frames) == 0:
        # Nothing generated (or generation failed): leave the slider untouched.
        return gr.update()
    return gr.update(maximum=len(frames) - 1)


# Layout of the demo page: header, usage notes, one analysis tab, and a footer.
# NOTE: headings and labels in the captured source began with mis-decoded emoji
# bytes. Those glyphs are omitted rather than guessed; only the heart in the
# footer is restored, because the sentence does not read without it.
# Markdown bodies are indented with the code; this relies on gr.Markdown
# stripping common leading whitespace before rendering.
with gr.Blocks(
    title="Deep RL Explainability",
    theme=gr.themes.Soft(),
    css="""
    .gradio-container {
        max-width: 1200px !important;
    }
    .tab-nav {
        background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
    }
    """,
) as demo:
    # Header section
    gr.Markdown("""
    # Deep Reinforcement Learning Explainability

    **Exploring AI decision-making through Integrated Gradients in RL environments**

    ---
    """)

    # Introduction section
    gr.Markdown("""
    ## How This Works

    This application demonstrates the application of **[Integrated Gradients](https://captum.ai/docs/extension/integrated_gradients)**
    to Deep Reinforcement Learning scenarios. We use PyTorch's Captum library for interpretability
    and Gymnasium for the continuous Lunar Lander environment.

    ### Training Algorithm: [DDPG](https://arxiv.org/abs/1509.02971)

    The agent is trained using **Deep Deterministic Policy Gradients** and achieves an average reward
    of **260.8** per episode (successful landings).

    ### How to Use This Space

    1. **Select Environment**: Choose the Lunar Lander environment
    2. **Choose Baseline**: Select between zero tensor or running average baseline
    3. **Generate Attributions**: Click "GENERATE ATTRIBUTIONS" and wait ~20-25 seconds
    4. **Explore Results**: Use the slider to examine attributions at different timesteps

    The attributions are normalized using Softmax to provide interpretable probability distributions.
    """)

    # Main interface tab
    with gr.Tab("Attribution Analysis", elem_id="attribution-tab"):
        # Environment setup
        gr.Markdown("### Environment Setup")
        env_spec = gr.Dropdown(
            choices=["LunarLander-v2"],
            type="value",
            multiselect=False,
            label="Environment Specification",
            value="LunarLander-v2",
            info="Select the RL environment to analyze",
        )
        env_interface = gr.Interface(
            title="Initialize Environment",
            allow_flagging="never",
            inputs=env_spec,
            fn=create_training_loop,
            outputs=gr.JSON(label="Environment Spec"),
            description="Click to initialize the training environment",
        )

        # Attribution controls
        gr.Markdown("### Attribution Configuration")
        with gr.Row():
            with gr.Column(scale=1):
                option = gr.Dropdown(
                    choices=["Torch Tensor of 0's", "Running Average"],
                    type="index",
                    # Preselect the first baseline so the handler never
                    # receives an empty selection.
                    value="Torch Tensor of 0's",
                    label="Baseline Method",
                    info="Choose the baseline for Integrated Gradients",
                )
            with gr.Column(scale=1):
                baselines = gr.Slider(
                    label="Number of Baseline Iterations",
                    interactive=True,
                    minimum=0,
                    maximum=100,
                    value=10,
                    step=5,
                    info="Number of baseline inputs to collect for averaging",
                )

        # Generate button (wired up below, once the frame slider exists)
        generate_btn = gr.Button(
            "GENERATE ATTRIBUTIONS",
            variant="primary",
            size="lg",
        )

        # Results section
        gr.Markdown("### Results Visualization")
        slider = gr.Slider(
            label="Key Frame Selector",
            minimum=0,
            maximum=1000,
            step=1,
            value=0,
            info="Navigate through different timesteps to see attributions",
        )

        # Run the explanation, then push the new frame count to the slider
        # through an event output so the rendered component is updated.
        generate_btn.click(
            fn=generate_output,
            inputs=[baselines, option],
            outputs=[],
        ).then(
            fn=_slider_range_update,
            inputs=None,
            outputs=slider,
        )

        results_interface = gr.Interface(
            fn=get_frame_and_attribution,
            inputs=slider,
            live=True,
            outputs=[
                gr.Image(label="Environment State", type="numpy"),
                gr.Label(label="Feature Attributions", num_top_classes=8),
            ],
            title="Real-time Attribution Analysis",
        )

    # Footer documentation.
    # NOTE(review): nesting was reconstructed from a capture without
    # indentation; confirm whether this section belonged inside the tab.
    gr.Markdown("""
    ---

    ## Local Usage & Installation

    ### Required Packages

    ```bash
    pip install torch gymnasium 'gymnasium[box2d]'
    ```

    ### Box2D Installation (macOS)

    ```bash
    brew install swig
    pip install box2d
    ```

    ## Lunar Lander Environment Details

    ### Reward Structure

    - **Position**: Increased/decreased based on distance to landing pad
    - **Velocity**: Increased/decreased based on speed (slower is better)
    - **Angle**: Decreased when lander is tilted (horizontal is ideal)
    - **Landing**: +10 points for each leg touching ground
    - **Fuel**: -0.03 points per frame for side engine, -0.3 for main engine
    - **Episode End**: -100 for crash, +100 for safe landing

    **Success Threshold**: 200+ points per episode

    ### Training Functions

    - `load_trained()`: Loads pre-trained model (1000 episodes)
    - `train()`: Trains from scratch
    - Set `render_mode=False` for faster training

    ---

    *Built with ❤️ using Gradio, PyTorch, and Captum*
    """)
# Script entry point: start the Gradio server only when run directly,
# not when this module is imported.
if __name__ == "__main__":
    demo.launch()