"""Gradio demo: anomaly detection with an H2O Isolation Forest, explained via SHAP.

On start-up the script builds a synthetic 20-feature data set with injected
outliers, trains an H2O Isolation Forest on it, scores every row, and then
serves a three-tab Gradio UI:

* a global SHAP summary plot,
* a per-row SHAP waterfall plot,
* a pairwise feature scatter plot coloured by the anomaly label.
"""

from functools import lru_cache
from itertools import combinations  # noqa: F401  (unused here; kept from the original file)

import matplotlib

# Plots are rendered inside Gradio worker threads and only ever saved to disk,
# so force a non-interactive backend before pyplot is used.
matplotlib.use("Agg")

import gradio as gr  # noqa: E402
import h2o  # noqa: E402
import matplotlib.pyplot as plt  # noqa: E402
import numpy as np  # noqa: E402
import pandas as pd  # noqa: E402
import shap  # noqa: E402
from h2o.estimators import H2OIsolationForestEstimator  # noqa: E402
from sklearn.datasets import make_classification  # noqa: E402

# Initialize H2O
h2o.init()

# Generate synthetic data with 20 features
np.random.seed(42)
X, _ = make_classification(
    n_samples=500,
    n_features=20,
    n_informative=10,
    n_redundant=5,
    n_clusters_per_class=1,
    random_state=42,
)
outliers = np.random.uniform(low=-6, high=6, size=(50, 20))  # Add outliers
X = np.vstack([X, outliers])

# Convert to H2O Frame
columns = [f"Feature{i+1}" for i in range(20)]
df = pd.DataFrame(X, columns=columns)
h2o_df = h2o.H2OFrame(df)

# Fit H2O Isolation Forest
iso_forest = H2OIsolationForestEstimator(
    ntrees=100,
    max_depth=8,
    sample_size=256,
    seed=42,
    contamination=0.1,
)
iso_forest.train(training_frame=h2o_df)

# Predict anomaly scores
predictions = iso_forest.predict(h2o_df)
pred_df = predictions.as_data_frame()
df["Anomaly_Score"] = pred_df["score"]
df["Anomaly_Label"] = pred_df["predict"].map({0: "Normal", 1: "Anomaly"})


def _predict_score(x):
    """Return the forest's anomaly score for each row of *x* as a 1-D array.

    SHAP passes plain arrays to the model function, so the training column
    names are re-attached before the data is sent to H2O; otherwise the frame
    would get auto-generated names that do not match the trained model.
    Only the ``score`` column is returned so the explanation is single-output
    (one SHAP value per feature), which the waterfall plot requires.
    """
    frame = pd.DataFrame(np.asarray(x), columns=columns)
    scored = iso_forest.predict(h2o.H2OFrame(frame)).as_data_frame()
    return scored["score"].to_numpy()


# Define SHAP explainer (background data: the feature columns only)
explainer = shap.Explainer(_predict_score, df[columns])


@lru_cache(maxsize=1)
def _explain_all():
    """Compute (once) the SHAP explanation for every row of the data set."""
    return explainer(df[columns])


@lru_cache(maxsize=128)
def _explain_row(idx):
    """Compute (and cache) the SHAP explanation for the single row *idx*."""
    return explainer(df[columns].iloc[[idx]])


def _save_current_figure(path):
    """Save the current matplotlib figure to *path*, close it, and return *path*."""
    fig = plt.gcf()
    try:
        fig.savefig(path, bbox_inches="tight")
    finally:
        # Always release the figure; the handlers run for the life of the server.
        plt.close(fig)
    return path


def _validated_index(index):
    """Convert the UI value *index* to a valid row position or raise ``gr.Error``."""
    if index is None:
        raise gr.Error("Please enter a data point index.")
    try:
        idx = int(index)
    except (TypeError, ValueError) as err:
        raise gr.Error(f"Invalid data point index: {index!r}") from err
    if not 0 <= idx < len(df):
        raise gr.Error(f"Data point index must be between 0 and {len(df) - 1}.")
    return idx


# Helper function for SHAP summary plot
def shap_summary():
    """Render the global SHAP summary plot and return the saved image path."""
    explanation = _explain_all()
    plt.figure()
    shap.summary_plot(
        explanation.values, df[columns], feature_names=columns, show=False
    )
    return _save_current_figure("shap_summary.png")


# Helper function for SHAP waterfall plot
def shap_waterfall(index):
    """Render the SHAP waterfall plot for one row and return the saved image path.

    Args:
        index: Row position in the data set (numeric; fractional values are
            truncated with ``int``).

    Raises:
        gr.Error: If *index* is missing, not numeric, or out of range.
    """
    idx = _validated_index(index)
    row = _explain_row(idx)
    plt.figure()
    shap.waterfall_plot(
        shap.Explanation(
            values=np.asarray(row.values)[0],
            # The waterfall plot needs a scalar baseline.
            base_values=float(np.ravel(row.base_values)[0]),
            # Feature columns only: df also holds the score/label columns.
            data=df[columns].iloc[idx].to_numpy(),
            feature_names=columns,
        ),
        show=False,  # keep the figure open so it can be saved
    )
    return _save_current_figure("shap_waterfall.png")


# Helper function for scatter plot
def scatter_plot(feature1, feature2):
    """Scatter two features coloured by anomaly label and return the image path.

    Args:
        feature1: Name of the column plotted on the x axis.
        feature2: Name of the column plotted on the y axis.

    Raises:
        gr.Error: If either name is not one of the feature columns.
    """
    for name in (feature1, feature2):
        if name not in columns:
            raise gr.Error("Please select both Feature 1 and Feature 2.")
    plt.figure(figsize=(8, 6))
    plt.scatter(
        df[feature1],
        df[feature2],
        c=(df["Anomaly_Label"] == "Anomaly").astype(int),
        cmap="coolwarm",
        edgecolor="k",
        alpha=0.7,
    )
    plt.title(f"Isolation Forest - {feature1} vs {feature2}")
    plt.xlabel(feature1)
    plt.ylabel(feature2)
    return _save_current_figure("scatter_plot.png")


# Gradio app
with gr.Blocks() as app:
    gr.Markdown("# Anomaly Detection with Isolation Forest")

    with gr.Tab("SHAP Summary Plot"):
        gr.Markdown("Global explainability using SHAP summary plot.")
        shap_summary_button = gr.Button("Generate SHAP Summary")
        shap_summary_image = gr.Image()
        shap_summary_button.click(fn=shap_summary, outputs=shap_summary_image)

    with gr.Tab("SHAP Waterfall Plot"):
        gr.Markdown("Local explainability for a specific data point.")
        index_input = gr.Number(label="Data Point Index", value=0, precision=0)
        shap_waterfall_button = gr.Button("Generate SHAP Waterfall")
        shap_waterfall_image = gr.Image()
        shap_waterfall_button.click(
            fn=shap_waterfall, inputs=index_input, outputs=shap_waterfall_image
        )

    with gr.Tab("Scatter Plot"):
        gr.Markdown("Visualize pairwise feature interactions.")
        feature1_dropdown = gr.Dropdown(
            choices=columns, value=columns[0], label="Feature 1"
        )
        feature2_dropdown = gr.Dropdown(
            choices=columns, value=columns[1], label="Feature 2"
        )
        scatter_plot_button = gr.Button("Generate Scatter Plot")
        scatter_plot_image = gr.Image()
        scatter_plot_button.click(
            fn=scatter_plot,
            inputs=[feature1_dropdown, feature2_dropdown],
            outputs=scatter_plot_image,
        )

# Launch the app
app.launch()