Spaces:
Build error
Build error
File size: 3,770 Bytes
c533407 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 |
import gradio as gr
import h2o
from h2o.estimators import H2OIsolationForestEstimator
import pandas as pd
import numpy as np
import shap
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from itertools import combinations
# ---------------------------------------------------------------------------
# One-time setup: H2O session, synthetic data, model fit and scoring.
# Everything below runs at import time and populates the module-level names
# (columns, df, h2o_df, iso_forest, ...) used by the plotting helpers.
# ---------------------------------------------------------------------------
N_FEATURES = 20
N_INLIERS = 500
N_OUTLIERS = 50
ISOLATION_FOREST_PARAMS = {
    "ntrees": 100,
    "max_depth": 8,
    "sample_size": 256,
    "seed": 42,
    "contamination": 0.1,
}
LABEL_NAMES = {0: "Normal", 1: "Anomaly"}

# Initialize H2O
h2o.init()

# Seed NumPy's global RNG first so the uniform outliers drawn below are
# reproducible; make_classification is given its own random_state.
np.random.seed(42)
inliers, _ = make_classification(
    n_samples=N_INLIERS,
    n_features=N_FEATURES,
    n_informative=10,
    n_redundant=5,
    n_clusters_per_class=1,
    random_state=42,
)
# Uniform noise in [-6, 6) appended after the clustered samples as outliers.
outliers = np.random.uniform(-6, 6, (N_OUTLIERS, N_FEATURES))
X = np.concatenate((inliers, outliers), axis=0)

# Name the features and mirror the pandas frame into H2O.
columns = [f"Feature{i+1}" for i in range(N_FEATURES)]
df = pd.DataFrame(X, columns=columns)
h2o_df = h2o.H2OFrame(df)

# Fit H2O Isolation Forest on the full frame.
iso_forest = H2OIsolationForestEstimator(**ISOLATION_FOREST_PARAMS)
iso_forest.train(training_frame=h2o_df)

# Score the training data and attach the results to the pandas frame:
# the raw "score" column and the 0/1 "predict" column mapped to a text label.
predictions = iso_forest.predict(h2o_df)
pred_df = predictions.as_data_frame()
df["Anomaly_Score"] = pred_df["score"]
df["Anomaly_Label"] = pred_df["predict"].map(LABEL_NAMES)
# Define SHAP explainer
def _predict_anomaly_score(x):
    """Return the Isolation Forest anomaly score for every row of ``x``.

    This is the model function handed to ``shap.Explainer``.

    Args:
        x: 2-D array-like of feature values, one column per entry in
            ``columns`` (in that order).

    Returns:
        1-D NumPy array with one ``score`` value per input row.
    """
    # The input may arrive as a bare array without column names. Re-attach
    # the training column names so H2O matches the features the model was
    # fitted on instead of auto-naming the columns.
    frame = pd.DataFrame(np.asarray(x), columns=columns)
    scored = iso_forest.predict(h2o.H2OFrame(frame)).as_data_frame()
    # Explain the continuous score only: a single output keeps the SHAP
    # values 2-D (rows x features) and the base value scalar per row, which
    # is what the summary and waterfall plots need.
    return scored["score"].to_numpy()


# Background data for the explainer is the feature part of the data set.
explainer = shap.Explainer(_predict_anomaly_score, df[columns])
# Helper function for SHAP summary plot
def shap_summary():
    """Render the global SHAP summary plot and return the image path.

    Computes SHAP values for every row of ``df[columns]`` with the module
    level ``explainer``, draws the summary plot and writes it to
    ``shap_summary.png`` in the working directory.

    Returns:
        The string ``"shap_summary.png"``.
    """
    features = df[columns]
    shap_values = explainer(features)

    values = np.asarray(shap_values.values)
    if values.ndim == 3:
        # The explained model returned several outputs per row; keep the
        # first one (the score) so summary_plot receives a rows x features
        # matrix.
        values = values[:, :, 0]

    plt.figure()
    try:
        shap.summary_plot(values, features, feature_names=columns, show=False)
        # bbox_inches="tight" keeps the feature labels inside the saved image.
        plt.savefig("shap_summary.png", bbox_inches="tight")
    finally:
        # Always release the figure; otherwise every button click leaves
        # another open figure behind in pyplot's global registry.
        plt.close()
    return "shap_summary.png"
# Helper function for SHAP waterfall plot
def shap_waterfall(index):
    """Render the SHAP waterfall plot for one data point.

    Args:
        index: Row position in ``df`` (any value accepted by ``int()``;
            the Gradio number input supplies a float). Negative values
            count from the end, as with normal Python indexing.

    Returns:
        The string ``"shap_waterfall.png"``, the path of the saved image.

    Raises:
        gr.Error: If ``index`` is missing or outside the data set.
    """
    if index is None:
        raise gr.Error("Please enter a data point index.")
    row = int(index)
    n_rows = len(df)
    if not -n_rows <= row < n_rows:
        raise gr.Error(
            f"Data point index {row} is out of range for {n_rows} rows."
        )

    # Explain only the requested row instead of the whole data set; the
    # double brackets keep a one-row DataFrame rather than a Series.
    features = df[columns].iloc[[row]]
    shap_values = explainer(features)

    values = np.asarray(shap_values.values)[0]
    base_value = np.atleast_1d(shap_values.base_values)[0]
    if values.ndim == 2:
        # The explained model returned several outputs per row; keep the
        # first one (the score) because the waterfall plot needs a single
        # output with a scalar base value.
        values = values[:, 0]
        base_value = np.atleast_1d(base_value)[0]

    plt.figure()
    try:
        shap.waterfall_plot(
            shap.Explanation(
                values=values,
                base_values=float(base_value),
                # Feature columns only, so data lines up with feature_names
                # (df also carries the Anomaly_Score / Anomaly_Label columns).
                data=features.iloc[0].to_numpy(),
                feature_names=columns,
            ),
            # Do not let SHAP show the figure itself; it is saved below.
            show=False,
        )
        plt.savefig("shap_waterfall.png", bbox_inches="tight")
    finally:
        # Release the figure so repeated clicks do not accumulate figures.
        plt.close()
    return "shap_waterfall.png"
# Helper function for scatter plot
def scatter_plot(feature1, feature2):
    """Scatter two columns of ``df`` against each other, coloured by label.

    Args:
        feature1: Name of the ``df`` column plotted on the x axis.
        feature2: Name of the ``df`` column plotted on the y axis.

    Returns:
        The string ``"scatter_plot.png"``, the path of the saved image.

    Raises:
        gr.Error: If either name is missing (e.g. nothing selected in the
            dropdown) or is not a column of ``df``.
    """
    for name in (feature1, feature2):
        # An empty dropdown passes None; report that clearly instead of
        # failing with a KeyError from the DataFrame lookup.
        if name is None or name not in df.columns:
            raise gr.Error(
                "Please select two valid features before generating the "
                "scatter plot."
            )

    plt.figure(figsize=(8, 6))
    try:
        plt.scatter(
            df[feature1],
            df[feature2],
            # Boolean mask: True for rows labelled "Anomaly"; the colormap
            # maps the two values to its two ends.
            c=(df["Anomaly_Label"] == "Anomaly"),
            cmap="coolwarm",
            edgecolor="k",
            alpha=0.7,
        )
        plt.title(f"Isolation Forest - {feature1} vs {feature2}")
        plt.xlabel(feature1)
        plt.ylabel(feature2)
        plt.savefig("scatter_plot.png")
    finally:
        # Release the figure so repeated clicks do not accumulate figures.
        plt.close()
    return "scatter_plot.png"
# Gradio app: three tabs, each with a button that calls one plotting helper
# and shows the image file it returns.
with gr.Blocks() as app:
    gr.Markdown("# Anomaly Detection with Isolation Forest")

    with gr.Tab("SHAP Summary Plot"):
        gr.Markdown("Global explainability using SHAP summary plot.")
        shap_summary_button = gr.Button("Generate SHAP Summary")
        shap_summary_image = gr.Image()
        shap_summary_button.click(fn=shap_summary, outputs=shap_summary_image)

    with gr.Tab("SHAP Waterfall Plot"):
        gr.Markdown("Local explainability for a specific data point.")
        # precision=0 makes the input an integer, since it is used as a
        # row index.
        index_input = gr.Number(label="Data Point Index", value=0, precision=0)
        shap_waterfall_button = gr.Button("Generate SHAP Waterfall")
        shap_waterfall_image = gr.Image()
        shap_waterfall_button.click(
            fn=shap_waterfall,
            inputs=index_input,
            outputs=shap_waterfall_image,
        )

    with gr.Tab("Scatter Plot"):
        gr.Markdown("Visualize pairwise feature interactions.")
        # Pre-select two different features so the button works on the
        # first click; without a default the dropdowns pass None.
        feature1_dropdown = gr.Dropdown(
            choices=columns, value=columns[0], label="Feature 1"
        )
        feature2_dropdown = gr.Dropdown(
            choices=columns, value=columns[1], label="Feature 2"
        )
        scatter_plot_button = gr.Button("Generate Scatter Plot")
        scatter_plot_image = gr.Image()
        scatter_plot_button.click(
            fn=scatter_plot,
            inputs=[feature1_dropdown, feature2_dropdown],
            outputs=scatter_plot_image,
        )

# Launch the app
app.launch()
|