File size: 3,804 Bytes
c5a6c0e
 
c533407
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# Dependencies are declared in requirements.txt; install them beforehand with
# `pip install -r requirements.txt`. The IPython/Jupyter `!` shell escape that
# used to be here is a SyntaxError when this file is run as a plain script.

import gradio as gr
import h2o
from h2o.estimators import H2OIsolationForestEstimator
import pandas as pd
import numpy as np
import shap
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from itertools import combinations

# Start (or connect to) the local H2O cluster used for training and scoring.
h2o.init()

# Generate synthetic data with 20 features: 500 samples from
# make_classification plus 50 uniformly distributed rows appended as outliers.
np.random.seed(42)
X, _ = make_classification(
    n_samples=500,
    n_features=20,
    n_informative=10,
    n_redundant=5,
    n_clusters_per_class=1,
    random_state=42
)
outliers = np.random.uniform(low=-6, high=6, size=(50, 20))  # Add outliers
X = np.vstack([X, outliers])

# Convert to H2O Frame (column names Feature1..Feature20 are what the model
# is trained on, so any frame scored later must use the same names).
columns = [f"Feature{i+1}" for i in range(20)]
df = pd.DataFrame(X, columns=columns)
h2o_df = h2o.H2OFrame(df)

# Fit H2O Isolation Forest
iso_forest = H2OIsolationForestEstimator(
    ntrees=100,
    max_depth=8,
    sample_size=256,
    seed=42,
    contamination=0.1
)
iso_forest.train(training_frame=h2o_df)

# Predict anomaly scores and attach them (plus a readable label) to `df`.
predictions = iso_forest.predict(h2o_df)
pred_df = predictions.as_data_frame()
df["Anomaly_Score"] = pred_df["score"]
df["Anomaly_Label"] = pred_df["predict"].map({0: "Normal", 1: "Anomaly"})


def _predict_anomaly_score(x):
    """Score a batch of feature rows and return the anomaly score per row.

    SHAP calls the model function with a plain array of perturbed rows.
    The rows are wrapped in a DataFrame carrying the training column names
    before conversion to an H2OFrame; without the names the frame would get
    auto-generated column names that do not match the trained features.

    Args:
        x: 2-D array-like of shape (n_rows, len(columns)).

    Returns:
        1-D NumPy array with the model's ``score`` value for each row.
    """
    features = pd.DataFrame(np.asarray(x), columns=columns)
    scored = iso_forest.predict(h2o.H2OFrame(features)).as_data_frame()
    # Return a single numeric output: the summary and waterfall plots expect
    # one SHAP value per feature, not one per (feature, output) pair.
    return scored["score"].to_numpy()


# Define SHAP explainer over the feature columns only (not the score/label
# columns that were added to `df` above).
explainer = shap.Explainer(_predict_anomaly_score, df[columns])

# Helper function for SHAP summary plot
def shap_summary():
    """Render the global SHAP summary plot and return the saved image path.

    SHAP values are computed for every row of ``df[columns]`` with the
    module-level ``explainer`` and drawn with ``shap.summary_plot``.

    Returns:
        The path ``"shap_summary.png"`` of the image written to disk.
    """
    features = df[columns]
    shap_values = explainer(features)

    values = np.asarray(shap_values.values)
    if values.ndim == 3:
        # A model function with several output columns yields values shaped
        # (rows, features, outputs); keep the first output so the plot gets
        # one value per feature.
        values = values[:, :, 0]

    fig = plt.figure()
    try:
        shap.summary_plot(values, features, feature_names=columns, show=False)
        plt.savefig("shap_summary.png", bbox_inches="tight")
    finally:
        # Close the figure so repeated button clicks do not accumulate
        # open matplotlib figures in the long-running app process.
        plt.close(fig)
    return "shap_summary.png"

# Helper function for SHAP waterfall plot
def shap_waterfall(index):
    """Render a SHAP waterfall plot for one data point and return its path.

    Args:
        index: Row position in ``df`` (a number from the UI; it is truncated
            with ``int``). Negative values count from the end, as with
            normal Python indexing.

    Returns:
        The path ``"shap_waterfall.png"`` of the image written to disk.

    Raises:
        gr.Error: If ``index`` is missing or outside the range of ``df``.
    """
    if index is None:
        raise gr.Error("Please enter a data point index.")
    row = int(index)
    n_rows = len(df)
    if not -n_rows <= row < n_rows:
        raise gr.Error(
            f"Data point index must be between 0 and {n_rows - 1}."
        )

    features = df[columns]
    # Explain only the requested row instead of the whole dataset.
    shap_values = explainer(features.iloc[[row]])

    values = np.asarray(shap_values.values)[0]
    if values.ndim == 2:
        # Several model outputs: keep the first so there is one value
        # per feature.
        values = values[:, 0]
    base_value = float(np.ravel(shap_values.base_values[0])[0])

    explanation = shap.Explanation(
        values=values,
        base_values=base_value,
        # Only the feature columns: `df` also holds Anomaly_Score and
        # Anomaly_Label, which would not line up with `feature_names`.
        data=features.iloc[row].to_numpy(),
        feature_names=columns
    )

    fig = plt.figure()
    try:
        # show=False keeps the figure open so it can be saved afterwards.
        shap.plots.waterfall(explanation, show=False)
        plt.savefig("shap_waterfall.png", bbox_inches="tight")
    finally:
        plt.close(fig)
    return "shap_waterfall.png"

# Helper function for scatter plot
def scatter_plot(feature1, feature2):
    """Scatter two columns of ``df`` against each other, colored by label.

    Points whose ``Anomaly_Label`` is ``"Anomaly"`` are mapped to one end of
    the ``coolwarm`` colormap and all other points to the other end.

    Args:
        feature1: Column name plotted on the x-axis.
        feature2: Column name plotted on the y-axis.

    Returns:
        The path ``"scatter_plot.png"`` of the image written to disk.

    Raises:
        gr.Error: If either argument is not a column of ``df`` (for example
            when a dropdown was left empty and passed ``None``).
    """
    for name in (feature1, feature2):
        if name not in df.columns:
            raise gr.Error("Please select both Feature 1 and Feature 2.")

    fig = plt.figure(figsize=(8, 6))
    try:
        plt.scatter(
            df[feature1],
            df[feature2],
            c=(df["Anomaly_Label"] == "Anomaly"),
            cmap="coolwarm",
            edgecolor="k",
            alpha=0.7
        )
        plt.title(f"Isolation Forest - {feature1} vs {feature2}")
        plt.xlabel(feature1)
        plt.ylabel(feature2)
        plt.savefig("scatter_plot.png")
    finally:
        # Close the figure so repeated requests do not leak open figures.
        plt.close(fig)
    return "scatter_plot.png"

# Gradio app
# Three-tab UI: each tab has a button that calls one plotting helper and
# shows the image file it returns.
with gr.Blocks() as app:
    gr.Markdown("# Anomaly Detection with Isolation Forest")

    with gr.Tab("SHAP Summary Plot"):
        gr.Markdown("Global explainability using SHAP summary plot.")
        shap_summary_button = gr.Button("Generate SHAP Summary")
        shap_summary_image = gr.Image()
        shap_summary_button.click(fn=shap_summary, outputs=shap_summary_image)

    with gr.Tab("SHAP Waterfall Plot"):
        gr.Markdown("Local explainability for a specific data point.")
        # precision=0 makes the component deliver a whole number, since the
        # value is used as a row index.
        index_input = gr.Number(label="Data Point Index", value=0, precision=0)
        shap_waterfall_button = gr.Button("Generate SHAP Waterfall")
        shap_waterfall_image = gr.Image()
        shap_waterfall_button.click(fn=shap_waterfall, inputs=index_input, outputs=shap_waterfall_image)

    with gr.Tab("Scatter Plot"):
        gr.Markdown("Visualize pairwise feature interactions.")
        # Pre-select two different features so the first click on the button
        # does not pass None to scatter_plot.
        feature1_dropdown = gr.Dropdown(choices=columns, value=columns[0], label="Feature 1")
        feature2_dropdown = gr.Dropdown(choices=columns, value=columns[1], label="Feature 2")
        scatter_plot_button = gr.Button("Generate Scatter Plot")
        scatter_plot_image = gr.Image()
        scatter_plot_button.click(fn=scatter_plot, inputs=[feature1_dropdown, feature2_dropdown], outputs=scatter_plot_image)

# Launch the app
app.launch()