rtik007 committed on
Commit
c533407
·
verified ·
1 Parent(s): 32c1cb0

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +118 -0
app.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import h2o
3
+ from h2o.estimators import H2OIsolationForestEstimator
4
+ import pandas as pd
5
+ import numpy as np
6
+ import shap
7
+ import matplotlib.pyplot as plt
8
+ from sklearn.datasets import make_classification
9
+ from itertools import combinations
10
+
11
# Start (or connect to) the H2O cluster that trains and scores the model.
h2o.init()

# Synthetic dataset: 500 samples with 20 features from make_classification,
# followed by 50 uniformly distributed rows in [-6, 6) acting as outliers.
np.random.seed(42)
X, _ = make_classification(
    n_samples=500,
    n_features=20,
    n_informative=10,
    n_redundant=5,
    n_clusters_per_class=1,
    random_state=42,
)
outliers = np.random.uniform(-6, 6, (50, 20))
X = np.concatenate((X, outliers), axis=0)

# Name the features Feature1..Feature20 and mirror the data into an H2OFrame.
columns = [f"Feature{n}" for n in range(1, 21)]
df = pd.DataFrame(X, columns=columns)
h2o_df = h2o.H2OFrame(df)

# Train the Isolation Forest on the full frame.
forest_params = {
    "ntrees": 100,
    "max_depth": 8,
    "sample_size": 256,
    "seed": 42,
    "contamination": 0.1,
}
iso_forest = H2OIsolationForestEstimator(**forest_params)
iso_forest.train(training_frame=h2o_df)

# Score the training data and attach the score plus a readable label
# (the 0/1 "predict" column is mapped to "Normal"/"Anomaly") to ``df``.
predictions = iso_forest.predict(h2o_df)
pred_df = predictions.as_data_frame()
label_names = {0: "Normal", 1: "Anomaly"}
df["Anomaly_Score"] = pred_df["score"]
df["Anomaly_Label"] = pred_df["predict"].map(label_names)
47
+
48
# Define SHAP explainer
def _anomaly_score(x):
    """Return the Isolation Forest anomaly score for each row of ``x``.

    ``x`` is whatever SHAP passes to the wrapped model (array-like with one
    column per feature). It is re-labelled with the training column names
    before being sent to H2O, so the model sees the same ``Feature*`` columns
    it was trained on. Only the ``score`` column is returned, as a 1-D NumPy
    array, so the explainer yields a single-output explanation.
    """
    frame = h2o.H2OFrame(pd.DataFrame(np.asarray(x), columns=columns))
    return iso_forest.predict(frame).as_data_frame()["score"].to_numpy()


# Background data for the explainer is the feature matrix itself.
explainer = shap.Explainer(_anomaly_score, df[columns])
53
+
54
+ # Helper function for SHAP summary plot
55
def shap_summary():
    """Render the global SHAP summary plot and return the saved image path.

    SHAP values are computed for every row of ``df[columns]``. If the
    explainer produces one set of values per model output (3-D values), only
    the first output is plotted, because the summary plot expects a
    (rows, features) matrix.

    Returns:
        str: ``"shap_summary.png"``, the file the plot was written to.
    """
    shap_values = explainer(df[columns])

    values = np.asarray(shap_values.values)
    if values.ndim == 3:
        # Multi-output explanation: keep the first output only.
        values = values[:, :, 0]

    fig = plt.figure()
    try:
        shap.summary_plot(values, df[columns], feature_names=columns, show=False)
        plt.savefig("shap_summary.png")
    finally:
        # Close the figure so repeated clicks do not accumulate open figures.
        plt.close(fig)
    return "shap_summary.png"
61
+
62
+ # Helper function for SHAP waterfall plot
63
def shap_waterfall(index):
    """Render a SHAP waterfall plot for one data point and return its path.

    Only the requested row is explained, rather than the whole dataset.

    Args:
        index: Zero-based row position in ``df``; converted with ``int()``.

    Returns:
        str: ``"shap_waterfall.png"``, the file the plot was written to.

    Raises:
        gr.Error: If ``index`` is missing or outside ``0 .. len(df) - 1``.
    """
    if index is None:
        raise gr.Error("Please enter a data point index.")
    row_idx = int(index)
    if not 0 <= row_idx < len(df):
        raise gr.Error(f"Index must be between 0 and {len(df) - 1}.")

    # Feature columns only: ``df`` also carries the score and label columns.
    row = df[columns].iloc[[row_idx]]
    explanation = explainer(row)

    values = np.asarray(explanation.values)[0]
    if values.ndim == 2:
        # Multi-output explanation: keep the first output only.
        values = values[:, 0]
    # The waterfall plot needs a scalar base value; take the first output's.
    base_value = float(
        np.asarray(explanation.base_values).reshape(len(row), -1)[0, 0]
    )

    fig = plt.figure()
    try:
        shap.waterfall_plot(
            shap.Explanation(
                values=values,
                base_values=base_value,
                data=row.iloc[0].to_numpy(),
                feature_names=columns,
            ),
            show=False,  # keep the figure open so it can be saved below
        )
        plt.savefig("shap_waterfall.png")
    finally:
        # Close the figure so repeated clicks do not accumulate open figures.
        plt.close(fig)
    return "shap_waterfall.png"
74
+
75
+ # Helper function for scatter plot
76
def scatter_plot(feature1, feature2):
    """Scatter two features against each other, coloured by anomaly label.

    Args:
        feature1: Name of the feature column plotted on the x-axis.
        feature2: Name of the feature column plotted on the y-axis.

    Returns:
        str: ``"scatter_plot.png"``, the file the plot was written to.

    Raises:
        gr.Error: If either argument is not one of the feature columns
            (for example when a dropdown was left unselected).
    """
    if feature1 not in columns or feature2 not in columns:
        raise gr.Error("Please select both Feature 1 and Feature 2.")

    # 1 for rows labelled "Anomaly", 0 otherwise; drives the colour map.
    is_anomaly = (df["Anomaly_Label"] == "Anomaly").astype(int)

    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        ax.scatter(
            df[feature1],
            df[feature2],
            c=is_anomaly,
            cmap="coolwarm",
            edgecolor="k",
            alpha=0.7,
        )
        ax.set_title(f"Isolation Forest - {feature1} vs {feature2}")
        ax.set_xlabel(feature1)
        ax.set_ylabel(feature2)
        fig.savefig("scatter_plot.png")
    finally:
        # Close the figure so repeated clicks do not accumulate open figures.
        plt.close(fig)
    return "scatter_plot.png"
91
+
92
+ # Gradio app
93
# Three-tab UI: global SHAP summary, per-row SHAP waterfall, and a pairwise
# feature scatter plot. Each tab has a button wired to one helper function
# and an image component that displays the file path the helper returns.
with gr.Blocks() as app:
    gr.Markdown("# Anomaly Detection with Isolation Forest")

    with gr.Tab("SHAP Summary Plot"):
        gr.Markdown("Global explainability using SHAP summary plot.")
        shap_summary_button = gr.Button("Generate SHAP Summary")
        shap_summary_image = gr.Image()
        shap_summary_button.click(fn=shap_summary, outputs=shap_summary_image)

    with gr.Tab("SHAP Waterfall Plot"):
        gr.Markdown("Local explainability for a specific data point.")
        # precision=0 makes the component deliver an integer row index.
        index_input = gr.Number(label="Data Point Index", value=0, precision=0)
        shap_waterfall_button = gr.Button("Generate SHAP Waterfall")
        shap_waterfall_image = gr.Image()
        shap_waterfall_button.click(
            fn=shap_waterfall,
            inputs=index_input,
            outputs=shap_waterfall_image,
        )

    with gr.Tab("Scatter Plot"):
        gr.Markdown("Visualize pairwise feature interactions.")
        # Pre-select two features so the button works without a manual choice.
        feature1_dropdown = gr.Dropdown(
            choices=columns, value=columns[0], label="Feature 1"
        )
        feature2_dropdown = gr.Dropdown(
            choices=columns, value=columns[1], label="Feature 2"
        )
        scatter_plot_button = gr.Button("Generate Scatter Plot")
        scatter_plot_image = gr.Image()
        scatter_plot_button.click(
            fn=scatter_plot,
            inputs=[feature1_dropdown, feature2_dropdown],
            outputs=scatter_plot_image,
        )

# Launch the app
app.launch()