rtik007 committed on
Commit
93f8de3
·
verified ·
1 Parent(s): cfe8fa3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -77
app.py CHANGED
@@ -1,16 +1,11 @@
1
- import gradio as gr
2
- import h2o
3
- from h2o.estimators import H2OIsolationForestEstimator
4
- import pandas as pd
5
  import numpy as np
 
 
 
6
  import shap
7
  import matplotlib.pyplot as plt
8
- from sklearn.datasets import make_classification
9
  from itertools import combinations
10
 
11
- # Initialize H2O
12
- h2o.init()
13
-
14
  # Generate synthetic data with 20 features
15
  np.random.seed(42)
16
  X, _ = make_classification(
@@ -24,56 +19,49 @@ X, _ = make_classification(
24
  outliers = np.random.uniform(low=-6, high=6, size=(50, 20)) # Add outliers
25
  X = np.vstack([X, outliers])
26
 
27
- # Convert to H2O Frame
28
  columns = [f"Feature{i+1}" for i in range(20)]
29
  df = pd.DataFrame(X, columns=columns)
30
- h2o_df = h2o.H2OFrame(df)
31
 
32
- # Fit H2O Isolation Forest
33
- iso_forest = H2OIsolationForestEstimator(
34
- ntrees=100,
35
- max_depth=8,
36
- sample_size=256,
37
- seed=42,
38
- contamination=0.1
39
  )
40
- iso_forest.train(training_frame=h2o_df)
41
 
42
  # Predict anomaly scores
43
- predictions = iso_forest.predict(h2o_df)
44
- pred_df = predictions.as_data_frame()
45
- df["Anomaly_Score"] = pred_df["score"]
46
- df["Anomaly_Label"] = pred_df["predict"].map({0: "Normal", 1: "Anomaly"})
47
 
48
- # Define SHAP explainer
49
- explainer = shap.Explainer(
50
- lambda x: iso_forest.predict(h2o.H2OFrame(x)).as_data_frame()[["score", "predict"]],
51
- df[columns]
52
- )
 
 
53
 
54
- # Helper function for SHAP summary plot
55
- def shap_summary():
56
- shap_values = explainer(df[columns])
57
- plt.figure()
58
- shap.summary_plot(shap_values, df[columns], feature_names=columns, show=False)
59
- plt.savefig("shap_summary.png")
60
- return "shap_summary.png"
61
 
62
- # Helper function for SHAP waterfall plot
63
- def shap_waterfall(index):
64
- shap_values = explainer(df[columns])
65
- plt.figure()
66
- shap.waterfall_plot(shap.Explanation(
67
- values=shap_values.values[int(index)],
68
- base_values=shap_values.base_values[int(index)],
69
- data=df.iloc[int(index)],
70
  feature_names=columns
71
- ))
72
- plt.savefig("shap_waterfall.png")
73
- return "shap_waterfall.png"
74
 
75
- # Helper function for scatter plot
76
- def scatter_plot(feature1, feature2):
 
 
77
  plt.figure(figsize=(8, 6))
78
  plt.scatter(
79
  df[feature1],
@@ -86,33 +74,4 @@ def scatter_plot(feature1, feature2):
86
  plt.title(f"Isolation Forest - {feature1} vs {feature2}")
87
  plt.xlabel(feature1)
88
  plt.ylabel(feature2)
89
- plt.savefig("scatter_plot.png")
90
- return "scatter_plot.png"
91
-
92
- # Gradio app
93
- with gr.Blocks() as app:
94
- gr.Markdown("# Anomaly Detection with Isolation Forest")
95
-
96
- with gr.Tab("SHAP Summary Plot"):
97
- gr.Markdown("Global explainability using SHAP summary plot.")
98
- shap_summary_button = gr.Button("Generate SHAP Summary")
99
- shap_summary_image = gr.Image()
100
- shap_summary_button.click(fn=shap_summary, outputs=shap_summary_image)
101
-
102
- with gr.Tab("SHAP Waterfall Plot"):
103
- gr.Markdown("Local explainability for a specific data point.")
104
- index_input = gr.Number(label="Data Point Index", value=0)
105
- shap_waterfall_button = gr.Button("Generate SHAP Waterfall")
106
- shap_waterfall_image = gr.Image()
107
- shap_waterfall_button.click(fn=shap_waterfall, inputs=index_input, outputs=shap_waterfall_image)
108
-
109
- with gr.Tab("Scatter Plot"):
110
- gr.Markdown("Visualize pairwise feature interactions.")
111
- feature1_dropdown = gr.Dropdown(choices=columns, label="Feature 1")
112
- feature2_dropdown = gr.Dropdown(choices=columns, label="Feature 2")
113
- scatter_plot_button = gr.Button("Generate Scatter Plot")
114
- scatter_plot_image = gr.Image()
115
- scatter_plot_button.click(fn=scatter_plot, inputs=[feature1_dropdown, feature2_dropdown], outputs=scatter_plot_image)
116
-
117
- # Launch the app
118
- app.launch()
 
 
 
 
 
1
  import numpy as np
2
+ import pandas as pd
3
+ from sklearn.datasets import make_classification
4
+ from sklearn.ensemble import IsolationForest
5
  import shap
6
  import matplotlib.pyplot as plt
 
7
  from itertools import combinations
8
 
 
 
 
9
  # Generate synthetic data with 20 features
10
  np.random.seed(42)
11
  X, _ = make_classification(
 
19
  outliers = np.random.uniform(low=-6, high=6, size=(50, 20)) # Add outliers
20
  X = np.vstack([X, outliers])
21
 
22
+ # Convert to DataFrame
23
  columns = [f"Feature{i+1}" for i in range(20)]
24
  df = pd.DataFrame(X, columns=columns)
 
25
 
26
+ # Fit Isolation Forest
27
+ iso_forest = IsolationForest(
28
+ n_estimators=100,
29
+ max_samples=256,
30
+ contamination=0.1,
31
+ random_state=42
 
32
  )
33
+ iso_forest.fit(df)
34
 
35
  # Predict anomaly scores
36
+ anomaly_scores = iso_forest.decision_function(df) # Negative values indicate anomalies
37
+ anomaly_labels = iso_forest.predict(df) # -1 for anomaly, 1 for normal
 
 
38
 
39
+ # Add results to DataFrame
40
+ df["Anomaly_Score"] = anomaly_scores
41
+ df["Anomaly_Label"] = np.where(anomaly_labels == -1, "Anomaly", "Normal")
42
+
43
+ # SHAP Explainability
44
+ explainer = shap.Explainer(iso_forest, df[columns])
45
+ shap_values = explainer(df[columns])
46
 
47
+ # SHAP Summary Plot (Global Explainability)
48
+ shap.summary_plot(shap_values, df[columns], feature_names=columns)
 
 
 
 
 
49
 
50
+ # SHAP Waterfall Plot for a Specific Data Point (Local Explainability)
51
+ specific_index = df[df["Anomaly_Label"] == "Anomaly"].index[0] # Select first anomaly
52
+ shap.waterfall_plot(
53
+ shap.Explanation(
54
+ values=shap_values.values[specific_index],
55
+ base_values=shap_values.base_values[specific_index],
56
+ data=df.iloc[specific_index],
 
57
  feature_names=columns
58
+ )
59
+ )
 
60
 
61
+ # Scatter plots for pairwise combinations of features
62
+ feature_combinations = list(combinations(columns[:5], 2)) # Use first 5 features for simplicity
63
+
64
+ for feature1, feature2 in feature_combinations:
65
  plt.figure(figsize=(8, 6))
66
  plt.scatter(
67
  df[feature1],
 
74
  plt.title(f"Isolation Forest - {feature1} vs {feature2}")
75
  plt.xlabel(feature1)
76
  plt.ylabel(feature2)
77
+ plt.show()