# rtik007's picture
# Update app.py
# da15f9a verified
# raw
# history blame
# 3.77 kB
import gradio as gr
import h2o
from h2o.estimators import H2OIsolationForestEstimator
import pandas as pd
import numpy as np
import shap
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from itertools import combinations
# ---------------------------------------------------------------------------
# H2O backend
# ---------------------------------------------------------------------------
h2o.init()

# ---------------------------------------------------------------------------
# Synthetic dataset: 500 generated samples plus 50 uniformly drawn outliers,
# 20 features each.
# ---------------------------------------------------------------------------
np.random.seed(42)  # seeds the global RNG used for the outlier draw below
X, _ = make_classification(
    n_samples=500,
    n_features=20,
    n_informative=10,
    n_redundant=5,
    n_clusters_per_class=1,
    random_state=42,
)
outliers = np.random.uniform(-6, 6, size=(50, 20))
X = np.concatenate([X, outliers], axis=0)

# Name the features "Feature1" .. "Feature20" and mirror the data into H2O.
columns = [f"Feature{number}" for number in range(1, 21)]
df = pd.DataFrame(data=X, columns=columns)
h2o_df = h2o.H2OFrame(df)

# ---------------------------------------------------------------------------
# Isolation Forest: fit on the full frame, then score the same rows.
# ---------------------------------------------------------------------------
iso_forest = H2OIsolationForestEstimator(
    ntrees=100,
    max_depth=8,
    sample_size=256,
    contamination=0.1,
    seed=42,
)
iso_forest.train(training_frame=h2o_df)

predictions = iso_forest.predict(h2o_df)
pred_df = predictions.as_data_frame()

# Attach the raw score and a readable label (0 -> "Normal", 1 -> "Anomaly")
# to the pandas frame that the plotting helpers read from.
df["Anomaly_Score"] = pred_df["score"]
df["Anomaly_Label"] = pred_df["predict"].map({0: "Normal", 1: "Anomaly"})
# Define SHAP explainer
def _anomaly_score(x):
    """Return the Isolation Forest anomaly score for every row of *x*.

    SHAP calls this function with perturbed copies of the feature matrix.
    The input is rebuilt as a DataFrame that carries the training column
    names before it is handed to H2O, so the model is always scored on
    ``Feature1`` .. ``Feature20`` whether SHAP passes a DataFrame or a bare
    array (an H2OFrame built from an unnamed array gets auto-generated column
    names, which would not match the names the model was trained on).

    Only the continuous ``score`` column is returned, as a 1-D NumPy array,
    so the resulting explanation has a single output per row. The hard
    ``predict`` label is deliberately left out: it would add a second output
    and turn every SHAP value into a (feature, output) pair.
    """
    named = pd.DataFrame(np.asarray(x), columns=columns)
    scored = iso_forest.predict(h2o.H2OFrame(named)).as_data_frame()
    return scored["score"].to_numpy()


# The feature columns of ``df`` serve as the background data for masking.
explainer = shap.Explainer(_anomaly_score, df[columns])
# Helper function for SHAP summary plot
def shap_summary():
    """Render the global SHAP summary plot and return the image path.

    SHAP values are computed for every row of the feature columns of ``df``
    and drawn with ``shap.summary_plot``; the figure is written to
    ``shap_summary.png`` in the working directory.

    Returns:
        The file name ``"shap_summary.png"``.
    """
    features = df[columns]
    shap_values = explainer(features)

    # If the explainer yields one set of SHAP values per model output (a 3-D
    # values array), plot only the first output so the summary plot receives
    # the (rows, features) matrix it is being compared against.
    if np.ndim(shap_values.values) == 3:
        shap_values = shap_values[:, :, 0]

    plt.figure()
    try:
        shap.summary_plot(shap_values, features, feature_names=columns, show=False)
        plt.savefig("shap_summary.png")
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # without this each button click would leak a figure.
        plt.close("all")
    return "shap_summary.png"
# Helper function for SHAP waterfall plot
def shap_waterfall(index):
    """Render a SHAP waterfall plot for one data point and return the image path.

    Args:
        index: Row position in ``df``; converted with ``int()``. Negative
            values count from the end, as with normal Python indexing.

    Returns:
        The file name ``"shap_waterfall.png"``.

    Raises:
        IndexError: If ``index`` is outside the rows of ``df``.
    """
    row = int(index)
    n_rows = len(df)
    if not -n_rows <= row < n_rows:
        raise IndexError(
            f"Data point index {row} is out of range for {n_rows} rows"
        )

    features = df[columns]

    # Explain only the requested row; explaining the whole dataset just to
    # draw a single waterfall multiplies the number of model evaluations.
    shap_values = explainer(features.iloc[[row]])
    values = shap_values.values[0]
    base_value = shap_values.base_values[0]

    # A waterfall plot needs exactly one SHAP value per feature. If the
    # explainer produced one column per model output, keep the first output.
    if np.ndim(values) == 2:
        values = values[:, 0]
        base_value = base_value[0]

    explanation = shap.Explanation(
        values=values,
        base_values=base_value,
        # Feature columns only: ``df`` also holds Anomaly_Score and
        # Anomaly_Label, which have no matching SHAP values.
        data=features.iloc[row].to_numpy(),
        feature_names=columns,
    )

    plt.figure()
    try:
        # show=False keeps the figure open so it can be saved below.
        shap.waterfall_plot(explanation, show=False)
        plt.savefig("shap_waterfall.png")
    finally:
        # Close explicitly so repeated clicks do not accumulate figures.
        plt.close("all")
    return "shap_waterfall.png"
# Helper function for scatter plot
def scatter_plot(feature1, feature2):
    """Scatter two columns of ``df`` against each other, colored by anomaly label.

    Args:
        feature1: Name of the column plotted on the x axis.
        feature2: Name of the column plotted on the y axis.

    Returns:
        The file name ``"scatter_plot.png"``.

    Raises:
        ValueError: If either name is not a column of ``df`` (this includes
            ``None``, which is what an empty dropdown delivers).
    """
    for name in (feature1, feature2):
        if name not in df.columns:
            raise ValueError(
                f"Unknown feature {name!r}; select a feature for both axes."
            )

    # True for rows labeled "Anomaly"; the colormap turns the two values
    # into two colors.
    is_anomaly = df["Anomaly_Label"] == "Anomaly"

    fig = plt.figure(figsize=(8, 6))
    try:
        plt.scatter(
            df[feature1],
            df[feature2],
            c=is_anomaly,
            cmap="coolwarm",
            edgecolor="k",
            alpha=0.7,
        )
        plt.title(f"Isolation Forest - {feature1} vs {feature2}")
        plt.xlabel(feature1)
        plt.ylabel(feature2)
        plt.savefig("scatter_plot.png")
    finally:
        # Close explicitly so repeated clicks do not accumulate figures.
        plt.close(fig)
    return "scatter_plot.png"
# Gradio app: three tabs, each with a button that renders one plot to an image.
with gr.Blocks() as app:
    gr.Markdown("# Anomaly Detection with Isolation Forest")

    with gr.Tab("SHAP Summary Plot"):
        gr.Markdown("Global explainability using SHAP summary plot.")
        shap_summary_button = gr.Button("Generate SHAP Summary")
        shap_summary_image = gr.Image()
        shap_summary_button.click(fn=shap_summary, outputs=shap_summary_image)

    with gr.Tab("SHAP Waterfall Plot"):
        gr.Markdown("Local explainability for a specific data point.")
        # precision=0 makes the field deliver whole numbers, since the value
        # is used as a row index.
        index_input = gr.Number(label="Data Point Index", value=0, precision=0)
        shap_waterfall_button = gr.Button("Generate SHAP Waterfall")
        shap_waterfall_image = gr.Image()
        shap_waterfall_button.click(
            fn=shap_waterfall,
            inputs=index_input,
            outputs=shap_waterfall_image,
        )

    with gr.Tab("Scatter Plot"):
        gr.Markdown("Visualize pairwise feature interactions.")
        # Preselect two different features so the button works before the
        # user has touched the dropdowns (an empty dropdown passes None).
        feature1_dropdown = gr.Dropdown(
            choices=columns, value=columns[0], label="Feature 1"
        )
        feature2_dropdown = gr.Dropdown(
            choices=columns, value=columns[1], label="Feature 2"
        )
        scatter_plot_button = gr.Button("Generate Scatter Plot")
        scatter_plot_image = gr.Image()
        scatter_plot_button.click(
            fn=scatter_plot,
            inputs=[feature1_dropdown, feature2_dropdown],
            outputs=scatter_plot_image,
        )

# Launch the app
app.launch()