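# NOTE: the next six lines are page residue from the web file viewer this
# source was copied from; they are kept as comments so they do not break the
# module.
# rtik007's picture
# Update app.py
# cadabca verified
# raw
# history blame
# 3.77 kB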
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.ensemble import IsolationForest
import shap
import matplotlib.pyplot as plt
from itertools import combinations
import gradio as gr
# Build the demo dataset: 500 samples from make_classification (the class
# labels are discarded) followed by 50 uniformly drawn rows that serve as
# injected outliers. Every row has 20 features.
np.random.seed(42)  # seeds the global NumPy RNG used for the outlier draw

X, _ = make_classification(
    n_samples=500,
    n_features=20,
    n_informative=10,
    n_redundant=5,
    n_clusters_per_class=1,
    random_state=42,
)

# Outliers are sampled uniformly from [-6, 6) in every feature dimension and
# appended below the generated samples.
outliers = np.random.uniform(-6, 6, size=(50, 20))
X = np.concatenate((X, outliers), axis=0)

# Wrap the matrix in a DataFrame with columns named Feature1 .. Feature20.
columns = [f"Feature{number}" for number in range(1, 21)]
df = pd.DataFrame(data=X, columns=columns)
# Train the Isolation Forest. At this point df holds only the feature columns;
# the result columns are added further down.
iso_forest = IsolationForest(
    n_estimators=100,
    max_samples=256,
    contamination=0.1,
    random_state=42,
).fit(df)

# Score every row with the fitted model.
anomaly_scores = iso_forest.decision_function(df)  # negative values indicate anomalies
anomaly_labels = iso_forest.predict(df)  # -1 for anomaly, 1 for normal

# Store the score and a readable label next to the features.
df["Anomaly_Score"] = anomaly_scores
df["Anomaly_Label"] = np.where(anomaly_labels != -1, "Normal", "Anomaly")

# SHAP explainability: build the explainer with the feature columns as
# background data, then explain every row. Only the feature columns are
# passed, not the two result columns added above.
explainer = shap.Explainer(iso_forest, df.loc[:, columns])
shap_values = explainer(df.loc[:, columns])
# Define functions for Gradio
def get_shap_summary():
    """Render the global SHAP summary plot and save it as a PNG.

    The plot is built from the module-level ``shap_values`` and the feature
    columns of ``df``.

    Returns:
        The path of the written image, ``"shap_summary.png"``. The same file
        is overwritten on every call.
    """
    fig = plt.figure()
    try:
        shap.summary_plot(shap_values, df[columns], feature_names=columns, show=False)
        # bbox_inches="tight" keeps long feature labels from being clipped.
        plt.savefig("shap_summary.png", bbox_inches="tight")
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # without this each button click would leak one figure.
        plt.close(fig)
    return "shap_summary.png"
def get_shap_waterfall(index):
    """Render the SHAP waterfall plot for one data point and save it as a PNG.

    Args:
        index: Row position of the data point to explain. It is converted with
            ``int()``; negative values count from the end, as with normal
            Python indexing.

    Returns:
        The path of the written image, ``"shap_waterfall.png"``. The same file
        is overwritten on every call.

    Raises:
        gr.Error: If ``index`` is missing, is not a number, or lies outside
            the range of explained rows.
    """
    try:
        specific_index = int(index)
    except (TypeError, ValueError):
        # A cleared number field arrives as None.
        raise gr.Error("Please enter a valid data point index.") from None

    row_count = len(shap_values.values)
    if not -row_count <= specific_index < row_count:
        raise gr.Error(
            f"Index {specific_index} is out of range; "
            f"valid indices are 0 to {row_count - 1}."
        )

    explanation = shap.Explanation(
        values=shap_values.values[specific_index],
        base_values=shap_values.base_values[specific_index],
        # Restrict the row to the feature columns so that `data` lines up
        # one-to-one with `values`; df also carries the Anomaly_Score and
        # Anomaly_Label columns, which are not features.
        data=df[columns].iloc[specific_index].to_numpy(),
        feature_names=columns,
    )

    fig = plt.figure()
    try:
        # show=False prevents shap from calling plt.show() before the figure
        # has been written to disk.
        shap.waterfall_plot(explanation, show=False)
        plt.savefig("shap_waterfall.png", bbox_inches="tight")
    finally:
        # Close the figure so repeated requests do not accumulate figures.
        plt.close(fig)
    return "shap_waterfall.png"
def get_scatter_plot(feature1, feature2):
    """Render a scatter plot of two columns, coloured by anomaly label.

    Args:
        feature1: Name of the ``df`` column plotted on the x axis.
        feature2: Name of the ``df`` column plotted on the y axis.

    Returns:
        The path of the written image, ``"scatter_plot.png"``. The same file
        is overwritten on every call.

    Raises:
        gr.Error: If either feature is unset or is not a column of ``df``.
    """
    if feature1 is None or feature2 is None:
        # An untouched dropdown passes None to this callback.
        raise gr.Error("Please select both Feature 1 and Feature 2.")
    for name in (feature1, feature2):
        if name not in df.columns:
            raise gr.Error(f"Unknown feature: {name}")

    # Boolean mask used as the colour value: True for rows labelled "Anomaly".
    is_anomaly = df["Anomaly_Label"] == "Anomaly"

    fig = plt.figure(figsize=(8, 6))
    try:
        plt.scatter(
            df[feature1],
            df[feature2],
            c=is_anomaly,
            cmap="coolwarm",
            edgecolor="k",
            alpha=0.7,
        )
        plt.title(f"Isolation Forest - {feature1} vs {feature2}")
        plt.xlabel(feature1)
        plt.ylabel(feature2)
        plt.savefig("scatter_plot.png")
    finally:
        # Close the figure so repeated requests do not accumulate figures.
        plt.close(fig)
    return "scatter_plot.png"
# Create Gradio interface: one tab per visualisation, each with a button that
# runs the matching plotting function and shows the saved image.
with gr.Blocks() as demo:
    gr.Markdown("# Isolation Forest Anomaly Detection")

    with gr.Tab("SHAP Summary"):
        gr.Markdown("### Global Explainability: SHAP Summary Plot")
        shap_button = gr.Button("Generate SHAP Summary Plot")
        shap_image = gr.Image()
        shap_button.click(get_shap_summary, outputs=shap_image)

    with gr.Tab("SHAP Waterfall"):
        gr.Markdown("### Local Explainability: SHAP Waterfall Plot")
        index_input = gr.Number(label="Data Point Index", value=0)
        shap_waterfall_button = gr.Button("Generate SHAP Waterfall Plot")
        shap_waterfall_image = gr.Image()
        shap_waterfall_button.click(
            get_shap_waterfall,
            inputs=index_input,
            outputs=shap_waterfall_image,
        )

    with gr.Tab("Feature Scatter Plot"):
        gr.Markdown("### Feature Interaction: Scatter Plot")
        # Pre-select two different features so the plot button works right
        # away; an unset dropdown would pass None to the callback.
        feature1_dropdown = gr.Dropdown(choices=columns, value=columns[0], label="Feature 1")
        feature2_dropdown = gr.Dropdown(choices=columns, value=columns[1], label="Feature 2")
        scatter_button = gr.Button("Generate Scatter Plot")
        scatter_image = gr.Image()
        scatter_button.click(
            get_scatter_plot,
            inputs=[feature1_dropdown, feature2_dropdown],
            outputs=scatter_image,
        )

# Launch the Gradio app only when this file is run as a script, so that
# importing the module does not start a server.
if __name__ == "__main__":
    demo.launch()