Spaces:
Build error
Build error
| import gradio as gr | |
| import h2o | |
| from h2o.estimators import H2OIsolationForestEstimator | |
| import pandas as pd | |
| import numpy as np | |
| import shap | |
| import matplotlib.pyplot as plt | |
| from sklearn.datasets import make_classification | |
| from itertools import combinations | |
# Initialise the H2O runtime used by the estimator further down.
h2o.init()

# Build the synthetic data set: 500 rows from make_classification (labels are
# discarded) followed by 50 rows of uniform noise in [-6, 6) that serve as
# outliers. Both sources have 20 features.
np.random.seed(42)
X, _ = make_classification(
    n_samples=500,
    n_features=20,
    n_informative=10,
    n_redundant=5,
    n_clusters_per_class=1,
    random_state=42,
)
outliers = np.random.uniform(low=-6, high=6, size=(50, 20))
X = np.concatenate((X, outliers), axis=0)

# Keep the data twice: a pandas frame for plotting and an H2OFrame for the model.
columns = [f"Feature{n}" for n in range(1, 21)]
df = pd.DataFrame(data=X, columns=columns)
h2o_df = h2o.H2OFrame(df)
# Train the Isolation Forest on the complete H2O frame.
iso_forest = H2OIsolationForestEstimator(
    seed=42,
    ntrees=100,
    max_depth=8,
    sample_size=256,
    contamination=0.1,
)
iso_forest.train(training_frame=h2o_df)

# Score the training frame itself and copy the results onto the pandas frame:
# the raw score as-is, the 0/1 prediction as a readable label.
_LABEL_NAMES = {0: "Normal", 1: "Anomaly"}
predictions = iso_forest.predict(h2o_df)
pred_df = predictions.as_data_frame()
df["Anomaly_Score"] = pred_df["score"]
df["Anomaly_Label"] = pred_df["predict"].map(_LABEL_NAMES)
# Define SHAP explainer
def _anomaly_score(x):
    """Return the Isolation Forest anomaly score for every row of *x*.

    This is the model function handed to SHAP. Two things matter here:

    * The input is re-labelled with the training column names before it is
      converted to an H2OFrame. SHAP may pass a bare array (TODO confirm for
      the installed SHAP version); without the names the frame would not
      line up with the ``Feature1..Feature20`` columns used for training.
    * Only the ``score`` column is returned, as a 1-D array. Returning both
      ``score`` and ``predict`` makes SHAP treat the model as multi-output,
      which the single-output waterfall plot cannot display.

    Args:
        x: 2-D array-like (or DataFrame) with one column per entry in
            ``columns``.

    Returns:
        1-D NumPy array with one anomaly score per input row.
    """
    features = pd.DataFrame(np.asarray(x), columns=columns)
    scored = iso_forest.predict(h2o.H2OFrame(features)).as_data_frame()
    return scored["score"].to_numpy()


# The feature frame doubles as the background data for the explainer.
explainer = shap.Explainer(_anomaly_score, df[columns])
# Helper function for SHAP summary plot
def shap_summary():
    """Render the global SHAP summary plot and return the image path.

    SHAP values are computed for every row of ``df[columns]``, drawn with
    ``shap.summary_plot`` and written to ``shap_summary.png``.

    Returns:
        The file name of the saved plot, ``"shap_summary.png"``.
    """
    features = df[columns]
    shap_values = explainer(features)

    values = np.asarray(shap_values.values)
    if values.ndim == 3:
        # The explainer wraps a multi-output model; keep the first output so
        # the plot stays a single-output summary.
        values = values[:, :, 0]

    fig = plt.figure()
    try:
        shap.summary_plot(values, features, feature_names=columns, show=False)
        # "tight" keeps the feature labels on the left from being clipped.
        plt.savefig("shap_summary.png", bbox_inches="tight")
    finally:
        # Always release the figure; otherwise one leaks per button click.
        plt.close(fig)
    return "shap_summary.png"
# Helper function for SHAP waterfall plot
def shap_waterfall(index):
    """Render the SHAP waterfall plot for one data point and return its path.

    Only the requested row is explained, rather than the whole data set, and
    the plot is written to ``shap_waterfall.png``.

    Args:
        index: Row position in ``df``. Any number is truncated with ``int``;
            negative positions count from the end.

    Returns:
        The file name of the saved plot, ``"shap_waterfall.png"``.

    Raises:
        gr.Error: If no index was supplied or it is outside the data set.
    """
    if index is None:
        raise gr.Error("Please enter a data point index.")
    row = int(index)
    n_rows = len(df)
    if not -n_rows <= row < n_rows:
        raise gr.Error(
            f"Index {row} is out of range; valid indices are 0 to {n_rows - 1}."
        )

    # Restrict to the feature columns: df also carries Anomaly_Score and
    # Anomaly_Label, which must not be passed to SHAP as feature data.
    features = df[columns].iloc[[row]]
    shap_values = explainer(features)

    values = np.asarray(shap_values.values)[0]
    base_value = np.asarray(shap_values.base_values)[0]
    if values.ndim == 2:
        # Multi-output explainer: keep the first output so the waterfall
        # receives one value per feature and a scalar base value.
        values = values[:, 0]
        base_value = base_value[0]

    fig = plt.figure()
    try:
        shap.waterfall_plot(
            shap.Explanation(
                values=values,
                base_values=float(base_value),
                data=features.iloc[0].to_numpy(),
                feature_names=columns,
            ),
            show=False,  # keep the figure open so it can be saved below
        )
        plt.savefig("shap_waterfall.png", bbox_inches="tight")
    finally:
        # Always release the figure; otherwise one leaks per button click.
        plt.close(fig)
    return "shap_waterfall.png"
# Helper function for scatter plot
def scatter_plot(feature1, feature2):
    """Plot two columns of ``df`` against each other, coloured by anomaly label.

    The plot is written to ``scatter_plot.png``.

    Args:
        feature1: Name of the ``df`` column shown on the x axis.
        feature2: Name of the ``df`` column shown on the y axis.

    Returns:
        The file name of the saved plot, ``"scatter_plot.png"``.

    Raises:
        gr.Error: If either name is missing (e.g. nothing was selected in
            the dropdown) or is not a column of ``df``.
    """
    for name in (feature1, feature2):
        if name is None or name not in df.columns:
            raise gr.Error("Please select two valid features to plot.")

    is_anomaly = df["Anomaly_Label"] == "Anomaly"

    # Use an explicit figure/axes pair instead of pyplot's implicit "current
    # figure", so the handler does not rely on global plotting state.
    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        ax.scatter(
            df[feature1],
            df[feature2],
            c=is_anomaly,
            cmap="coolwarm",
            edgecolor="k",
            alpha=0.7,
        )
        ax.set_title(f"Isolation Forest - {feature1} vs {feature2}")
        ax.set_xlabel(feature1)
        ax.set_ylabel(feature2)
        fig.savefig("scatter_plot.png")
    finally:
        # Always release the figure; otherwise one leaks per button click.
        plt.close(fig)
    return "scatter_plot.png"
# Gradio app: one tab per visualisation, each with a button that triggers the
# matching helper and an image component that shows the saved plot.
with gr.Blocks() as app:
    gr.Markdown("# Anomaly Detection with Isolation Forest")

    with gr.Tab("SHAP Summary Plot"):
        gr.Markdown("Global explainability using SHAP summary plot.")
        shap_summary_button = gr.Button("Generate SHAP Summary")
        shap_summary_image = gr.Image()
        shap_summary_button.click(fn=shap_summary, outputs=shap_summary_image)

    with gr.Tab("SHAP Waterfall Plot"):
        gr.Markdown("Local explainability for a specific data point.")
        # precision=0 makes the component deliver whole numbers, since the
        # value is used as a row position.
        index_input = gr.Number(label="Data Point Index", value=0, precision=0)
        shap_waterfall_button = gr.Button("Generate SHAP Waterfall")
        shap_waterfall_image = gr.Image()
        shap_waterfall_button.click(
            fn=shap_waterfall,
            inputs=index_input,
            outputs=shap_waterfall_image,
        )

    with gr.Tab("Scatter Plot"):
        gr.Markdown("Visualize pairwise feature interactions.")
        # Pre-select two different features so clicking the button without
        # touching the dropdowns does not send None to the handler.
        feature1_dropdown = gr.Dropdown(
            choices=columns, value=columns[0], label="Feature 1"
        )
        feature2_dropdown = gr.Dropdown(
            choices=columns, value=columns[1], label="Feature 2"
        )
        scatter_plot_button = gr.Button("Generate Scatter Plot")
        scatter_plot_image = gr.Image()
        scatter_plot_button.click(
            fn=scatter_plot,
            inputs=[feature1_dropdown, feature2_dropdown],
            outputs=scatter_plot_image,
        )

# Launch the app
app.launch()