"""Isolation Forest anomaly-detection demo with a small Gradio front end.

At import time the script builds a synthetic 20-feature data set with injected
uniform outliers, fits an ``IsolationForest`` on it, stores each row's score
and label in a module-level DataFrame, computes SHAP values for the fitted
model, and finally launches a Gradio app that shows sample records.
"""
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.ensemble import IsolationForest
import shap
import matplotlib.pyplot as plt
import gradio as gr

# Generate synthetic data with 20 features.
np.random.seed(42)
X, _ = make_classification(
    n_samples=500,
    n_features=20,
    n_informative=10,
    n_redundant=5,
    n_clusters_per_class=1,
    random_state=42,
)

# Add 50 outlier rows drawn uniformly from [-6, 6) in every feature.
outliers = np.random.uniform(low=-6, high=6, size=(50, 20))
X = np.vstack([X, outliers])

# Convert to DataFrame.
columns = [f"Feature{i+1}" for i in range(20)]
df = pd.DataFrame(X, columns=columns)

# Fit Isolation Forest (df holds only the feature columns at this point).
iso_forest = IsolationForest(
    n_estimators=100,
    max_samples=256,
    contamination=0.1,
    random_state=42,
)
iso_forest.fit(df)

# Score every row: negative decision_function values indicate anomalies, and
# predict() returns -1 for an anomaly and 1 for a normal record.
anomaly_scores = iso_forest.decision_function(df)
anomaly_labels = iso_forest.predict(df)

# Add results to DataFrame.
df["Anomaly_Score"] = anomaly_scores
df["Anomaly_Label"] = np.where(anomaly_labels == -1, "Anomaly", "Normal")

# SHAP explainability, computed on the feature columns only.
# NOTE(review): shap_values is not consumed anywhere in the visible code;
# confirm whether a later part of the app is meant to display it.
explainer = shap.Explainer(iso_forest, df[columns])
shap_values = explainer(df[columns])


# Define functions for Gradio.
def get_anomaly_samples():
    """Return the top, middle, and bottom 10 records ranked by anomaly score.

    Rows of the module-level ``df`` are ranked from most anomalous to most
    normal. ``decision_function`` scores are lower for more abnormal samples,
    so the ranking is an ascending sort on ``Anomaly_Score``.

    Returns:
        A 3-tuple of DataFrames ``(top_10, middle_10, bottom_10)``:
        the 10 most anomalous rows labelled "Anomaly", the 10 rows around the
        middle of the ranking (any label), and the 10 most normal rows
        labelled "Normal". Each frame has fewer rows if not enough records
        are available.
    """
    # Ascending order puts the lowest (most anomalous) scores first, so that
    # head() yields the strongest anomalies and tail() the most normal rows.
    ranked = df.sort_values("Anomaly_Score", ascending=True)

    # Top 10 anomalies: lowest scores among rows flagged as anomalies.
    top_10 = ranked[ranked["Anomaly_Label"] == "Anomaly"].head(10)

    # Middle 10 (mixed records); clamp at 0 so a frame with fewer than 10 rows
    # does not produce a negative start that would slice from the end.
    mid_start = max(len(ranked) // 2 - 5, 0)
    middle_10 = ranked.iloc[mid_start:mid_start + 10]

    # Bottom 10 normal records: highest scores among rows flagged as normal.
    bottom_10 = ranked[ranked["Anomaly_Label"] == "Normal"].tail(10)

    return top_10, middle_10, bottom_10


# Create Gradio interface.
with gr.Blocks() as demo:
    gr.Markdown("# Isolation Forest Anomaly Detection")

    with gr.Tab("Anomaly Samples"):
        # The section captions keep their original visible text; only the
        # surrounding blank lines are dropped, which Markdown rendering
        # ignores. gr.Markdown is called without `unsafe_allow_html`, which
        # is not one of its parameters.
        gr.Markdown("Top 10 Records (Anomalies)")
        top_table = gr.Dataframe(label="Top 10 Records")

        gr.Markdown("Middle 10 Records (Mixed)")
        middle_table = gr.Dataframe(label="Middle 10 Records")

        gr.Markdown("Bottom 10 Records (Normal)")
        bottom_table = gr.Dataframe(label="Bottom 10 Records")

        anomaly_samples_button = gr.Button("Show Anomaly Samples")
        # The handler takes no inputs and fills the three tables in order.
        anomaly_samples_button.click(
            get_anomaly_samples,
            outputs=[top_table, middle_table, bottom_table],
        )

# Launch the Gradio app.
demo.launch()