# Spaces status: Build error
| import numpy as np | |
| import pandas as pd | |
| from sklearn.datasets import make_classification | |
| from sklearn.ensemble import IsolationForest | |
| import shap | |
| import matplotlib.pyplot as plt | |
| from itertools import combinations | |
# --- Synthetic dataset -------------------------------------------------------
# 500 samples with 20 features come from make_classification (class labels are
# discarded); 50 extra rows drawn uniformly from [-6, 6) are stacked underneath
# to act as outliers.
np.random.seed(42)  # seeds NumPy's global RNG, which draws the outlier rows below
X, _ = make_classification(
    n_samples=500,
    n_features=20,
    n_informative=10,
    n_redundant=5,
    n_clusters_per_class=1,
    random_state=42,
)
outliers = np.random.uniform(-6, 6, (50, 20))
X = np.concatenate((X, outliers), axis=0)

# Wrap the matrix in a DataFrame with columns named Feature1 .. Feature20.
columns = [f"Feature{i+1}" for i in range(20)]
df = pd.DataFrame(data=X, columns=columns)
# --- Isolation Forest --------------------------------------------------------
# At this point df holds only the feature columns, so the model is trained and
# scored on exactly those columns.
iso_forest = IsolationForest(
    n_estimators=100,
    max_samples=256,
    contamination=0.1,
    random_state=42,
).fit(df)

# decision_function: negative values indicate anomalies.
# predict: -1 marks an anomaly, 1 marks a normal point.
anomaly_scores = iso_forest.decision_function(df)
anomaly_labels = iso_forest.predict(df)

# Attach the results to the DataFrame (score first, then the readable label).
df["Anomaly_Score"] = anomaly_scores
df["Anomaly_Label"] = np.where(anomaly_labels != -1, "Normal", "Anomaly")
# --- SHAP explainability -----------------------------------------------------
# Explain the fitted Isolation Forest on the feature columns only; df also
# carries the Anomaly_Score / Anomaly_Label result columns by now.
explainer = shap.Explainer(iso_forest, df[columns])
shap_values = explainer(df[columns])

# Global explainability: summary plot over all rows.
shap.summary_plot(shap_values, df[columns], feature_names=columns)

# Local explainability: waterfall plot for the first row flagged as an anomaly.
# shap_values.values / .base_values are indexed by position, so derive a
# positional index instead of reusing a DataFrame index label.
anomaly_positions = np.flatnonzero((df["Anomaly_Label"] == "Anomaly").to_numpy())
specific_index = int(anomaly_positions[0])  # IndexError if nothing was flagged
shap.waterfall_plot(
    shap.Explanation(
        values=shap_values.values[specific_index],
        base_values=shap_values.base_values[specific_index],
        # Only the feature columns: the full df row would also contain the
        # score and the string label, which do not line up with the SHAP values
        # or with feature_names.
        data=df[columns].iloc[specific_index].to_numpy(),
        feature_names=columns,
    )
)
# --- Pairwise scatter plots --------------------------------------------------
# One figure per unordered pair taken from the first five features; points are
# coloured by whether the row was labelled "Anomaly".
feature_combinations = list(combinations(columns[:5], 2))
is_anomaly = df["Anomaly_Label"] == "Anomaly"  # loop-invariant colour mask
scatter_style = {"cmap": "coolwarm", "edgecolor": "k", "alpha": 0.7}

for feature1, feature2 in feature_combinations:
    plt.figure(figsize=(8, 6))
    plt.scatter(df[feature1], df[feature2], c=is_anomaly, **scatter_style)
    plt.title(f"Isolation Forest - {feature1} vs {feature2}")
    plt.xlabel(feature1)
    plt.ylabel(feature2)
    plt.show()