cathrica
/

deep-learning-project

ml-intern

Model card Files Files and versions

xet

Community

cathrica committed on Apr 29

Commit

d1e780d

verified ·

1 Parent(s): 9ea3b1c

Add SHAP + LIME explainability analysis

Browse files

Files changed (1) hide show

explainability/shap_analysis.py +243 -0

explainability/shap_analysis.py ADDED Viewed

	@@ -0,0 +1,243 @@

+"""
+SHAP and LIME explainability analysis for trained IDS models.
+"""
+import os
+import sys
+import json
+import numpy as np
+import torch
+import shap
+from lime import lime_tabular
+import matplotlib
+matplotlib.use('Agg')
+import matplotlib.pyplot as plt
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from models.mlp_baseline import MLP_IDS
+from models.lstm_model import LSTM_IDS
+from models.cnn1d_model import CNN1D_IDS
+from data.preprocess import load_preprocessed, FEATURE_NAMES
# --- Reproducibility -------------------------------------------------------
# One seed drives both the torch and NumPy global RNGs so that the random
# sample selection done further down is repeatable between runs.
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

# --- Runtime configuration -------------------------------------------------
DEVICE = torch.device('cpu')  # SHAP works best on CPU for these models

# --- Filesystem layout (relative to the current working directory) ---------
MODELS_DIR = 'saved_models'   # where the `<name>_best.pt` checkpoints are read from
RESULTS_DIR = 'results'       # where plots and the JSON summary are written

# --- Explanation sample sizes ----------------------------------------------
N_BACKGROUND = 100   # Background samples for SHAP
N_EXPLAIN = 200      # Samples to explain
def load_model(model_class, model_name, num_classes=2, in_dim=41):
    """Instantiate a model and load its best checkpoint from ``MODELS_DIR``.

    Args:
        model_class: Model constructor; called as
            ``model_class(in_dim=..., num_classes=...)``.
        model_name: Checkpoint stem; the file read is
            ``<MODELS_DIR>/<model_name>_best.pt``.
        num_classes: Number of output classes passed to the constructor.
        in_dim: Number of input features passed to the constructor.
            Defaults to 41, the value that was previously hard-coded.

    Returns:
        The model with the checkpoint weights loaded, switched to eval mode.
    """
    checkpoint_path = os.path.join(MODELS_DIR, f'{model_name}_best.pt')
    model = model_class(in_dim=in_dim, num_classes=num_classes)
    # weights_only=True restricts unpickling to tensor data; map_location
    # forces the weights onto the CPU regardless of where they were saved.
    state_dict = torch.load(checkpoint_path, weights_only=True, map_location='cpu')
    model.load_state_dict(state_dict)
    model.eval()
    return model
def model_predict_fn(model, X):
    """Return class probabilities for ``X`` as a NumPy array.

    This is the prediction wrapper handed to SHAP and LIME, both of which
    call it with plain NumPy arrays and expect NumPy probabilities back.

    Args:
        model: Callable mapping a float32 tensor of inputs to a 2-D tensor of
            logits (one row per sample). Typically a ``torch.nn.Module``.
        X: Array-like batch of samples (anything ``np.asarray`` accepts).

    Returns:
        ``numpy.ndarray`` of softmax probabilities taken over axis 1.
    """
    # Run on whatever device the model's weights live on, so the input can
    # never end up on a different device than the model. Plain callables
    # (no ``parameters()``) and parameter-free modules fall back to the CPU.
    try:
        device = next(model.parameters()).device
    except (AttributeError, StopIteration, TypeError):
        device = torch.device('cpu')

    inputs = torch.as_tensor(np.asarray(X), dtype=torch.float32, device=device)
    with torch.no_grad():
        logits = model(inputs)
        probs = torch.softmax(logits, dim=1)
    # .cpu() is required before .numpy() whenever the tensor is not on the CPU.
    return probs.cpu().numpy()
def _select_class_shap(shap_values, class_idx=0):
    """Return the (n_samples, n_features) SHAP matrix for one output class.

    Depending on the SHAP version, multi-output explanations come back either
    as a list with one 2-D array per output, or as a single 3-D array laid out
    as (n_samples, n_features, n_outputs). A 2-D array is returned unchanged.
    """
    if isinstance(shap_values, (list, tuple)):
        return np.asarray(shap_values[class_idx])
    values = np.asarray(shap_values)
    if values.ndim == 3:
        return values[:, :, class_idx]
    return values


def run_shap_analysis(model, model_name, X_train, X_test, class_names):
    """Compute SHAP values using KernelExplainer (model-agnostic).

    Draws up to ``N_BACKGROUND`` rows of ``X_train`` as the background set and
    up to ``N_EXPLAIN`` rows of ``X_test`` to explain, then ranks features by
    mean |SHAP| for the first class (``class_names[0]``). Two plots are
    written to ``RESULTS_DIR``: ``shap_summary_<model_name>.png`` and
    ``shap_bar_<model_name>.png``.

    Args:
        model: Model passed through to ``model_predict_fn``.
        model_name: Short name used in log output and output file names.
        X_train: Indexable 2-D array of training samples (background pool).
        X_test: Indexable 2-D array of test samples (explanation pool).
        class_names: Sequence of class labels; only index 0 is used here.

    Returns:
        Tuple ``(shap_values, feature_importance, exp_idx)`` where
        ``shap_values`` is the raw explainer output, ``feature_importance`` is
        a list of ``(feature_name, mean_abs_shap)`` sorted descending, and
        ``exp_idx`` holds the indices of the explained ``X_test`` rows.
    """
    print(f"\n--- SHAP Analysis: {model_name} ---")

    # Sampling without replacement raises if more rows are requested than
    # exist, so clamp both sample sizes to the data actually available.
    n_background = min(N_BACKGROUND, len(X_train))
    n_explain = min(N_EXPLAIN, len(X_test))

    # Background data
    bg_idx = np.random.choice(len(X_train), n_background, replace=False)
    background = X_train[bg_idx]

    # Samples to explain
    exp_idx = np.random.choice(len(X_test), n_explain, replace=False)
    X_explain = X_test[exp_idx]

    def predict_fn(X):
        return model_predict_fn(model, X)

    # KernelExplainer (model-agnostic, works for all architectures)
    explainer = shap.KernelExplainer(predict_fn, background)
    print(f"  Computing SHAP values for {n_explain} samples...")
    shap_values = explainer.shap_values(X_explain, nsamples=200, silent=True)

    # Indexing the raw result with [0] is only correct for the list-per-class
    # layout; normalise first so the first *class* is always what is selected.
    class_shap = _select_class_shap(shap_values, 0)

    # --- Global Feature Importance ---
    mean_abs_shap = np.abs(class_shap).mean(axis=0)
    feature_importance = sorted(
        zip(FEATURE_NAMES, mean_abs_shap),
        key=lambda item: item[1],
        reverse=True,
    )
    print(f"\n  Top 10 features (by mean |SHAP| for {class_names[0]}):")
    for fname, imp in feature_importance[:10]:
        print(f"    {fname:35s}: {imp:.4f}")

    # --- Save SHAP summary plot ---
    os.makedirs(RESULTS_DIR, exist_ok=True)
    plt.figure(figsize=(10, 8))
    shap.summary_plot(class_shap, X_explain, feature_names=FEATURE_NAMES,
                      show=False, max_display=15)
    plt.title(f'SHAP Feature Importance - {model_name.upper()} ({class_names[0]})')
    plt.tight_layout()
    plt.savefig(os.path.join(RESULTS_DIR, f'shap_summary_{model_name}.png'), dpi=150)
    plt.close()

    # --- Save bar plot ---
    plt.figure(figsize=(10, 6))
    top_features = feature_importance[:15]
    names = [name for name, _ in top_features]
    values = [value for _, value in top_features]
    # barh draws index 0 at the bottom, so reverse to put the top feature first.
    plt.barh(range(len(names)), values[::-1], color='steelblue')
    plt.yticks(range(len(names)), names[::-1])
    plt.xlabel('Mean |SHAP value|')
    plt.title(f'Top 15 Features - {model_name.upper()}')
    plt.tight_layout()
    plt.savefig(os.path.join(RESULTS_DIR, f'shap_bar_{model_name}.png'), dpi=150)
    plt.close()

    return shap_values, feature_importance, exp_idx
def run_lime_analysis(model, model_name, X_train, X_test, class_names, n_instances=20):
    """Run LIME on a random subset of test samples.

    Explains up to ``n_instances`` rows of ``X_test`` for the label LIME ranks
    first, counts how often each feature appears among the 10 features of an
    explanation, and writes ``lime_frequency_<model_name>.png`` to
    ``RESULTS_DIR``.

    Args:
        model: Model passed through to ``model_predict_fn``.
        model_name: Short name used in log output and the output file name.
        X_train: Training data handed to ``LimeTabularExplainer``.
        X_test: Indexable 2-D array of test samples to draw from.
        class_names: Sequence of class labels, indexed by predicted class.
        n_instances: Number of test samples to explain; clamped to
            ``len(X_test)``.

    Returns:
        Tuple ``(lime_results, sorted_features)``. ``lime_results`` is a list
        of dicts with keys ``sample_idx``, ``predicted_class`` and
        ``top_features`` (LIME's ``(description, weight)`` pairs).
        ``sorted_features`` is a list of ``(feature_name, count)`` sorted by
        count, descending.
    """
    from collections import Counter

    print(f"\n--- LIME Analysis: {model_name} ---")

    def predict_fn(X):
        return model_predict_fn(model, X)

    explainer = lime_tabular.LimeTabularExplainer(
        X_train,
        feature_names=FEATURE_NAMES,
        class_names=class_names,
        discretize_continuous=True,
        random_state=SEED
    )

    # Sampling without replacement raises if more rows are requested than exist.
    n_instances = min(n_instances, len(X_test))
    idx_to_explain = np.random.choice(len(X_test), n_instances, replace=False)

    lime_results = []
    feature_counts = Counter()
    for i, idx in enumerate(idx_to_explain):
        sample = X_test[idx]
        exp = explainer.explain_instance(sample, predict_fn, num_features=10, top_labels=1)
        # With top_labels=1 the explanation holds exactly one label; reading it
        # back avoids a second forward pass and any mismatch with LIME's pick.
        pred_class = int(exp.available_labels()[0])
        feature_weights = exp.as_list(label=pred_class)
        lime_results.append({
            'sample_idx': int(idx),
            'predicted_class': class_names[pred_class],
            'top_features': [(desc, float(weight)) for desc, weight in feature_weights]
        })
        # Count by feature index rather than by parsing the description text:
        # discretised descriptions can look like "0.20 < feat <= 0.50", whose
        # first token is a number, not the feature name.
        for feat_idx, _ in exp.as_map()[pred_class]:
            feature_counts[FEATURE_NAMES[int(feat_idx)]] += 1
        if (i + 1) % 5 == 0:
            print(f"  Explained {i+1}/{n_instances} samples")

    # most_common() sorts by count descending and keeps first-seen order on ties.
    sorted_features = feature_counts.most_common()
    print(f"\n  Top features by LIME frequency ({n_instances} samples):")
    for fname, count in sorted_features[:10]:
        print(f"    {fname:35s}: appears in {count}/{n_instances} explanations")

    # Save LIME feature frequency plot
    os.makedirs(RESULTS_DIR, exist_ok=True)
    plt.figure(figsize=(10, 6))
    top_lime = sorted_features[:15]
    names = [name for name, _ in top_lime]
    counts = [count for _, count in top_lime]
    # barh draws index 0 at the bottom, so reverse to put the top feature first.
    plt.barh(range(len(names)), counts[::-1], color='coral')
    plt.yticks(range(len(names)), names[::-1])
    plt.xlabel(f'Frequency in top-10 (out of {n_instances} samples)')
    plt.title(f'LIME Top Features - {model_name.upper()}')
    plt.tight_layout()
    plt.savefig(os.path.join(RESULTS_DIR, f'lime_frequency_{model_name}.png'), dpi=150)
    plt.close()

    return lime_results, sorted_features
def compare_shap_lime(shap_importance, lime_frequency, model_name,
                      top_k=20, min_common=5):
    """Compare SHAP vs LIME feature rankings with a Spearman correlation.

    Each feature's rank is its position within the first ``top_k`` entries of
    the corresponding list. The correlation is computed only over features
    that appear in both truncated lists.

    Args:
        shap_importance: Sequence of ``(feature_name, score)`` pairs, already
            sorted from most to least important.
        lime_frequency: Sequence of ``(feature_name, count)`` pairs, already
            sorted from most to least frequent.
        model_name: Short model name, used only in the printed report.
        top_k: How many leading entries of each ranking to consider.
        min_common: Minimum number of shared features required before a
            correlation is computed.

    Returns:
        ``{'spearman_corr', 'p_value', 'n_common'}`` when at least
        ``min_common`` features are shared, otherwise ``{'n_common': ...}``.
    """
    from scipy.stats import spearmanr

    shap_rank = {name: pos for pos, (name, _) in enumerate(shap_importance[:top_k])}
    lime_rank = {name: pos for pos, (name, _) in enumerate(lime_frequency[:top_k])}
    # Sort the intersection so both rank vectors are built in one fixed order.
    common = sorted(shap_rank.keys() & lime_rank.keys())

    if len(common) < min_common:
        print(f"  Too few common features ({len(common)}) for correlation")
        return {'n_common': len(common)}

    corr, p_value = spearmanr(
        [shap_rank[name] for name in common],
        [lime_rank[name] for name in common],
    )
    print(f"\n  SHAP vs LIME rank correlation ({model_name}):")
    print(f"    Common features in top-{top_k}: {len(common)}")
    print(f"    Spearman correlation: {corr:.4f} (p={p_value:.4f})")
    return {'spearman_corr': float(corr), 'p_value': float(p_value),
            'n_common': len(common)}
def main():
    """Run SHAP and LIME for every saved model and write a JSON summary.

    For each of the MLP, LSTM and 1-D CNN models whose checkpoint exists in
    ``MODELS_DIR``, runs the SHAP analysis, the LIME analysis and the rank
    comparison, then writes the collected results to
    ``<RESULTS_DIR>/explainability_results.json``. Models without a checkpoint
    are skipped with a message.
    """
    # Only the feature matrices and the metadata are needed here.
    X_train, X_test, _y_train, _y_test, _le, _scaler, meta = load_preprocessed()
    class_names = meta['class_names']
    print(f"Data loaded: {X_train.shape} train, {X_test.shape} test")
    print(f"Classes: {class_names}")

    all_xai_results = {}
    models_to_analyze = [
        ('mlp', MLP_IDS),
        ('lstm', LSTM_IDS),
        ('cnn1d', CNN1D_IDS),
    ]
    for model_name, model_class in models_to_analyze:
        model_path = os.path.join(MODELS_DIR, f'{model_name}_best.pt')
        if not os.path.exists(model_path):
            print(f"  Skipping {model_name} - no saved model found")
            continue

        model = load_model(model_class, model_name, num_classes=len(class_names))
        _shap_vals, shap_importance, _exp_idx = run_shap_analysis(
            model, model_name, X_train, X_test, class_names
        )
        _lime_results, lime_frequency = run_lime_analysis(
            model, model_name, X_train, X_test, class_names, n_instances=30
        )
        comparison = compare_shap_lime(shap_importance, lime_frequency, model_name)
        all_xai_results[model_name] = {
            'shap_top_features': [(f, float(v)) for f, v in shap_importance[:15]],
            'lime_top_features': [(f, int(v)) for f, v in lime_frequency[:15]],
            'shap_vs_lime': comparison,
        }

    # If every model was skipped nothing above has created the output
    # directory, so make sure it exists before writing the summary.
    os.makedirs(RESULTS_DIR, exist_ok=True)
    with open(os.path.join(RESULTS_DIR, 'explainability_results.json'), 'w') as f:
        json.dump(all_xai_results, f, indent=2)
    print("\nExplainability analysis complete!")
    print(f"Results saved to {RESULTS_DIR}/")


if __name__ == '__main__':
    main()