Spaces:
Build error
Build error
| import numpy as np | |
| import pandas as pd | |
| from sklearn.datasets import make_classification | |
| from sklearn.ensemble import IsolationForest | |
| from sklearn.metrics import roc_curve, auc | |
| import shap | |
| import matplotlib.pyplot as plt | |
| import gradio as gr | |
| # Generate synthetic data with 20 features | |
| np.random.seed(42) | |
| X, _ = make_classification( | |
| n_samples=500, | |
| n_features=20, | |
| n_informative=10, | |
| n_redundant=5, | |
| n_clusters_per_class=1, | |
| random_state=42 | |
| ) | |
| outliers = np.random.uniform(low=-6, high=6, size=(50, 20)) # Add outliers | |
| X = np.vstack([X, outliers]) | |
| # Convert to DataFrame | |
| columns = [f"Feature{i+1}" for i in range(20)] | |
| df = pd.DataFrame(X, columns=columns) | |
| # Fit Isolation Forest | |
| iso_forest = IsolationForest( | |
| n_estimators=100, | |
| max_samples=256, | |
| contamination=0.1, | |
| random_state=42 | |
| ) | |
| iso_forest.fit(df) | |
| # Predict anomaly scores | |
| anomaly_scores = iso_forest.decision_function(df) # Negative values indicate anomalies | |
| anomaly_labels = iso_forest.predict(df) # -1 for anomaly, 1 for normal | |
| # Add results to DataFrame | |
| df["Anomaly_Score"] = anomaly_scores | |
| df["Anomaly_Label"] = np.where(anomaly_labels == -1, "Anomaly", "Normal") | |
| # Generate true labels (1 for anomaly, 0 for normal) for ROC curve | |
| true_labels = np.where(df["Anomaly_Label"] == "Anomaly", 1, 0) | |
| # SHAP Explainability | |
| explainer = shap.Explainer(iso_forest, df[columns]) | |
| shap_values = explainer(df[columns]) | |
| # Define functions for Gradio | |
| def get_roc_curve(): | |
| """Generates the ROC curve plot.""" | |
| fpr, tpr, _ = roc_curve(true_labels, -df["Anomaly_Score"]) # Use -scores as higher scores mean normal | |
| roc_auc = auc(fpr, tpr) | |
| plt.figure(figsize=(8, 6)) | |
| plt.plot(fpr, tpr, label=f"ROC Curve (AUC = {roc_auc:.2f})") | |
| plt.plot([0, 1], [0, 1], "k--", label="Random Guess") | |
| plt.xlabel("False Positive Rate") | |
| plt.ylabel("True Positive Rate") | |
| plt.title("Receiver Operating Characteristic (ROC) Curve") | |
| plt.legend(loc="lower right") | |
| plt.grid() | |
| plt.savefig("roc_curve.png") | |
| return "roc_curve.png" | |
| def get_anomaly_samples(): | |
| """Returns formatted top, middle, and bottom 10 records based on anomaly score.""" | |
| sorted_df = df.sort_values("Anomaly_Score", ascending=False) | |
| # Top 10 anomalies | |
| top_10 = sorted_df[sorted_df["Anomaly_Label"] == "Anomaly"].head(10) | |
| # Middle 10 (mixed records) | |
| mid_start = len(sorted_df) // 2 - 5 | |
| middle_10 = sorted_df.iloc[mid_start: mid_start + 10] | |
| # Bottom 10 normal records | |
| bottom_10 = sorted_df[sorted_df["Anomaly_Label"] == "Normal"].tail(10) | |
| return top_10, middle_10, bottom_10 | |
| # Create Gradio interface | |
| with gr.Blocks() as demo: | |
| gr.Markdown("# Isolation Forest Anomaly Detection") | |
| with gr.Tab("Anomaly Samples"): | |
| gr.HTML("<h3 style='text-align: center; font-size: 18px; font-weight: bold;'>Top 10 Records (Anomalies)</h3>") | |
| top_table = gr.Dataframe(label="Top 10 Records") | |
| gr.HTML("<h3 style='text-align: center; font-size: 18px; font-weight: bold;'>Middle 10 Records (Mixed)</h3>") | |
| middle_table = gr.Dataframe(label="Middle 10 Records") | |
| gr.HTML("<h3 style='text-align: center; font-size: 18px; font-weight: bold;'>Bottom 10 Records (Normal)</h3>") | |
| bottom_table = gr.Dataframe(label="Bottom 10 Records") | |
| anomaly_samples_button = gr.Button("Show Anomaly Samples") | |
| anomaly_samples_button.click( | |
| get_anomaly_samples, | |
| outputs=[top_table, middle_table, bottom_table] | |
| ) | |
| with gr.Tab("ROC Curve"): | |
| gr.Markdown("### ROC Curve for Isolation Forest") | |
| roc_button = gr.Button("Generate ROC Curve") | |
| roc_image = gr.Image() | |
| roc_button.click(get_roc_curve, outputs=roc_image) | |
| # Launch the Gradio app | |
| demo.launch() | |