# rtik007's picture
# Update app.py
# 3d9e921 verified
# raw
# history blame
# 2.86 kB
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.ensemble import IsolationForest
import shap
import matplotlib.pyplot as plt
import gradio as gr
# Build the synthetic dataset: 500 generated samples described by 20 features.
# The global NumPy seed is set first so the uniform outliers drawn below are
# reproducible; make_classification uses its own random_state.
np.random.seed(42)
X = make_classification(
    n_samples=500,
    n_features=20,
    n_informative=10,
    n_redundant=5,
    n_clusters_per_class=1,
    random_state=42,
)[0]

# Append 50 rows drawn uniformly from [-6, 6) to act as outliers.
outliers = np.random.uniform(-6, 6, (50, 20))
X = np.concatenate((X, outliers), axis=0)

# Wrap the matrix in a DataFrame with columns Feature1 .. Feature20.
columns = [f"Feature{idx}" for idx in range(1, 21)]
df = pd.DataFrame(X, columns=columns)

# Train the Isolation Forest while df still holds only the feature columns.
iso_forest = IsolationForest(
    random_state=42,
    contamination=0.1,
    max_samples=256,
    n_estimators=100,
).fit(df)

# Score every row before any result column is added to df:
#   predict            -> -1 for anomaly, 1 for normal
#   decision_function  -> negative values indicate anomalies
anomaly_labels = iso_forest.predict(df)
anomaly_scores = iso_forest.decision_function(df)

# Attach the results next to the features.
df["Anomaly_Score"] = anomaly_scores
df["Anomaly_Label"] = np.where(anomaly_labels != -1, "Normal", "Anomaly")

# SHAP explainability, restricted to the feature columns (the result columns
# added above are not model inputs).
explainer = shap.Explainer(iso_forest, df[columns])
shap_values = explainer(df[columns])
# Define functions for Gradio
def get_anomaly_samples(data=None, n=10):
    """Return the most anomalous, middle, and most normal records.

    Rows are ranked by ``Anomaly_Score`` in ascending order. The score comes
    from ``IsolationForest.decision_function``, where negative values indicate
    anomalies, so the lowest scores are the strongest anomalies and must come
    first. (Sorting in descending order would instead surface the borderline
    records on both sides of the decision threshold.)

    Args:
        data: DataFrame containing the ``"Anomaly_Score"`` and
            ``"Anomaly_Label"`` columns. When omitted, the module-level
            ``df`` is used.
        n: Maximum number of rows in each returned table (default 10).

    Returns:
        A ``(top, middle, bottom)`` tuple of DataFrames:

        * ``top``    - up to ``n`` rows labelled ``"Anomaly"`` with the lowest
          scores, most anomalous first.
        * ``middle`` - up to ``n`` rows centred on the median rank, regardless
          of label.
        * ``bottom`` - up to ``n`` rows labelled ``"Normal"`` with the highest
          scores, in ascending order (most normal last).
    """
    frame = df if data is None else data

    # Ascending: most negative (most anomalous) scores first.
    ranked = frame.sort_values("Anomaly_Score", ascending=True)

    flagged = ranked[ranked["Anomaly_Label"] == "Anomaly"]
    top = flagged.head(n)

    # Clamp at 0 so a frame shorter than the window does not yield a negative
    # start index, which iloc would interpret as counting from the end.
    mid_start = max(len(ranked) // 2 - n // 2, 0)
    middle = ranked.iloc[mid_start:mid_start + n]

    regular = ranked[ranked["Anomaly_Label"] == "Normal"]
    bottom = regular.tail(n)

    return top, middle, bottom
# Create Gradio interface: one tab with three tables (top / middle / bottom
# records by anomaly score) that are filled when the button is clicked.
with gr.Blocks() as demo:
    gr.Markdown("# Isolation Forest Anomaly Detection")

    with gr.Tab("Anomaly Samples"):
        # NOTE: gr.Markdown does not take `unsafe_allow_html` (that keyword
        # belongs to Streamlit's st.markdown), so it is not passed here; the
        # inline HTML headings are given to the Markdown component as-is.
        gr.Markdown("<h3 style='text-align: center; font-size: 18px; font-weight: bold;'>Top 10 Records (Anomalies)</h3>")
        top_table = gr.Dataframe(label="Top 10 Records")

        gr.Markdown("<h3 style='text-align: center; font-size: 18px; font-weight: bold;'>Middle 10 Records (Mixed)</h3>")
        middle_table = gr.Dataframe(label="Middle 10 Records")

        gr.Markdown("<h3 style='text-align: center; font-size: 18px; font-weight: bold;'>Bottom 10 Records (Normal)</h3>")
        bottom_table = gr.Dataframe(label="Bottom 10 Records")

        # The handler takes no inputs; its three returned DataFrames map
        # positionally onto the three tables.
        anomaly_samples_button = gr.Button("Show Anomaly Samples")
        anomaly_samples_button.click(
            get_anomaly_samples,
            outputs=[top_table, middle_table, bottom_table],
        )

# Launch the Gradio app
demo.launch()