File size: 5,872 Bytes
077cbed
551187b
5555147
 
 
 
 
 
 
 
ab8a593
91d1b3e
ab8a593
91d1b3e
5555147
 
91d1b3e
f8721a9
ab8a593
5555147
 
 
 
 
 
 
 
 
e9e1584
 
f7681d1
 
ab8a593
 
91d1b3e
ab8a593
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91d1b3e
 
 
 
 
 
 
 
 
 
 
 
b6e0d81
ab8a593
b6e0d81
 
 
 
 
ab8a593
b6e0d81
 
 
ab8a593
b6e0d81
91d1b3e
ab8a593
91d1b3e
b6e0d81
ab8a593
 
 
 
 
91d1b3e
 
b6e0d81
 
ab8a593
 
b6e0d81
 
91d1b3e
077cbed
ab8a593
91d1b3e
 
 
ab8a593
91d1b3e
 
992cdf4
91d1b3e
 
 
 
 
 
 
 
 
 
 
992cdf4
ab8a593
 
 
 
f8721a9
91d1b3e
 
 
 
 
 
 
 
f8721a9
ab8a593
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.covariance import EllipticEnvelope
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
from sklearn.linear_model import SGDOneClassSVM
from sklearn.kernel_approximation import Nystroem
from sklearn.pipeline import make_pipeline
from sklearn.datasets import make_blobs, make_moons
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
import gradio as gr
import time

# Helper function: Prepare data
def prepare_data(input_data, n_samples, outliers_fraction=0.01):
    """Generate a 2-D toy dataset and append uniformly scattered outliers.

    Args:
        input_data: Dataset name; one of "Central Blob", "Two Blobs",
            "Blob with Noise", "Moons" or "Noise".
        n_samples: Requested sample count. Coerced to ``int`` because UI
            sliders may deliver the value as a float.
        outliers_fraction: Fraction of ``n_samples`` added as outliers. At
            least one outlier is always added.

    Returns:
        Array of shape ``(n_points, 2)`` with the inliers first and the
        outliers last. The blob datasets draw ``n_samples - n_outliers``
        inliers; "Moons" and "Noise" draw ``n_samples`` inliers, so they end
        up with ``n_samples + n_outliers`` rows.

    Raises:
        KeyError: If ``input_data`` is not a known dataset name.
    """
    n_samples = int(n_samples)
    n_outliers = max(int(outliers_fraction * n_samples), 1)
    n_inliers = n_samples - n_outliers
    blobs_params = dict(random_state=0, n_samples=n_inliers, n_features=2)

    # Builders are invoked lazily so that only the requested dataset is
    # generated instead of all five on every call.
    data_builders = {
        "Central Blob": lambda: make_blobs(
            centers=[[0, 0], [0, 0]], cluster_std=0.5, **blobs_params
        )[0],
        "Two Blobs": lambda: make_blobs(
            centers=[[2, 2], [-2, -2]], cluster_std=[0.5, 0.5], **blobs_params
        )[0],
        "Blob with Noise": lambda: make_blobs(
            centers=[[2, 2], [-2, -2]], cluster_std=[1.5, 0.3], **blobs_params
        )[0],
        "Moons": lambda: 4.0 * (
            make_moons(n_samples=n_samples, noise=0.05, random_state=0)[0]
            - np.array([0.5, 0.25])
        ),
        "Noise": lambda: 14.0 * (
            np.random.RandomState(42).rand(n_samples, 2) - 0.5
        ),
    }
    X = data_builders[input_data]()

    # Fixed seed keeps the injected outliers reproducible between calls.
    rng = np.random.RandomState(42)
    outliers = rng.uniform(low=-6, high=6, size=(n_outliers, 2))
    return np.concatenate([X, outliers], axis=0)

# Autoencoder Anomaly Detection
def build_autoencoder(input_dim):
    """Build and compile a small dense autoencoder.

    The network is input -> 64 -> 32 -> 64 -> ``input_dim``. Hidden layers use
    ReLU and the output layer uses a sigmoid. It is compiled with Adam
    (learning rate 0.001) and mean-squared-error loss.

    Args:
        input_dim: Number of input (and output) features.

    Returns:
        The compiled ``Sequential`` model.
    """
    hidden_widths = (64, 32, 64)

    # The first hidden layer also declares the input width.
    layers = [Dense(hidden_widths[0], activation='relu', input_dim=input_dim)]
    layers.extend(Dense(width, activation='relu') for width in hidden_widths[1:])
    layers.append(Dense(input_dim, activation='sigmoid'))

    autoencoder = Sequential(layers)
    autoencoder.compile(loss='mse', optimizer=Adam(learning_rate=0.001))
    return autoencoder

def autoencoder_anomaly_detection(X, outliers_fraction=0.01, epochs=50):
    """Flag anomalies by autoencoder reconstruction error.

    An autoencoder is trained to reproduce ``X``; the samples whose
    mean-squared reconstruction error lies above the
    ``100 * (1 - outliers_fraction)`` percentile are flagged.

    Args:
        X: Array of shape ``(n_samples, n_features)``.
        outliers_fraction: Expected fraction of anomalies; sets the error
            percentile used as the threshold.
        epochs: Number of training epochs.

    Returns:
        Integer array of shape ``(n_samples,)`` holding 1 for flagged
        samples and 0 otherwise.
    """
    X = np.asarray(X, dtype=float)

    # The autoencoder's output layer is a sigmoid, so it can only emit values
    # in (0, 1). Rescale every feature to [0, 1] first; otherwise points
    # outside that box can never be reconstructed and the error would mostly
    # measure distance from the unit square.
    feature_min = X.min(axis=0)
    feature_span = X.max(axis=0) - feature_min
    feature_span[feature_span == 0] = 1.0  # constant feature: avoid 0-division
    X_scaled = (X - feature_min) / feature_span

    model = build_autoencoder(X_scaled.shape[1])
    model.fit(X_scaled, X_scaled, epochs=epochs, batch_size=32, verbose=0)
    reconstruction = model.predict(X_scaled, verbose=0)

    reconstruction_error = np.mean((X_scaled - reconstruction) ** 2, axis=1)
    threshold = np.percentile(reconstruction_error, 100 * (1 - outliers_fraction))
    return (reconstruction_error > threshold).astype(int)

# Function to generate scatter plots
def plot_interactive_feature_scatter(input_data, feature_x, feature_y, n_samples):
    """Scatter-plot two selected features of a generated dataset.

    Args:
        input_data: Dataset name understood by ``prepare_data``.
        feature_x: Feature shown on the x axis ("Feature1" or "Feature2").
        feature_y: Feature shown on the y axis ("Feature1" or "Feature2").
        n_samples: Number of samples to generate.

    Returns:
        A matplotlib ``Figure`` containing the scatter plot.
    """
    # Local import: the file only imports matplotlib.pyplot at module level.
    from matplotlib.figure import Figure

    X = prepare_data(input_data, n_samples)

    # Map the selected feature names to data columns so the choice changes
    # what is plotted, not only the axis labels. Unrecognised names fall back
    # to the previous fixed columns (0 for x, 1 for y).
    column_of = {"Feature1": 0, "Feature2": 1}
    x_col = column_of.get(feature_x, 0)
    y_col = column_of.get(feature_y, 1)

    # A standalone Figure is not registered with pyplot's global figure
    # manager, so repeated requests do not accumulate open figures and
    # concurrent requests cannot pick up each other's "current" figure.
    fig = Figure(figsize=(6, 6))
    ax = fig.subplots()
    ax.scatter(X[:, x_col], X[:, y_col], alpha=0.8, c="blue", s=20, label="Data Points")
    ax.set_title(f"Feature Interaction Scatter Plot - {feature_x} vs {feature_y}")
    ax.set_xlabel(feature_x)
    ax.set_ylabel(feature_y)
    ax.legend()
    return fig

# Function to train models and generate comparison plots
def train_models(input_data, outliers_fraction, n_samples, clf_name):
    """Fit the selected anomaly detector and plot its predictions.

    Args:
        input_data: Dataset name understood by ``prepare_data``.
        outliers_fraction: Expected fraction of outliers; used both to
            generate the data and to configure the detector.
        n_samples: Number of samples to generate.
        clf_name: One of "Robust covariance", "One-Class SVM",
            "One-Class SVM (SGD)", "Isolation Forest",
            "Local Outlier Factor" or "Autoencoders".

    Returns:
        A matplotlib ``Figure`` with the data coloured by prediction:
        outliers in "#377eb8" and inliers in "#ff7f00".

    Raises:
        ValueError: If ``clf_name`` is not a known model name.
    """
    # Local import: the file only imports matplotlib.pyplot at module level.
    from matplotlib.figure import Figure

    X = prepare_data(input_data, n_samples, outliers_fraction)

    # scikit-learn only accepts contamination in (0, 0.5]; cap the value so a
    # larger requested fraction does not crash these estimators. The SVM
    # variants take ``nu`` in (0, 1] and receive the fraction unchanged.
    contamination = min(outliers_fraction, 0.5)

    # Factories are called lazily so only the selected estimator is built.
    estimator_factories = {
        "Robust covariance": lambda: EllipticEnvelope(contamination=contamination),
        "One-Class SVM": lambda: svm.OneClassSVM(
            nu=outliers_fraction, kernel="rbf", gamma=0.1
        ),
        "One-Class SVM (SGD)": lambda: make_pipeline(
            Nystroem(gamma=0.1, random_state=42, n_components=150),
            SGDOneClassSVM(nu=outliers_fraction, random_state=42),
        ),
        "Isolation Forest": lambda: IsolationForest(
            contamination=contamination, random_state=42
        ),
        "Local Outlier Factor": lambda: LocalOutlierFactor(
            n_neighbors=35, contamination=contamination
        ),
    }

    if clf_name == "Autoencoders":
        # Train the network only when it is actually requested.
        flagged = autoencoder_anomaly_detection(X, outliers_fraction)
        # The autoencoder helper returns 1 = anomaly / 0 = normal; convert to
        # the scikit-learn convention (-1 = outlier, +1 = inlier) so the
        # colour mapping below means the same thing for every model.
        y_pred = np.where(flagged == 1, -1, 1)
    elif clf_name in estimator_factories:
        clf = estimator_factories[clf_name]()
        if clf_name == "Local Outlier Factor":
            # Without novelty=True, LOF only offers fit_predict.
            y_pred = clf.fit_predict(X)
        else:
            clf.fit(X)
            y_pred = clf.predict(X)
    else:
        raise ValueError(f"Unknown model name: {clf_name!r}")

    # Plot results on a standalone Figure so repeated requests do not
    # accumulate figures in pyplot's global registry.
    colors = np.array(["#377eb8", "#ff7f00"])
    fig = Figure(figsize=(5, 5))
    ax = fig.subplots()
    ax.scatter(X[:, 0], X[:, 1], c=colors[(y_pred + 1) // 2], s=20)
    ax.set_title(clf_name)
    return fig

# Gradio Interface
# Choice lists shared by the UI components below.
DATASET_CHOICES = ["Central Blob", "Two Blobs", "Blob with Noise", "Moons", "Noise"]
FEATURE_CHOICES = ["Feature1", "Feature2"]
MODEL_CHOICES = [
    "Robust covariance",
    "One-Class SVM",
    "One-Class SVM (SGD)",
    "Isolation Forest",
    "Local Outlier Factor",
    "Autoencoders",
]

with gr.Blocks() as demo:
    gr.Markdown("## Anomaly Detection Comparison App")

    # Section 1: scatter plot of the generated dataset.
    gr.Markdown("### Interactive Feature Scatter Plot")
    input_data = gr.Radio(label="Dataset", choices=DATASET_CHOICES, value="Moons")
    feature_x = gr.Dropdown(
        label="Feature 1", choices=list(FEATURE_CHOICES), value="Feature1"
    )
    feature_y = gr.Dropdown(
        label="Feature 2", choices=list(FEATURE_CHOICES), value="Feature2"
    )
    n_samples = gr.Slider(
        label="Number of Samples", minimum=10, maximum=10000, step=100, value=500
    )
    scatter_plot_button = gr.Button("Generate Scatter Plot")
    scatter_plot = gr.Plot(label="Feature Scatter Plot")

    scatter_plot_button.click(
        plot_interactive_feature_scatter,
        [input_data, feature_x, feature_y, n_samples],
        scatter_plot,
    )

    # Section 2: run one detector on the same dataset settings and plot it.
    gr.Markdown("### Compare Anomaly Detection Algorithms")
    outliers_fraction = gr.Slider(
        label="Fraction of Outliers", minimum=0.001, maximum=0.999, step=0.01, value=0.01
    )
    input_models = gr.Radio(
        label="Select Model", choices=MODEL_CHOICES, value="Isolation Forest"
    )
    comparison_plot = gr.Plot(label="Model Comparison Results")
    generate_comparison_plot = gr.Button("Generate Comparison Plot")

    generate_comparison_plot.click(
        train_models,
        [input_data, outliers_fraction, n_samples, input_models],
        comparison_plot,
    )

demo.launch()