Spaces:
Sleeping
Sleeping
File size: 5,872 Bytes
077cbed 551187b 5555147 ab8a593 91d1b3e ab8a593 91d1b3e 5555147 91d1b3e f8721a9 ab8a593 5555147 e9e1584 f7681d1 ab8a593 91d1b3e ab8a593 91d1b3e b6e0d81 ab8a593 b6e0d81 ab8a593 b6e0d81 ab8a593 b6e0d81 91d1b3e ab8a593 91d1b3e b6e0d81 ab8a593 91d1b3e b6e0d81 ab8a593 b6e0d81 91d1b3e 077cbed ab8a593 91d1b3e ab8a593 91d1b3e 992cdf4 91d1b3e 992cdf4 ab8a593 f8721a9 91d1b3e f8721a9 ab8a593 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 |
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.covariance import EllipticEnvelope
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
from sklearn.linear_model import SGDOneClassSVM
from sklearn.kernel_approximation import Nystroem
from sklearn.pipeline import make_pipeline
from sklearn.datasets import make_blobs, make_moons
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
import gradio as gr
import time
# Helper function: Prepare data
def prepare_data(input_data, n_samples, outliers_fraction=0.01):
    """Generate a 2-D toy dataset and append uniformly distributed outliers.

    Args:
        input_data: Name of the dataset to generate. One of "Central Blob",
            "Two Blobs", "Blob with Noise", "Moons" or "Noise".
        n_samples: Requested number of samples. Coerced to ``int`` so that a
            whole-number float passed by the caller is accepted.
        outliers_fraction: Fraction of ``n_samples`` used to size the outlier
            set; at least one outlier is always appended.

    Returns:
        A 2-column array holding the generated points followed by the
        outliers, which are drawn uniformly from ``[-6, 6)`` on both axes.

    Raises:
        KeyError: If ``input_data`` is not one of the known dataset names.
    """
    n_samples = int(n_samples)
    n_outliers = max(int(outliers_fraction * n_samples), 1)
    n_inliers = n_samples - n_outliers
    blobs_params = dict(random_state=0, n_samples=n_inliers, n_features=2)

    # Each entry is a factory, so only the requested dataset is generated.
    # Every generator is seeded independently, which keeps the output
    # identical to building all of them up front.
    # Note: the blob datasets contain n_inliers points, whereas "Moons" and
    # "Noise" contain the full n_samples points.
    dataset_factories = {
        "Central Blob": lambda: make_blobs(
            centers=[[0, 0], [0, 0]], cluster_std=0.5, **blobs_params
        )[0],
        "Two Blobs": lambda: make_blobs(
            centers=[[2, 2], [-2, -2]], cluster_std=[0.5, 0.5], **blobs_params
        )[0],
        "Blob with Noise": lambda: make_blobs(
            centers=[[2, 2], [-2, -2]], cluster_std=[1.5, 0.3], **blobs_params
        )[0],
        "Moons": lambda: 4.0 * (
            make_moons(n_samples=n_samples, noise=0.05, random_state=0)[0]
            - np.array([0.5, 0.25])
        ),
        "Noise": lambda: 14.0 * (
            np.random.RandomState(42).rand(n_samples, 2) - 0.5
        ),
    }
    X = dataset_factories[input_data]()

    rng = np.random.RandomState(42)
    outliers = rng.uniform(low=-6, high=6, size=(n_outliers, 2))
    return np.concatenate([X, outliers], axis=0)
# Autoencoder Anomaly Detection
def build_autoencoder(input_dim):
    """Create and compile a small fully connected autoencoder.

    The network maps ``input_dim`` features through ReLU layers of 64, 32 and
    64 units and back to ``input_dim`` sigmoid outputs. It is compiled with
    the Adam optimizer (learning rate 0.001) and a mean-squared-error loss.

    Args:
        input_dim: Number of input (and output) features.

    Returns:
        The compiled Keras ``Sequential`` model.
    """
    layer_stack = (
        Dense(64, activation='relu', input_dim=input_dim),
        Dense(32, activation='relu'),
        Dense(64, activation='relu'),
        Dense(input_dim, activation='sigmoid'),
    )

    autoencoder = Sequential()
    for layer in layer_stack:
        autoencoder.add(layer)

    autoencoder.compile(loss='mse', optimizer=Adam(learning_rate=0.001))
    return autoencoder
def autoencoder_anomaly_detection(X, outliers_fraction=0.01, epochs=50):
    """Flag anomalies by their autoencoder reconstruction error.

    An autoencoder is trained to reproduce ``X``; the samples whose
    reconstruction error lies above the ``1 - outliers_fraction`` quantile
    are reported as anomalies.

    Args:
        X: 2-D array of samples (rows) by features (columns).
        outliers_fraction: Expected fraction of anomalous samples, used to
            place the error threshold.
        epochs: Number of training epochs.

    Returns:
        Integer array with one entry per sample: ``1`` for an anomaly and
        ``0`` for a normal sample.
    """
    X = np.asarray(X, dtype=float)

    # The model built by build_autoencoder ends in a sigmoid layer, so it can
    # only emit values in (0, 1). Min-max scale every feature into [0, 1] so
    # the reconstruction target is actually reachable.
    feature_min = X.min(axis=0)
    feature_range = X.max(axis=0) - feature_min
    feature_range[feature_range == 0] = 1.0  # constant column: avoid 0-division
    X_scaled = (X - feature_min) / feature_range

    model = build_autoencoder(X_scaled.shape[1])
    model.fit(X_scaled, X_scaled, epochs=epochs, batch_size=32, verbose=0)
    reconstruction = model.predict(X_scaled, verbose=0)

    # Per-sample mean squared error, measured in the scaled feature space.
    reconstruction_error = np.mean((X_scaled - reconstruction) ** 2, axis=1)
    threshold = np.percentile(reconstruction_error, 100 * (1 - outliers_fraction))
    return (reconstruction_error > threshold).astype(int)
# Function to generate scatter plots
def plot_interactive_feature_scatter(input_data, feature_x, feature_y, n_samples):
    """Draw a scatter plot of two features of a generated dataset.

    Args:
        input_data: Dataset name understood by ``prepare_data``.
        feature_x: Feature shown on the x-axis ("Feature1" or "Feature2");
            also used as the axis label.
        feature_y: Feature shown on the y-axis ("Feature1" or "Feature2");
            also used as the axis label.
        n_samples: Number of samples to generate.

    Returns:
        The matplotlib ``Figure`` containing the scatter plot.
    """
    # A standalone Figure is not registered with pyplot's global figure
    # manager, so repeated calls do not accumulate open figures.
    from matplotlib.figure import Figure

    X = prepare_data(input_data, n_samples)

    # Honour the feature selection instead of always plotting column 0
    # against column 1. Unrecognised labels fall back to the former columns.
    feature_columns = {"Feature1": 0, "Feature2": 1}
    x_col = feature_columns.get(feature_x, 0)
    y_col = feature_columns.get(feature_y, 1)

    fig = Figure(figsize=(6, 6))
    ax = fig.subplots()
    ax.scatter(X[:, x_col], X[:, y_col], alpha=0.8, c="blue", s=20, label="Data Points")
    ax.set_title(f"Feature Interaction Scatter Plot - {feature_x} vs {feature_y}")
    ax.set_xlabel(feature_x)
    ax.set_ylabel(feature_y)
    ax.legend()
    return fig
# Function to train models and generate comparison plots
def train_models(input_data, outliers_fraction, n_samples, clf_name):
    """Run one anomaly detector on a generated dataset and plot its labels.

    Args:
        input_data: Dataset name understood by ``prepare_data``.
        outliers_fraction: Expected fraction of outliers; used both to
            generate the data and to configure the detector.
        n_samples: Number of samples to generate.
        clf_name: Detector to run. One of "Robust covariance",
            "One-Class SVM", "One-Class SVM (SGD)", "Isolation Forest",
            "Local Outlier Factor" or "Autoencoders".

    Returns:
        A matplotlib ``Figure`` with the points coloured by predicted label
        (index 0 of the palette for outliers, index 1 for inliers).

    Raises:
        ValueError: If ``clf_name`` is not a known detector name.
    """
    # A standalone Figure is not registered with pyplot's global figure
    # manager, so repeated calls do not accumulate open figures.
    from matplotlib.figure import Figure

    X = prepare_data(input_data, n_samples, outliers_fraction)

    # scikit-learn documents (0, 0.5] as the valid contamination range for
    # these estimators; clamp so larger fractions do not raise.
    contamination = min(outliers_fraction, 0.5)

    # Factories rather than instances: only the selected detector is built.
    estimator_factories = {
        "Robust covariance": lambda: EllipticEnvelope(contamination=contamination),
        "One-Class SVM": lambda: svm.OneClassSVM(
            nu=outliers_fraction, kernel="rbf", gamma=0.1
        ),
        "One-Class SVM (SGD)": lambda: make_pipeline(
            Nystroem(gamma=0.1, random_state=42, n_components=150),
            SGDOneClassSVM(nu=outliers_fraction, random_state=42),
        ),
        "Isolation Forest": lambda: IsolationForest(
            contamination=contamination, random_state=42
        ),
        "Local Outlier Factor": lambda: LocalOutlierFactor(
            n_neighbors=35, contamination=contamination
        ),
    }

    if clf_name == "Autoencoders":
        # Train the autoencoder only when it is actually requested.
        anomaly_flags = autoencoder_anomaly_detection(X, outliers_fraction)
        # Convert its 1 = anomaly / 0 = normal flags to the scikit-learn
        # convention (-1 = outlier, 1 = inlier) so the colour mapping below
        # means the same thing for every detector.
        y_pred = np.where(anomaly_flags == 1, -1, 1)
    elif clf_name in estimator_factories:
        clf = estimator_factories[clf_name]()
        if clf_name == "Local Outlier Factor":
            # This LOF configuration is scored through fit_predict on the
            # data it was fitted on.
            y_pred = clf.fit_predict(X)
        else:
            clf.fit(X)
            y_pred = clf.predict(X)
    else:
        raise ValueError(f"Unknown model: {clf_name!r}")

    # Plot results
    fig = Figure(figsize=(5, 5))
    ax = fig.subplots()
    colors = np.array(["#377eb8", "#ff7f00"])
    # (-1 + 1) // 2 == 0 selects the outlier colour, (1 + 1) // 2 == 1 the
    # inlier colour.
    ax.scatter(X[:, 0], X[:, 1], c=colors[(y_pred + 1) // 2], s=20)
    ax.set_title(clf_name)
    return fig
# Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("## Anomaly Detection Comparison App")

    # Interactive Scatter Plot
    gr.Markdown("### Interactive Feature Scatter Plot")
    input_data = gr.Radio(
        choices=["Central Blob", "Two Blobs", "Blob with Noise", "Moons", "Noise"],
        value="Moons",
        label="Dataset",
    )
    feature_x = gr.Dropdown(choices=["Feature1", "Feature2"], value="Feature1", label="Feature 1")
    feature_y = gr.Dropdown(choices=["Feature1", "Feature2"], value="Feature2", label="Feature 2")
    # step=10 keeps both the default (500) and the maximum (10000) on the
    # slider grid that starts at the minimum of 10.
    n_samples = gr.Slider(minimum=10, maximum=10000, step=10, value=500, label="Number of Samples")
    scatter_plot_button = gr.Button("Generate Scatter Plot")
    scatter_plot = gr.Plot(label="Feature Scatter Plot")
    scatter_plot_button.click(
        fn=plot_interactive_feature_scatter,
        inputs=[input_data, feature_x, feature_y, n_samples],
        outputs=scatter_plot,
    )

    # Compare Anomaly Detection Algorithms
    gr.Markdown("### Compare Anomaly Detection Algorithms")
    # The fraction is passed as `contamination` to several scikit-learn
    # detectors, whose documented valid range is (0, 0.5]; cap the slider
    # there. step=0.001 keeps the default (0.01) on the slider grid.
    outliers_fraction = gr.Slider(
        minimum=0.001, maximum=0.5, step=0.001, value=0.01, label="Fraction of Outliers"
    )
    input_models = gr.Radio(
        choices=[
            "Robust covariance",
            "One-Class SVM",
            "One-Class SVM (SGD)",
            "Isolation Forest",
            "Local Outlier Factor",
            "Autoencoders",
        ],
        value="Isolation Forest",
        label="Select Model",
    )
    comparison_plot = gr.Plot(label="Model Comparison Results")
    generate_comparison_plot = gr.Button("Generate Comparison Plot")
    generate_comparison_plot.click(
        fn=train_models,
        inputs=[input_data, outliers_fraction, n_samples, input_models],
        outputs=comparison_plot,
    )

demo.launch()
|