rtik007 committed on
Commit
551187b
·
verified ·
1 Parent(s): 077cbed

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +103 -53
app.py CHANGED
@@ -1,29 +1,45 @@
1
  import numpy as np
2
- import pandas as pd
3
- from sklearn.datasets import make_classification, make_blobs, make_moons
4
- from sklearn.ensemble import IsolationForest
 
 
 
 
 
 
 
 
 
 
 
5
  from sklearn.covariance import EllipticEnvelope
 
6
  from sklearn.neighbors import LocalOutlierFactor
7
  from sklearn.linear_model import SGDOneClassSVM
8
- from sklearn.pipeline import make_pipeline
9
  from sklearn.kernel_approximation import Nystroem
10
- from sklearn import svm
11
- import matplotlib.pyplot as plt
12
- import gradio as gr
13
- import time
14
- from functools import partial
 
 
 
 
15
 
 
 
 
16
 
17
- # Function to train and visualize anomaly detection models
18
- def train_models(input_data, outliers_fraction, n_samples, clf_name):
19
- """Train anomaly detection models and plot results."""
20
  n_outliers = int(outliers_fraction * n_samples)
21
  n_inliers = n_samples - n_outliers
22
  blobs_params = dict(random_state=0, n_samples=n_inliers, n_features=2)
23
- NAME_CLF_MAPPING = {
24
- "Robust covariance": EllipticEnvelope(contamination=outliers_fraction),
25
- "One-Class SVM": svm.OneClassSVM(nu=outliers_fraction, kernel="rbf", gamma=0.1),
26
- "One-Class SVM (SGD)": make_pipeline(
27
  Nystroem(gamma=0.1, random_state=42, n_components=150),
28
  SGDOneClassSVM(
29
  nu=outliers_fraction,
@@ -35,76 +51,110 @@ def train_models(input_data, outliers_fraction, n_samples, clf_name):
35
  ),
36
  "Isolation Forest": IsolationForest(contamination=outliers_fraction, random_state=42),
37
  "Local Outlier Factor": LocalOutlierFactor(n_neighbors=35, contamination=outliers_fraction),
38
- }
39
  DATA_MAPPING = {
40
- "Central Blob": make_blobs(centers=[[0, 0], [0, 0]], cluster_std=0.5, **blobs_params)[0],
41
- "Two Blobs": make_blobs(centers=[[2, 2], [-2, -2]], cluster_std=[0.5, 0.5], **blobs_params)[0],
42
- "Blob with Noise": make_blobs(centers=[[2, 2], [-2, -2]], cluster_std=[1.5, 0.3], **blobs_params)[0],
43
- "Moons": 4.0
44
- * (make_moons(n_samples=n_samples, noise=0.05, random_state=0)[0] - np.array([0.5, 0.25])),
45
- "Noise": 14.0 * (np.random.RandomState(42).rand(n_samples, 2) - 0.5),
 
 
 
46
  }
 
 
 
 
 
 
 
 
 
 
 
 
47
  xx, yy = np.meshgrid(np.linspace(-7, 7, 150), np.linspace(-7, 7, 150))
48
  clf = NAME_CLF_MAPPING[clf_name]
49
- plt.figure(figsize=(10, 8))
50
- X = DATA_MAPPING[input_data]
51
- rng = np.random.RandomState(42)
52
- X = np.concatenate([X, rng.uniform(low=-6, high=6, size=(n_outliers, 2))], axis=0)
53
 
54
- # Convert X to DataFrame if using IsolationForest to ensure feature names
55
- if clf_name == "Isolation Forest":
56
- X = pd.DataFrame(X, columns=["Feature1", "Feature2"])
57
 
 
 
 
 
 
58
  t0 = time.time()
59
  clf.fit(X)
60
  t1 = time.time()
61
-
62
  if clf_name == "Local Outlier Factor":
63
  y_pred = clf.fit_predict(X)
64
  else:
65
  y_pred = clf.fit(X).predict(X)
66
 
67
- if clf_name != "Local Outlier Factor":
 
68
  Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
69
  Z = Z.reshape(xx.shape)
70
- plt.contour(xx, yy, Z, levels=[0], linewidths=2, colors="black")
71
 
72
  colors = np.array(["#377eb8", "#ff7f00"])
73
- plt.scatter(X.iloc[:, 0], X.iloc[:, 1], s=30, color=colors[(y_pred + 1) // 2])
74
 
75
  plt.xlim(-7, 7)
76
  plt.ylim(-7, 7)
77
  plt.xticks(())
78
  plt.yticks(())
79
- plt.title(f"{clf_name} (time: {t1 - t0:.2f}s)")
 
 
 
 
 
 
 
 
 
80
  return plt
81
 
 
82
 
83
- # Gradio interface setup
 
 
 
 
 
 
 
 
84
  with gr.Blocks() as demo:
85
- gr.Markdown("# Anomaly Detection Algorithms Comparison")
 
86
 
87
- input_models = [
88
- "Robust covariance", "One-Class SVM", "One-Class SVM (SGD)", "Isolation Forest", "Local Outlier Factor"
89
- ]
90
  input_data = gr.Radio(
91
  choices=["Central Blob", "Two Blobs", "Blob with Noise", "Moons", "Noise"],
92
- value="Moons",
93
- label="Dataset Type"
94
- )
95
- n_samples = gr.Slider(
96
- minimum=100, maximum=500, step=25, value=300, label="Number of Samples"
97
- )
98
- outliers_fraction = gr.Slider(
99
- minimum=0.1, maximum=0.9, step=0.1, value=0.2, label="Outlier Fraction"
100
  )
 
 
 
 
 
 
 
 
101
 
102
- for clf_name in input_models:
103
- plot = gr.Plot(label=clf_name)
104
- fn = partial(train_models, clf_name=clf_name)
105
  input_data.change(fn=fn, inputs=[input_data, outliers_fraction, n_samples], outputs=plot)
106
  n_samples.change(fn=fn, inputs=[input_data, outliers_fraction, n_samples], outputs=plot)
107
  outliers_fraction.change(fn=fn, inputs=[input_data, outliers_fraction, n_samples], outputs=plot)
 
108
 
109
- # Launch the app
110
- demo.launch()
 
1
  import numpy as np
2
+ import matplotlib.pyplot as plt
3
+ from threading import Thread
4
+ from matplotlib.colors import ListedColormap
5
+ from sklearn.datasets import make_moons, make_circles, make_classification
6
+ from sklearn.datasets import make_blobs, make_circles, make_moons
7
+ import gradio as gr
8
+ import math
9
+ from functools import partial
10
+ import time
11
+
12
+ import matplotlib
13
+
14
+ from sklearn import svm
15
+ from sklearn.datasets import make_moons, make_blobs
16
  from sklearn.covariance import EllipticEnvelope
17
+ from sklearn.ensemble import IsolationForest
18
  from sklearn.neighbors import LocalOutlierFactor
19
  from sklearn.linear_model import SGDOneClassSVM
 
20
  from sklearn.kernel_approximation import Nystroem
21
+ from sklearn.pipeline import make_pipeline
22
+
23
def get_groundtruth_model(X, labels):
    """Return a dummy "model" exposing the ground-truth labels.

    Mimics the fitted-estimator interface (a ``labels_`` attribute) so the
    true label distribution can be displayed alongside real models.

    Parameters
    ----------
    X : array-like
        Unused; accepted only to match the ``(X, labels)`` call convention
        of the real estimators.
    labels : array-like
        The ground-truth labels to expose as ``labels_``.

    Returns
    -------
    object
        An instance whose ``labels_`` attribute is ``labels``.
    """

    class Dummy:
        def __init__(self, y):
            # Store the value actually passed in. (The original version
            # ignored the `y` parameter and bound the enclosing `labels`
            # via closure, which worked only by accident.)
            self.labels_ = y

    return Dummy(labels)
30
 
31
#### PLOT
# Default (width, height) figure size in inches.
# NOTE(review): appears unused by the visible code — confirm before removing.
FIGSIZE = 10,10
# Module-level figure created once at import time.
# NOTE(review): train_models() opens its own figure, so this one looks
# unused — verify nothing else references it before cleaning up.
figure = plt.figure(figsize=(25, 10))
34
 
35
+
36
+ def train_models(input_data, outliers_fraction, n_samples, clf_name):
 
37
  n_outliers = int(outliers_fraction * n_samples)
38
  n_inliers = n_samples - n_outliers
39
  blobs_params = dict(random_state=0, n_samples=n_inliers, n_features=2)
40
+ NAME_CLF_MAPPING = {"Robust covariance": EllipticEnvelope(contamination=outliers_fraction),
41
+ "One-Class SVM": svm.OneClassSVM(nu=outliers_fraction, kernel="rbf", gamma=0.1),
42
+ "One-Class SVM (SGD)":make_pipeline(
 
43
  Nystroem(gamma=0.1, random_state=42, n_components=150),
44
  SGDOneClassSVM(
45
  nu=outliers_fraction,
 
51
  ),
52
  "Isolation Forest": IsolationForest(contamination=outliers_fraction, random_state=42),
53
  "Local Outlier Factor": LocalOutlierFactor(n_neighbors=35, contamination=outliers_fraction),
54
+ }
55
  DATA_MAPPING = {
56
+ "Central Blob":make_blobs(centers=[[0, 0], [0, 0]], cluster_std=0.5, **blobs_params)[0],
57
+ "Two Blobs": make_blobs(centers=[[2, 2], [-2, -2]], cluster_std=[0.5, 0.5], **blobs_params)[0],
58
+ "Blob with Noise": make_blobs(centers=[[2, 2], [-2, -2]], cluster_std=[1.5, 0.3], **blobs_params)[0],
59
+ "Moons": 4.0
60
+ * (
61
+ make_moons(n_samples=n_samples, noise=0.05, random_state=0)[0]
62
+ - np.array([0.5, 0.25])
63
+ ),
64
+ "Noise": 14.0 * (np.random.RandomState(42).rand(n_samples, 2) - 0.5),
65
  }
66
+ DATASETS = [
67
+ make_blobs(centers=[[0, 0], [0, 0]], cluster_std=0.5, **blobs_params)[0],
68
+ make_blobs(centers=[[2, 2], [-2, -2]], cluster_std=[0.5, 0.5], **blobs_params)[0],
69
+ make_blobs(centers=[[2, 2], [-2, -2]], cluster_std=[1.5, 0.3], **blobs_params)[0],
70
+ 4.0
71
+ * (
72
+ make_moons(n_samples=n_samples, noise=0.05, random_state=0)[0]
73
+ - np.array([0.5, 0.25])
74
+ ),
75
+ 14.0 * (np.random.RandomState(42).rand(n_samples, 2) - 0.5),
76
+ ]
77
+
78
  xx, yy = np.meshgrid(np.linspace(-7, 7, 150), np.linspace(-7, 7, 150))
79
  clf = NAME_CLF_MAPPING[clf_name]
80
+ plt.figure(figsize=(len(NAME_CLF_MAPPING) * 2 + 4, 12.5))
 
 
 
81
 
 
 
 
82
 
83
+ plot_num = 1
84
+ rng = np.random.RandomState(42)
85
+ X = DATA_MAPPING[input_data]
86
+ X = np.concatenate([X, rng.uniform(low=-6, high=6, size=(n_outliers, 2))], axis=0)
87
+
88
  t0 = time.time()
89
  clf.fit(X)
90
  t1 = time.time()
91
+ # fit the data and tag outliers
92
  if clf_name == "Local Outlier Factor":
93
  y_pred = clf.fit_predict(X)
94
  else:
95
  y_pred = clf.fit(X).predict(X)
96
 
97
+ # plot the levels lines and the points
98
+ if clf_name != "Local Outlier Factor":
99
  Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
100
  Z = Z.reshape(xx.shape)
101
+ plt.contour(xx, yy, Z, levels=[0], linewidths=10, colors="black")
102
 
103
  colors = np.array(["#377eb8", "#ff7f00"])
104
+ plt.scatter(X[:, 0], X[:, 1], s=100, color=colors[(y_pred + 1) // 2])
105
 
106
  plt.xlim(-7, 7)
107
  plt.ylim(-7, 7)
108
  plt.xticks(())
109
  plt.yticks(())
110
+ plt.text(
111
+ 0.99,
112
+ 0.01,
113
+ ("%.2fs" % (t1 - t0)).lstrip("0"),
114
+ transform=plt.gca().transAxes,
115
+ size=60,
116
+ horizontalalignment="right",
117
+ )
118
+ plot_num += 1
119
+
120
  return plt
121
 
122
# Subtitle rendered under the app title in the Gradio UI.
description = "Learn how different anomaly detection algorithms perform in different datasets."
123
 
124
def iter_grid(n_rows, n_cols):
    """Yield once per cell of an ``n_rows`` x ``n_cols`` Gradio layout grid.

    Each yield occurs inside a fresh ``gr.Column`` nested in a ``gr.Row``,
    so the caller can place exactly one component per grid cell.
    """
    row = 0
    while row < n_rows:
        with gr.Row():
            col = 0
            while col < n_cols:
                with gr.Column():
                    yield
                col += 1
        row += 1
131
+
132
title = "🕵️‍♀️ compare anomaly detection algorithms 🕵️‍♂️"

with gr.Blocks() as demo:
    gr.Markdown(f"## {title}")
    gr.Markdown(description)

    # These names must match the keys of NAME_CLF_MAPPING in train_models().
    input_models = [
        "Robust covariance",
        "One-Class SVM",
        "One-Class SVM (SGD)",
        "Isolation Forest",
        "Local Outlier Factor",
    ]
    input_data = gr.Radio(
        choices=["Central Blob", "Two Blobs", "Blob with Noise", "Moons", "Noise"],
        value="Moons",
        label="Dataset Type",  # label was dropped in this revision; restored
    )
    # Explicit default values so the first render uses a sensible
    # configuration instead of silently starting at the sliders' minimums.
    n_samples = gr.Slider(
        minimum=100, maximum=500, step=25, value=300, label="Number of Samples"
    )
    outliers_fraction = gr.Slider(
        minimum=0.1, maximum=0.9, step=0.1, value=0.2, label="Fraction of Outliers"
    )

    # One plot per classifier, placed on the layout grid; every plot is
    # re-rendered whenever any of the three inputs changes.
    remaining = iter(input_models)
    for _ in iter_grid(5, 5):
        clf_name = next(remaining, None)
        if clf_name is None:
            break
        plot = gr.Plot(label=clf_name)
        # partial() binds clf_name now, avoiding the classic late-binding
        # loop-variable bug with closures.
        fn = partial(train_models, clf_name=clf_name)
        inputs = [input_data, outliers_fraction, n_samples]
        input_data.change(fn=fn, inputs=inputs, outputs=plot)
        n_samples.change(fn=fn, inputs=inputs, outputs=plot)
        outliers_fraction.change(fn=fn, inputs=inputs, outputs=plot)

# NOTE(review): `enable_queue` is deprecated in Gradio 3.x and removed in
# 4.x (use `demo.queue().launch(debug=True)`) — confirm the installed
# Gradio version before upgrading.
demo.launch(enable_queue=True, debug=True)