rtik007 committed on
Commit
5555147
·
verified ·
1 Parent(s): e534cc9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +114 -1
app.py CHANGED
@@ -1,7 +1,120 @@
1
  import numpy as np
2
  import matplotlib.pyplot as plt
3
- from sklearn.datasets import make_moons, make_blobs
 
 
 
 
 
 
 
4
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
 
6
 
7
  # Function to generate interactive feature scatter plots
 
1
  import numpy as np
2
  import matplotlib.pyplot as plt
3
+ from sklearn import svm
4
+ from sklearn.covariance import EllipticEnvelope
5
+ from sklearn.ensemble import IsolationForest
6
+ from sklearn.neighbors import LocalOutlierFactor
7
+ from sklearn.linear_model import SGDOneClassSVM
8
+ from sklearn.kernel_approximation import Nystroem
9
+ from sklearn.pipeline import make_pipeline
10
+ from sklearn.datasets import make_blobs, make_moons
11
  import gradio as gr
12
+ import time
13
+
14
+ # Function to train models and generate plots
15
def train_models(input_data, outliers_fraction, n_samples, clf_name):
    """Fit one anomaly detector on one synthetic dataset and plot the result.

    Args:
        input_data: Dataset key, one of "Central Blob", "Two Blobs",
            "Blob with Noise", "Moons" or "Noise".
        outliers_fraction: Fraction of ``n_samples`` that is generated as
            uniformly distributed outliers. It is also handed to the detector
            as ``nu`` / ``contamination`` (the latter capped at 0.5).
        n_samples: Nominal sample count. The blob datasets use
            ``n_samples - n_outliers`` inliers; "Moons" and "Noise" use
            ``n_samples`` points. The uniform outliers are appended on top.
        clf_name: Detector key, one of "Robust covariance", "One-Class SVM",
            "One-Class SVM (SGD)", "Isolation Forest" or
            "Local Outlier Factor".

    Returns:
        A 5x5 inch matplotlib ``Figure`` with the samples coloured by their
        predicted label, the decision boundary (omitted for Local Outlier
        Factor) and the fit+predict wall time in the title.

    Raises:
        KeyError: If ``input_data`` or ``clf_name`` is not a known key.
    """
    # Imported locally so the block is self-contained. Building a Figure
    # directly keeps it out of pyplot's global registry, so figures do not
    # pile up in a long-running server and concurrent callbacks cannot draw
    # onto each other's "current figure".
    from matplotlib.figure import Figure

    # UI sliders may deliver floats; the sample generators need integers.
    n_samples = int(n_samples)
    n_outliers = int(outliers_fraction * n_samples)
    n_inliers = n_samples - n_outliers
    blobs_params = dict(random_state=0, n_samples=n_inliers, n_features=2)

    # Factories instead of eagerly built values: only the requested dataset
    # and detector are constructed on each call.
    dataset_factories = {
        "Central Blob": lambda: make_blobs(
            centers=[[0, 0], [0, 0]], cluster_std=0.5, **blobs_params
        )[0],
        "Two Blobs": lambda: make_blobs(
            centers=[[2, 2], [-2, -2]], cluster_std=[0.5, 0.5], **blobs_params
        )[0],
        "Blob with Noise": lambda: make_blobs(
            centers=[[2, 2], [-2, -2]], cluster_std=[1.5, 0.3], **blobs_params
        )[0],
        "Moons": lambda: 4.0 * (
            make_moons(n_samples=n_samples, noise=0.05, random_state=0)[0]
            - np.array([0.5, 0.25])
        ),
        "Noise": lambda: 14.0 * (np.random.RandomState(42).rand(n_samples, 2) - 0.5),
    }

    # scikit-learn only accepts contamination in (0, 0.5]; larger requested
    # fractions are capped so the detector can still be fitted.
    contamination = min(outliers_fraction, 0.5)

    detector_factories = {
        "Robust covariance": lambda: EllipticEnvelope(contamination=contamination),
        "One-Class SVM": lambda: svm.OneClassSVM(
            nu=outliers_fraction, kernel="rbf", gamma=0.1
        ),
        "One-Class SVM (SGD)": lambda: make_pipeline(
            Nystroem(gamma=0.1, random_state=42, n_components=150),
            SGDOneClassSVM(
                nu=outliers_fraction,
                shuffle=True,
                fit_intercept=True,
                random_state=42,
                tol=1e-6,
            ),
        ),
        "Isolation Forest": lambda: IsolationForest(
            contamination=contamination, random_state=42
        ),
        "Local Outlier Factor": lambda: LocalOutlierFactor(
            n_neighbors=35, contamination=contamination
        ),
    }

    X = dataset_factories[input_data]()
    rng = np.random.RandomState(42)
    X = np.concatenate(
        [X, rng.uniform(low=-6, high=6, size=(n_outliers, 2))], axis=0
    )

    clf = detector_factories[clf_name]()
    # This detector is fitted and scored in a single fit_predict call, and
    # no decision boundary is drawn for it.
    is_lof = clf_name == "Local Outlier Factor"

    t0 = time.time()
    if is_lof:
        y_pred = clf.fit_predict(X)
    else:
        clf.fit(X)
        y_pred = clf.predict(X)
    t1 = time.time()

    # Plot
    fig = Figure(figsize=(5, 5))
    ax = fig.add_subplot(1, 1, 1)

    if not is_lof:
        # Predictions are -1/+1, so the 0 level traces the decision boundary.
        xx, yy = np.meshgrid(np.linspace(-7, 7, 150), np.linspace(-7, 7, 150))
        Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
        ax.contour(xx, yy, Z, levels=[0], linewidths=2, colors="black")

    # Map label -1 -> first colour, label +1 -> second colour.
    colors = np.array(["#377eb8", "#ff7f00"])
    ax.scatter(X[:, 0], X[:, 1], s=30, color=colors[(y_pred + 1) // 2])
    ax.set_title(f"{clf_name} ({t1 - t0:.2f}s)")
    ax.set_xlim(-7, 7)
    ax.set_ylim(-7, 7)
    ax.set_xticks(())
    ax.set_yticks(())
    return fig
76
+
77
+ # Gradio Interface
78
description = "Compare how different anomaly detection algorithms perform on various datasets."
title = "🕵️‍♀️ Compare Anomaly Detection Algorithms 🕵️‍♂️"

with gr.Blocks() as demo:
    gr.Markdown(f"## {title}")
    gr.Markdown(description)

    # Inputs
    input_data = gr.Radio(
        choices=["Central Blob", "Two Blobs", "Blob with Noise", "Moons", "Noise"],
        value="Moons",
        label="Dataset",
    )
    n_samples = gr.Slider(minimum=10, maximum=10000, step=25, value=500, label="Number of Samples")
    # The fraction is forwarded as `contamination` to several scikit-learn
    # detectors, which only accept values in (0, 0.5]. The range is limited
    # accordingly, and the step is chosen so the default 0.2 sits on the grid.
    outliers_fraction = gr.Slider(minimum=0.01, maximum=0.5, step=0.01, value=0.2, label="Fraction of Outliers")

    # Models and their plots in a row
    input_models = [
        "Robust covariance",
        "One-Class SVM",
        "One-Class SVM (SGD)",
        "Isolation Forest",
        "Local Outlier Factor",
    ]
    plots = []

    with gr.Row():
        for model_name in input_models:
            plot = gr.Plot(label=model_name)
            plots.append((model_name, plot))

    # Update function
    def update(input_data, outliers_fraction, n_samples):
        """Return one freshly trained figure per model, in `plots` order."""
        return [
            train_models(input_data, outliers_fraction, n_samples, clf_name)
            for clf_name, _ in plots
        ]

    # Set change triggers: any control change re-renders every plot.
    inputs = [input_data, outliers_fraction, n_samples]
    demo_outputs = [plot for _, plot in plots]
    for control in (input_data, n_samples, outliers_fraction):
        control.change(fn=update, inputs=inputs, outputs=demo_outputs)
117
+
118
 
119
 
120
  # Function to generate interactive feature scatter plots