rtik007 commited on
Commit
ab8a593
·
verified ·
1 Parent(s): e32effe

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -188
app.py CHANGED
@@ -9,15 +9,17 @@ from sklearn.kernel_approximation import Nystroem
9
  from sklearn.pipeline import make_pipeline
10
  from sklearn.datasets import make_blobs, make_moons
11
  import gradio as gr
12
- import pandas as pd
 
 
 
13
  import time
14
 
15
- # Helper function to prepare data
16
  def prepare_data(input_data, n_samples, outliers_fraction=0.01):
17
- n_outliers = max(int(outliers_fraction * n_samples), 1) # At least 1 outlier
18
  n_inliers = n_samples - n_outliers
19
  blobs_params = dict(random_state=0, n_samples=n_inliers, n_features=2)
20
-
21
  DATA_MAPPING = {
22
  "Central Blob": make_blobs(centers=[[0, 0], [0, 0]], cluster_std=0.5, **blobs_params)[0],
23
  "Two Blobs": make_blobs(centers=[[2, 2], [-2, -2]], cluster_std=[0.5, 0.5], **blobs_params)[0],
@@ -29,205 +31,70 @@ def prepare_data(input_data, n_samples, outliers_fraction=0.01):
29
  rng = np.random.RandomState(42)
30
  outliers = rng.uniform(low=-6, high=6, size=(n_outliers, 2))
31
  X = np.concatenate([X, outliers], axis=0)
32
- labels = np.array(["Normal"] * len(X))
33
- labels[-len(outliers):] = "Anomaly"
34
- return X, labels
35
-
36
- # Function to train models and generate plots
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  def train_models(input_data, outliers_fraction, n_samples, clf_name):
38
- # Ensure contamination is valid
39
- outliers_fraction = max(outliers_fraction, 0.01) # At least 0.01
40
-
41
- X, _ = prepare_data(input_data, n_samples, outliers_fraction)
42
-
43
- # Define classifiers
44
  NAME_CLF_MAPPING = {
45
  "Robust covariance": EllipticEnvelope(contamination=outliers_fraction),
46
  "One-Class SVM": svm.OneClassSVM(nu=outliers_fraction, kernel="rbf", gamma=0.1),
47
  "One-Class SVM (SGD)": make_pipeline(
48
  Nystroem(gamma=0.1, random_state=42, n_components=150),
49
- SGDOneClassSVM(
50
- nu=outliers_fraction,
51
- shuffle=True,
52
- fit_intercept=True,
53
- random_state=42,
54
- tol=1e-6,
55
- ),
56
  ),
57
  "Isolation Forest": IsolationForest(contamination=outliers_fraction, random_state=42),
58
  "Local Outlier Factor": LocalOutlierFactor(n_neighbors=35, contamination=outliers_fraction),
 
59
  }
60
-
61
- clf = NAME_CLF_MAPPING[clf_name]
62
- xx, yy = np.meshgrid(np.linspace(-7, 7, 150), np.linspace(-7, 7, 150))
63
-
64
- t0 = time.time()
65
- if clf_name == "Local Outlier Factor":
66
- y_pred = clf.fit_predict(X)
67
  else:
68
- clf.fit(X)
69
- y_pred = clf.predict(X)
70
- t1 = time.time()
71
-
72
- # Plotting
 
73
  plt.figure(figsize=(5, 5))
74
- if clf_name != "Local Outlier Factor":
75
- Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
76
- Z = Z.reshape(xx.shape)
77
- plt.contour(xx, yy, Z, levels=[0], linewidths=2, colors="black")
78
-
79
  colors = np.array(["#377eb8", "#ff7f00"])
80
- plt.scatter(X[:, 0], X[:, 1], s=30, color=colors[(y_pred + 1) // 2])
81
- plt.title(f"{clf_name} ({t1 - t0:.2f}s)")
82
- plt.xlim(-7, 7)
83
- plt.ylim(-7, 7)
84
- plt.xticks(())
85
- plt.yticks(())
86
  return plt.gcf()
87
 
88
- # Function to detect anomalies and generate anomaly records
89
- def detect_anomalies(input_data, n_samples, outliers_fraction, model_name):
90
- X, labels = prepare_data(input_data, n_samples, outliers_fraction)
91
-
92
- # Define classifiers
93
- NAME_CLF_MAPPING = {
94
- "Robust covariance": EllipticEnvelope(contamination=outliers_fraction),
95
- "One-Class SVM": svm.OneClassSVM(nu=outliers_fraction, kernel="rbf", gamma=0.1),
96
- "One-Class SVM (SGD)": make_pipeline(
97
- Nystroem(gamma=0.1, random_state=42, n_components=150),
98
- SGDOneClassSVM(
99
- nu=outliers_fraction,
100
- shuffle=True,
101
- fit_intercept=True,
102
- random_state=42,
103
- tol=1e-6,
104
- ),
105
- ),
106
- "Isolation Forest": IsolationForest(contamination=outliers_fraction, random_state=42),
107
- "Local Outlier Factor": LocalOutlierFactor(n_neighbors=35, contamination=outliers_fraction),
108
- }
109
-
110
- clf = NAME_CLF_MAPPING[model_name]
111
- if model_name == "Local Outlier Factor":
112
- clf.fit(X)
113
- scores = -clf.negative_outlier_factor_
114
- else:
115
- clf.fit(X)
116
- scores = -clf.decision_function(X)
117
-
118
- # Normalize scores to a consistent range
119
- scores = (scores - scores.min()) / (scores.max() - scores.min())
120
-
121
- # Create DataFrame
122
- df = pd.DataFrame({
123
- "Feature1": X[:, 0],
124
- "Feature2": X[:, 1],
125
- "Anomaly_Score": scores,
126
- "Anomaly_Label": labels,
127
- })
128
-
129
- # Sort by anomaly score in descending order
130
- df = df.sort_values("Anomaly_Score", ascending=False).reset_index(drop=True)
131
-
132
- return df
133
-
134
- # Function to get anomaly samples
135
- def get_anomaly_samples(input_data, n_samples, outliers_fraction, model_name):
136
- outliers_fraction = max(outliers_fraction, 0.01) # Ensure fraction is valid
137
- df = detect_anomalies(input_data, n_samples, outliers_fraction, model_name)
138
-
139
- # Debugging: Check anomaly label counts
140
- print("Anomaly Label Counts:", df["Anomaly_Label"].value_counts())
141
-
142
- # Top 10 anomalies
143
- top_10 = df[df["Anomaly_Label"] == "Anomaly"].head(10)
144
-
145
- if top_10.empty:
146
- print("No anomalies found in Top 10 Anomalies.")
147
- top_10 = pd.DataFrame({"Message": ["No anomalies found"]})
148
-
149
- # Middle 10 (mixed records)
150
- mid_start = len(df) // 2 - 5
151
- middle_10 = df.iloc[mid_start: mid_start + 10]
152
-
153
- # Bottom 10 normals
154
- bottom_10 = df[df["Anomaly_Label"] == "Normal"].tail(10)
155
-
156
- return top_10, middle_10, bottom_10
157
-
158
- # Function to plot scatter plots
159
- def plot_interactive_feature_scatter(input_data, feature_x, feature_y, n_samples):
160
- data, _ = prepare_data(input_data, n_samples)
161
- x_data = data[:, 0] if feature_x == "Feature1" else data[:, 1]
162
- y_data = data[:, 1] if feature_y == "Feature2" else data[:, 0]
163
-
164
- plt.figure(figsize=(6, 6))
165
- plt.scatter(x_data, y_data, alpha=0.8, c="blue", s=20, label="Features")
166
- plt.title(f"Feature Interaction Scatter Plot - {feature_x} vs {feature_y}")
167
- plt.xlabel(feature_x)
168
- plt.ylabel(feature_y)
169
- plt.legend()
170
- return plt.gcf()
171
-
172
- # Gradio Interface
173
  with gr.Blocks() as demo:
174
- gr.Markdown("## 🕵️‍♀️ Anomaly Detection App 🕵️‍♂️")
175
-
176
- # Interactive Feature Scatter Plot
177
- gr.Markdown("### 1. Interactive Feature Scatter Plot")
178
- input_data = gr.Radio(
179
- choices=["Central Blob", "Two Blobs", "Blob with Noise", "Moons", "Noise"],
180
- value="Moons",
181
- label="Dataset"
182
- )
183
- feature_x = gr.Dropdown(choices=["Feature1", "Feature2"], value="Feature1", label="Feature 1")
184
- feature_y = gr.Dropdown(choices=["Feature1", "Feature2"], value="Feature2", label="Feature 2")
185
- n_samples = gr.Slider(minimum=10, maximum=10000, step=25, value=500, label="Number of Samples")
186
- scatter_plot_button = gr.Button("Generate Scatter Plot")
187
- scatter_plot = gr.Plot(label="Feature Scatter Plot")
188
-
189
- scatter_plot_button.click(
190
- fn=plot_interactive_feature_scatter,
191
- inputs=[input_data, feature_x, feature_y, n_samples],
192
- outputs=scatter_plot,
193
  )
 
 
 
194
 
195
- # Compare Anomaly Detection Algorithms
196
- gr.Markdown("### 2. Compare Anomaly Detection Algorithms")
197
- outliers_fraction = gr.Slider(minimum=0.001, maximum=0.999, step=0.1, value=0.2, label="Fraction of Outliers")
198
- input_models = ["Robust covariance", "One-Class SVM", "One-Class SVM (SGD)", "Isolation Forest", "Local Outlier Factor"]
199
- plots = []
200
- with gr.Row():
201
- for model_name in input_models:
202
- plot = gr.Plot(label=model_name)
203
- plots.append((model_name, plot))
204
-
205
- def update_anomaly_comparison(input_data, outliers_fraction, n_samples):
206
- results = []
207
- for clf_name, plot in plots:
208
- fig = train_models(input_data, outliers_fraction, n_samples, clf_name)
209
- results.append(fig)
210
- return results
211
-
212
- anomaly_inputs = [input_data, outliers_fraction, n_samples]
213
- anomaly_outputs = [plot for _, plot in plots]
214
- input_data.change(fn=update_anomaly_comparison, inputs=anomaly_inputs, outputs=anomaly_outputs)
215
- n_samples.change(fn=update_anomaly_comparison, inputs=anomaly_inputs, outputs=anomaly_outputs)
216
- outliers_fraction.change(fn=update_anomaly_comparison, inputs=anomaly_inputs, outputs=anomaly_outputs)
217
-
218
- '''
219
- # Example Anomaly Records
220
- gr.Markdown("### 3. Example Anomaly Records")
221
- model_dropdown = gr.Dropdown(choices=input_models, value="Isolation Forest", label="Select Model")
222
- top_table = gr.Dataframe(label="Top 10 Anomalies")
223
- middle_table = gr.Dataframe(label="Middle 10 Records")
224
- bottom_table = gr.Dataframe(label="Bottom 10 Normals")
225
- anomaly_samples_button = gr.Button("Show Anomaly Samples")
226
-
227
- anomaly_samples_button.click(
228
- fn=get_anomaly_samples,
229
- inputs=[input_data, n_samples, outliers_fraction, model_dropdown],
230
- outputs=[top_table, middle_table, bottom_table],
231
- )
232
- '''
233
- demo.launch(debug=True)
 
9
  from sklearn.pipeline import make_pipeline
10
  from sklearn.datasets import make_blobs, make_moons
11
  import gradio as gr
12
+ import tensorflow as tf
13
+ from tensorflow.keras.models import Sequential
14
+ from tensorflow.keras.layers import Dense, Input
15
+ from tensorflow.keras.optimizers import Adam
16
  import time
17
 
18
+ # 1. Helper function: Prepare data
19
  def prepare_data(input_data, n_samples, outliers_fraction=0.01):
20
+ n_outliers = max(int(outliers_fraction * n_samples), 1)
21
  n_inliers = n_samples - n_outliers
22
  blobs_params = dict(random_state=0, n_samples=n_inliers, n_features=2)
 
23
  DATA_MAPPING = {
24
  "Central Blob": make_blobs(centers=[[0, 0], [0, 0]], cluster_std=0.5, **blobs_params)[0],
25
  "Two Blobs": make_blobs(centers=[[2, 2], [-2, -2]], cluster_std=[0.5, 0.5], **blobs_params)[0],
 
31
  rng = np.random.RandomState(42)
32
  outliers = rng.uniform(low=-6, high=6, size=(n_outliers, 2))
33
  X = np.concatenate([X, outliers], axis=0)
34
+ return X
35
+
36
# 2. Autoencoder Anomaly Detection
def build_autoencoder(input_dim):
    """Build and compile a symmetric dense autoencoder for reconstruction-based scoring.

    Args:
        input_dim: Number of input features (2 for the 2-D toy datasets).

    Returns:
        A compiled Keras Sequential model mapping input_dim -> 64 -> 32 -> 64 -> input_dim,
        trained with Adam (lr=0.001) on MSE reconstruction loss.
    """
    model = Sequential([
        # Explicit Input layer instead of the deprecated `input_dim=` kwarg on
        # the first Dense layer (`Input` is already imported at the top of the file).
        Input(shape=(input_dim,)),
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(64, activation='relu'),
        # Linear output: the toy data spans roughly [-7, 7], so a sigmoid output
        # (bounded to (0, 1)) could never reconstruct it and the reconstruction
        # error would reflect magnitude rather than anomalousness.
        Dense(input_dim, activation='linear'),
    ])
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
    return model
46
+
47
def autoencoder_anomaly_detection(X, outliers_fraction=0.01, epochs=50):
    """Flag the highest-reconstruction-error points of X as anomalies.

    Trains a fresh autoencoder on X, scores each sample by mean squared
    reconstruction error, and labels the top `outliers_fraction` share as anomalies.

    Args:
        X: Array of shape (n_samples, n_features).
        outliers_fraction: Fraction of samples to flag as anomalous.
        epochs: Training epochs for the autoencoder.

    Returns:
        Integer array of shape (n_samples,): 1 = anomaly, 0 = normal.
    """
    model = build_autoencoder(X.shape[1])
    model.fit(X, X, epochs=epochs, batch_size=32, verbose=0)
    # verbose=0 here too: without it predict() prints a progress bar on every
    # app interaction (fit already silences its output).
    reconstruction = model.predict(X, verbose=0)
    reconstruction_error = np.mean((X - reconstruction) ** 2, axis=1)
    # Threshold at the (1 - fraction) percentile so ~outliers_fraction of points exceed it.
    threshold = np.percentile(reconstruction_error, 100 * (1 - outliers_fraction))
    y_pred = (reconstruction_error > threshold).astype(int)
    return y_pred
55
+
56
# 3. Function to train models and generate plots
def train_models(input_data, outliers_fraction, n_samples, clf_name):
    """Fit the selected anomaly detector on a toy dataset and plot its labels.

    Args:
        input_data: Dataset name ("Central Blob", "Two Blobs", "Blob with Noise",
            "Moons", or "Noise") passed through to prepare_data.
        outliers_fraction: Expected contamination fraction.
        n_samples: Total number of generated points.
        clf_name: Detector name — one of the sklearn models below, or "Autoencoders".

    Returns:
        A matplotlib Figure; outliers blue, inliers orange, consistent across models.
    """
    X = prepare_data(input_data, n_samples, outliers_fraction)

    # Building the sklearn estimators is cheap; the autoencoder is handled
    # separately below so it is NOT trained unless actually selected (the
    # original code trained it inside this dict on every single call).
    NAME_CLF_MAPPING = {
        "Robust covariance": EllipticEnvelope(contamination=outliers_fraction),
        "One-Class SVM": svm.OneClassSVM(nu=outliers_fraction, kernel="rbf", gamma=0.1),
        "One-Class SVM (SGD)": make_pipeline(
            Nystroem(gamma=0.1, random_state=42, n_components=150),
            SGDOneClassSVM(nu=outliers_fraction, random_state=42),
        ),
        "Isolation Forest": IsolationForest(contamination=outliers_fraction, random_state=42),
        "Local Outlier Factor": LocalOutlierFactor(n_neighbors=35, contamination=outliers_fraction),
    }

    if clf_name == "Autoencoders":
        # The autoencoder returns 1 for anomalies and 0 for normals; convert to
        # the sklearn convention (-1 = outlier, +1 = inlier) so the color
        # mapping below means the same thing for every model.
        ae_pred = autoencoder_anomaly_detection(X, outliers_fraction)
        y_pred = np.where(ae_pred == 1, -1, 1)
    else:
        clf = NAME_CLF_MAPPING[clf_name]
        if clf_name == "Local Outlier Factor":
            # LOF exposes only fit_predict for training-set labels.
            y_pred = clf.fit_predict(X)
        else:
            clf.fit(X)
            y_pred = clf.predict(X)

    plt.figure(figsize=(5, 5))
    # colors[0] (blue) <- y_pred == -1 (outlier); colors[1] (orange) <- y_pred == +1 (inlier).
    colors = np.array(["#377eb8", "#ff7f00"])
    plt.scatter(X[:, 0], X[:, 1], c=colors[(y_pred + 1) // 2], s=20)
    plt.title(clf_name)
    return plt.gcf()
84
 
85
# 4. Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("## Anomaly Detection Comparison App")

    # Dataset and sampling controls.
    dataset_choices = ["Central Blob", "Two Blobs", "Blob with Noise", "Moons", "Noise"]
    input_data = gr.Radio(choices=dataset_choices, value="Moons", label="Dataset")
    n_samples = gr.Slider(minimum=50, maximum=2000, step=50, value=500, label="Number of Samples")
    outliers_fraction = gr.Slider(minimum=0.01, maximum=0.5, step=0.01, value=0.05, label="Fraction of Outliers")

    # Detector selector: the Keras autoencoder sits alongside the sklearn models.
    model_choices = [
        "Robust covariance",
        "One-Class SVM",
        "One-Class SVM (SGD)",
        "Isolation Forest",
        "Local Outlier Factor",
        "Autoencoders",
    ]
    input_models = gr.Radio(choices=model_choices, value="Isolation Forest", label="Select Model")

    plot = gr.Plot(label="Model Results")
    generate_plot = gr.Button("Generate Plot")

    # The button feeds the four controls into train_models and renders the figure.
    generate_plot.click(
        fn=train_models,
        inputs=[input_data, outliers_fraction, n_samples, input_models],
        outputs=plot,
    )

demo.launch()