rtik007 committed on
Commit
e9e1584
·
verified ·
1 Parent(s): be8b1b9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -120
app.py CHANGED
@@ -9,15 +9,15 @@ from sklearn.kernel_approximation import Nystroem
9
  from sklearn.pipeline import make_pipeline
10
  from sklearn.datasets import make_blobs, make_moons
11
  import gradio as gr
 
12
  import time
13
 
14
- # Function to train models and generate plots
15
- def train_models(input_data, outliers_fraction, n_samples, clf_name):
16
- # Prepare data
17
  n_outliers = int(outliers_fraction * n_samples)
18
  n_inliers = n_samples - n_outliers
19
  blobs_params = dict(random_state=0, n_samples=n_inliers, n_features=2)
20
-
21
  DATA_MAPPING = {
22
  "Central Blob": make_blobs(centers=[[0, 0], [0, 0]], cluster_std=0.5, **blobs_params)[0],
23
  "Two Blobs": make_blobs(centers=[[2, 2], [-2, -2]], cluster_std=[0.5, 0.5], **blobs_params)[0],
@@ -26,6 +26,16 @@ def train_models(input_data, outliers_fraction, n_samples, clf_name):
26
  "Noise": 14.0 * (np.random.RandomState(42).rand(n_samples, 2) - 0.5),
27
  }
28
 
 
 
 
 
 
 
 
 
 
 
29
  NAME_CLF_MAPPING = {
30
  "Robust covariance": EllipticEnvelope(contamination=outliers_fraction),
31
  "One-Class SVM": svm.OneClassSVM(nu=outliers_fraction, kernel="rbf", gamma=0.1),
@@ -42,13 +52,9 @@ def train_models(input_data, outliers_fraction, n_samples, clf_name):
42
  "Isolation Forest": IsolationForest(contamination=outliers_fraction, random_state=42),
43
  "Local Outlier Factor": LocalOutlierFactor(n_neighbors=35, contamination=outliers_fraction),
44
  }
45
-
46
- X = DATA_MAPPING[input_data]
47
- rng = np.random.RandomState(42)
48
- X = np.concatenate([X, rng.uniform(low=-6, high=6, size=(n_outliers, 2))], axis=0)
49
 
50
- xx, yy = np.meshgrid(np.linspace(-7, 7, 150), np.linspace(-7, 7, 150))
51
  clf = NAME_CLF_MAPPING[clf_name]
 
52
 
53
  t0 = time.time()
54
  if clf_name == "Local Outlier Factor":
@@ -58,7 +64,7 @@ def train_models(input_data, outliers_fraction, n_samples, clf_name):
58
  y_pred = clf.predict(X)
59
  t1 = time.time()
60
 
61
- # Plot
62
  plt.figure(figsize=(5, 5))
63
  if clf_name != "Local Outlier Factor":
64
  Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
@@ -74,91 +80,25 @@ def train_models(input_data, outliers_fraction, n_samples, clf_name):
74
  plt.yticks(())
75
  return plt.gcf()
76
 
77
- # Gradio Interface
78
- description = "Compare how different anomaly detection algorithms perform on various datasets."
79
- title = "🕵️‍♀️ Compare Anomaly Detection Algorithms 🕵️‍♂️"
80
-
81
- with gr.Blocks() as demo:
82
- gr.Markdown(f"## {title}")
83
- gr.Markdown(description)
84
-
85
- # Inputs
86
- input_data = gr.Radio(
87
- choices=["Central Blob", "Two Blobs", "Blob with Noise", "Moons", "Noise"],
88
- value="Moons",
89
- label="Dataset"
90
- )
91
- n_samples = gr.Slider(minimum=10, maximum=10000, step=25, value=500, label="Number of Samples")
92
- outliers_fraction = gr.Slider(minimum=0.001, maximum=0.999, step=0.1, value=0.2, label="Fraction of Outliers")
93
-
94
- # Models and their plots in a row
95
- input_models = ["Robust covariance", "One-Class SVM", "One-Class SVM (SGD)", "Isolation Forest", "Local Outlier Factor"]
96
- plots = []
97
-
98
- with gr.Row():
99
- for model_name in input_models:
100
- plot = gr.Plot(label=model_name)
101
- plots.append((model_name, plot))
102
-
103
- # Update function
104
- def update(input_data, outliers_fraction, n_samples):
105
- results = []
106
- for clf_name, plot in plots:
107
- fig = train_models(input_data, outliers_fraction, n_samples, clf_name)
108
- results.append(fig)
109
- return results
110
-
111
- # Set change triggers
112
- inputs = [input_data, outliers_fraction, n_samples]
113
- demo_outputs = [plot for _, plot in plots]
114
- input_data.change(fn=update, inputs=inputs, outputs=demo_outputs)
115
- n_samples.change(fn=update, inputs=inputs, outputs=demo_outputs)
116
- outliers_fraction.change(fn=update, inputs=inputs, outputs=demo_outputs)
117
-
118
-
119
 
120
- # Function to generate interactive feature scatter plots
121
- def plot_interactive_feature_scatter(input_data, feature_x, feature_y, n_samples):
122
- # Generate data based on the selected dataset
123
- if input_data == "Moons":
124
- data, _ = make_moons(n_samples=n_samples, noise=0.05)
125
- else:
126
- data, _ = make_blobs(n_samples=n_samples, random_state=0)
127
-
128
- # Simulate feature selection by indexing
129
- x_data = data[:, 0] if feature_x == "Feature1" else data[:, 1]
130
- y_data = data[:, 1] if feature_y == "Feature2" else data[:, 0]
131
-
132
- # Generate scatter plot
133
- plt.figure(figsize=(6, 6))
134
- plt.scatter(x_data, y_data, alpha=0.8, c="blue", s=20, label="Features")
135
- plt.title(f"Feature Interaction Scatter Plot - {feature_x} vs {feature_y}")
136
- plt.xlabel(feature_x)
137
- plt.ylabel(feature_y)
138
- plt.legend()
139
- return plt.gcf()
140
 
 
 
141
 
142
- # Function for anomaly examples (Optional feature row)
143
- def get_anomaly_samples():
144
- """Returns formatted top, middle, and bottom 10 records based on anomaly score."""
145
- sorted_df = df.sort_values("Anomaly_Score", ascending=False)
146
-
147
- # Top 10 anomalies
148
- top_10 = sorted_df[sorted_df["Anomaly_Label"] == "Anomaly"].head(10)
149
-
150
- # Middle 10 (mix of anomalies and normal)
151
- mid_start = len(sorted_df) // 2 - 50 # Get a broader middle slice
152
- middle_section = sorted_df.iloc[mid_start: mid_start + 100] # Consider a larger middle slice
153
- middle_anomalies = middle_section[middle_section["Anomaly_Label"] == "Anomaly"].sample(n=5, random_state=42)
154
- middle_normals = middle_section[middle_section["Anomaly_Label"] == "Normal"].sample(n=5, random_state=42)
155
- middle_10 = pd.concat([middle_anomalies, middle_normals]).sort_values("Anomaly_Score", ascending=False)
156
-
157
- # Bottom 10 normal records
158
- bottom_10 = sorted_df[sorted_df["Anomaly_Label"] == "Normal"].tail(10)
159
-
160
- return top_10, middle_10, bottom_10
161
 
 
162
 
163
  # Gradio Interface
164
  with gr.Blocks() as demo:
@@ -173,12 +113,8 @@ with gr.Blocks() as demo:
173
  value="Moons",
174
  label="Dataset"
175
  )
176
- n_samples = gr.Slider(
177
- minimum=10, maximum=10000, step=25, value=500, label="Number of Samples"
178
- )
179
- outliers_fraction = gr.Slider(
180
- minimum=0.001, maximum=0.999, step=0.1, value=0.2, label="Fraction of Outliers"
181
- )
182
 
183
  input_models = ["Robust covariance", "One-Class SVM", "One-Class SVM (SGD)", "Isolation Forest", "Local Outlier Factor"]
184
  plots = []
@@ -200,34 +136,17 @@ with gr.Blocks() as demo:
200
  n_samples.change(fn=update_anomaly_comparison, inputs=anomaly_inputs, outputs=anomaly_outputs)
201
  outliers_fraction.change(fn=update_anomaly_comparison, inputs=anomaly_inputs, outputs=anomaly_outputs)
202
 
203
- # Interactive Feature Scatter Plot
204
- gr.Markdown("### 2. Interactive Feature Scatter Plot")
205
- feature_x = gr.Dropdown(choices=["Feature1", "Feature2"], value="Feature1", label="Feature 1")
206
- feature_y = gr.Dropdown(choices=["Feature1", "Feature2"], value="Feature2", label="Feature 2")
207
- scatter_plot_button = gr.Button("Generate Scatter Plot")
208
- scatter_plot = gr.Plot(label="Feature Scatter Plot")
209
-
210
- scatter_plot_button.click(
211
- fn=plot_interactive_feature_scatter,
212
- inputs=[input_data, feature_x, feature_y, n_samples],
213
- outputs=scatter_plot,
214
- )
215
-
216
  with gr.Tab("Anomaly Samples"):
217
- gr.HTML("<h3 style='text-align: center; font-size: 18px; font-weight: bold;'>Top 10 Records (Anomalies)</h3>")
218
- top_table = gr.Dataframe(label="Top 10 Records")
219
-
220
- gr.HTML("<h3 style='text-align: center; font-size: 18px; font-weight: bold;'>Middle 10 Records (Mixed)</h3>")
221
  middle_table = gr.Dataframe(label="Middle 10 Records")
222
-
223
- gr.HTML("<h3 style='text-align: center; font-size: 18px; font-weight: bold;'>Bottom 10 Records (Normal)</h3>")
224
- bottom_table = gr.Dataframe(label="Bottom 10 Records")
225
-
226
  anomaly_samples_button = gr.Button("Show Anomaly Samples")
 
227
  anomaly_samples_button.click(
228
- get_anomaly_samples,
229
  outputs=[top_table, middle_table, bottom_table]
230
  )
231
- )
232
 
233
  demo.launch(debug=True)
 
9
  from sklearn.pipeline import make_pipeline
10
  from sklearn.datasets import make_blobs, make_moons
11
  import gradio as gr
12
+ import pandas as pd # Needed for dataframe operations
13
  import time
14
 
15
+ # Helper function to prepare data
16
+ def prepare_data(input_data, n_samples, outliers_fraction):
 
17
  n_outliers = int(outliers_fraction * n_samples)
18
  n_inliers = n_samples - n_outliers
19
  blobs_params = dict(random_state=0, n_samples=n_inliers, n_features=2)
20
+
21
  DATA_MAPPING = {
22
  "Central Blob": make_blobs(centers=[[0, 0], [0, 0]], cluster_std=0.5, **blobs_params)[0],
23
  "Two Blobs": make_blobs(centers=[[2, 2], [-2, -2]], cluster_std=[0.5, 0.5], **blobs_params)[0],
 
26
  "Noise": 14.0 * (np.random.RandomState(42).rand(n_samples, 2) - 0.5),
27
  }
28
 
29
+ X = DATA_MAPPING[input_data]
30
+ rng = np.random.RandomState(42)
31
+ X = np.concatenate([X, rng.uniform(low=-6, high=6, size=(n_outliers, 2))], axis=0)
32
+ return X
33
+
34
+ # Function to train models and generate plots
35
+ def train_models(input_data, outliers_fraction, n_samples, clf_name):
36
+ X = prepare_data(input_data, n_samples, outliers_fraction)
37
+
38
+ # Define classifiers
39
  NAME_CLF_MAPPING = {
40
  "Robust covariance": EllipticEnvelope(contamination=outliers_fraction),
41
  "One-Class SVM": svm.OneClassSVM(nu=outliers_fraction, kernel="rbf", gamma=0.1),
 
52
  "Isolation Forest": IsolationForest(contamination=outliers_fraction, random_state=42),
53
  "Local Outlier Factor": LocalOutlierFactor(n_neighbors=35, contamination=outliers_fraction),
54
  }
 
 
 
 
55
 
 
56
  clf = NAME_CLF_MAPPING[clf_name]
57
+ xx, yy = np.meshgrid(np.linspace(-7, 7, 150), np.linspace(-7, 7, 150))
58
 
59
  t0 = time.time()
60
  if clf_name == "Local Outlier Factor":
 
64
  y_pred = clf.predict(X)
65
  t1 = time.time()
66
 
67
+ # Plotting
68
  plt.figure(figsize=(5, 5))
69
  if clf_name != "Local Outlier Factor":
70
  Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
 
80
  plt.yticks(())
81
  return plt.gcf()
82
 
83
+ # Function to simulate anomaly samples
84
def get_anomaly_samples():
    """Simulate a scored dataset and return top, middle and bottom samples.

    A 100-row frame is generated with a uniform random ``Anomaly_Score`` and
    an ``Anomaly_Label`` drawn independently of the score (20% "Anomaly",
    80% "Normal"). The frame is ranked once by score, highest first, and
    every returned table is sliced from that ranking.

    Returns:
        tuple: ``(top_10, middle, bottom_10)`` pandas DataFrames, each
        ordered by descending ``Anomaly_Score``:

        * ``top_10`` -- up to 10 highest-scoring rows labelled "Anomaly".
        * ``middle`` -- the 10 rows around the median rank (any label).
        * ``bottom_10`` -- up to 10 lowest-scoring rows labelled "Normal".
    """
    # Simulated dataframe (unseeded, so each call produces fresh data)
    data = {
        "Anomaly_Score": np.random.random(100),
        "Anomaly_Label": np.random.choice(["Anomaly", "Normal"], size=100, p=[0.2, 0.8]),
    }
    df = pd.DataFrame(data)

    # Rank once so "top", "middle" and "bottom" all refer to score order
    # rather than the arbitrary generation order of the rows.
    ranked = df.sort_values("Anomaly_Score", ascending=False)

    # Top 10 anomalies: highest scores among rows labelled "Anomaly"
    top_10 = ranked[ranked["Anomaly_Label"] == "Anomaly"].head(10)

    # Middle 10: the rows straddling the median rank
    mid = len(ranked) // 2
    middle = ranked.iloc[max(mid - 5, 0): mid + 5]

    # Bottom 10 normals: lowest scores among rows labelled "Normal"
    bottom_10 = ranked[ranked["Anomaly_Label"] == "Normal"].tail(10)

    return top_10, middle, bottom_10
102
 
103
  # Gradio Interface
104
  with gr.Blocks() as demo:
 
113
  value="Moons",
114
  label="Dataset"
115
  )
116
+ n_samples = gr.Slider(minimum=10, maximum=10000, step=25, value=500, label="Number of Samples")
117
+ outliers_fraction = gr.Slider(minimum=0.001, maximum=0.999, step=0.1, value=0.2, label="Fraction of Outliers")
 
 
 
 
118
 
119
  input_models = ["Robust covariance", "One-Class SVM", "One-Class SVM (SGD)", "Isolation Forest", "Local Outlier Factor"]
120
  plots = []
 
136
  n_samples.change(fn=update_anomaly_comparison, inputs=anomaly_inputs, outputs=anomaly_outputs)
137
  outliers_fraction.change(fn=update_anomaly_comparison, inputs=anomaly_inputs, outputs=anomaly_outputs)
138
 
139
+ # Anomaly Samples Tab
 
 
 
 
 
 
 
 
 
 
 
 
140
  with gr.Tab("Anomaly Samples"):
141
+ gr.Markdown("### Example Anomaly Records")
142
+ top_table = gr.Dataframe(label="Top 10 Anomalies")
 
 
143
  middle_table = gr.Dataframe(label="Middle 10 Records")
144
+ bottom_table = gr.Dataframe(label="Bottom 10 Normals")
 
 
 
145
  anomaly_samples_button = gr.Button("Show Anomaly Samples")
146
+
147
  anomaly_samples_button.click(
148
+ fn=get_anomaly_samples,
149
  outputs=[top_table, middle_table, bottom_table]
150
  )
 
151
 
152
  demo.launch(debug=True)