rtik007 committed on
Commit
f8721a9
·
verified ·
1 Parent(s): ca7fd2c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -12
app.py CHANGED
@@ -13,8 +13,8 @@ import pandas as pd
13
  import time
14
 
15
  # Helper function to prepare data
16
- def prepare_data(input_data, n_samples, outliers_fraction=0.0):
17
- n_outliers = int(outliers_fraction * n_samples)
18
  n_inliers = n_samples - n_outliers
19
  blobs_params = dict(random_state=0, n_samples=n_inliers, n_features=2)
20
 
@@ -35,6 +35,9 @@ def prepare_data(input_data, n_samples, outliers_fraction=0.0):
35
 
36
  # Function to train models and generate plots
37
  def train_models(input_data, outliers_fraction, n_samples, clf_name):
 
 
 
38
  X, _ = prepare_data(input_data, n_samples, outliers_fraction)
39
 
40
  # Define classifiers
@@ -130,20 +133,20 @@ def detect_anomalies(input_data, n_samples, outliers_fraction, model_name):
130
 
131
  # Function to get anomaly samples
132
  def get_anomaly_samples(input_data, n_samples, outliers_fraction, model_name):
 
133
  df = detect_anomalies(input_data, n_samples, outliers_fraction, model_name)
134
 
135
- # Debugging: Check the distribution of anomaly labels
136
- print("Anomaly Label Counts:")
137
- print(df["Anomaly_Label"].value_counts())
138
 
139
  # Top 10 anomalies
140
  top_10 = df[df["Anomaly_Label"] == "Anomaly"].head(10)
141
 
142
- # If no anomalies are found, show a message
143
  if top_10.empty:
 
144
  top_10 = pd.DataFrame({"Message": ["No anomalies found"]})
145
 
146
- # Middle 10 (mixed)
147
  mid_start = len(df) // 2 - 5
148
  middle_10 = df.iloc[mid_start: mid_start + 10]
149
 
@@ -152,25 +155,76 @@ def get_anomaly_samples(input_data, n_samples, outliers_fraction, model_name):
152
 
153
  return top_10, middle_10, bottom_10
154
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
  # Gradio Interface
156
  with gr.Blocks() as demo:
157
- gr.Markdown("## Anomaly Detection App")
 
 
 
158
  input_data = gr.Radio(
159
  choices=["Central Blob", "Two Blobs", "Blob with Noise", "Moons", "Noise"],
160
  value="Moons",
161
  label="Dataset"
162
  )
 
 
163
  n_samples = gr.Slider(minimum=10, maximum=10000, step=25, value=500, label="Number of Samples")
164
- outliers_fraction = gr.Slider(minimum=0.001, maximum=0.999, step=0.1, value=0.2, label="Fraction of Outliers")
165
- model_dropdown = gr.Dropdown(choices=["Robust covariance", "One-Class SVM", "One-Class SVM (SGD)", "Isolation Forest", "Local Outlier Factor"], label="Select Model")
166
 
167
- # Anomaly Samples Output
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  top_table = gr.Dataframe(label="Top 10 Anomalies")
169
  middle_table = gr.Dataframe(label="Middle 10 Records")
170
  bottom_table = gr.Dataframe(label="Bottom 10 Normals")
171
  anomaly_samples_button = gr.Button("Show Anomaly Samples")
 
172
  anomaly_samples_button.click(
173
- fn=get_anomaly_samples,
174
  inputs=[input_data, n_samples, outliers_fraction, model_dropdown],
175
  outputs=[top_table, middle_table, bottom_table],
176
  )
 
13
  import time
14
 
15
  # Helper function to prepare data
16
+ def prepare_data(input_data, n_samples, outliers_fraction=0.01):
17
+ n_outliers = max(int(outliers_fraction * n_samples), 1) # At least 1 outlier
18
  n_inliers = n_samples - n_outliers
19
  blobs_params = dict(random_state=0, n_samples=n_inliers, n_features=2)
20
 
 
35
 
36
  # Function to train models and generate plots
37
  def train_models(input_data, outliers_fraction, n_samples, clf_name):
38
+ # Ensure contamination is valid
39
+ outliers_fraction = max(outliers_fraction, 0.01) # At least 0.01
40
+
41
  X, _ = prepare_data(input_data, n_samples, outliers_fraction)
42
 
43
  # Define classifiers
 
133
 
134
  # Function to get anomaly samples
135
  def get_anomaly_samples(input_data, n_samples, outliers_fraction, model_name):
136
+ outliers_fraction = max(outliers_fraction, 0.01) # Ensure fraction is valid
137
  df = detect_anomalies(input_data, n_samples, outliers_fraction, model_name)
138
 
139
+ # Debugging: Check anomaly label counts
140
+ print("Anomaly Label Counts:", df["Anomaly_Label"].value_counts())
 
141
 
142
  # Top 10 anomalies
143
  top_10 = df[df["Anomaly_Label"] == "Anomaly"].head(10)
144
 
 
145
  if top_10.empty:
146
+ print("No anomalies found in Top 10 Anomalies.")
147
  top_10 = pd.DataFrame({"Message": ["No anomalies found"]})
148
 
149
+ # Middle 10 (mixed records)
150
  mid_start = len(df) // 2 - 5
151
  middle_10 = df.iloc[mid_start: mid_start + 10]
152
 
 
155
 
156
  return top_10, middle_10, bottom_10
157
 
158
+ # Function to plot scatter plots
159
+ def plot_interactive_feature_scatter(input_data, feature_x, feature_y, n_samples):
160
+ data, _ = prepare_data(input_data, n_samples)
161
+ x_data = data[:, 0] if feature_x == "Feature1" else data[:, 1]
162
+ y_data = data[:, 1] if feature_y == "Feature2" else data[:, 0]
163
+
164
+ plt.figure(figsize=(6, 6))
165
+ plt.scatter(x_data, y_data, alpha=0.8, c="blue", s=20, label="Features")
166
+ plt.title(f"Feature Interaction Scatter Plot - {feature_x} vs {feature_y}")
167
+ plt.xlabel(feature_x)
168
+ plt.ylabel(feature_y)
169
+ plt.legend()
170
+ return plt.gcf()
171
+
172
  # Gradio Interface
173
  with gr.Blocks() as demo:
174
+ gr.Markdown("## 🕵️‍♀️ Anomaly Detection App 🕵️‍♂️")
175
+
176
+ # Interactive Feature Scatter Plot
177
+ gr.Markdown("### 1. Interactive Feature Scatter Plot")
178
  input_data = gr.Radio(
179
  choices=["Central Blob", "Two Blobs", "Blob with Noise", "Moons", "Noise"],
180
  value="Moons",
181
  label="Dataset"
182
  )
183
+ feature_x = gr.Dropdown(choices=["Feature1", "Feature2"], value="Feature1", label="Feature 1")
184
+ feature_y = gr.Dropdown(choices=["Feature1", "Feature2"], value="Feature2", label="Feature 2")
185
  n_samples = gr.Slider(minimum=10, maximum=10000, step=25, value=500, label="Number of Samples")
186
+ scatter_plot_button = gr.Button("Generate Scatter Plot")
187
+ scatter_plot = gr.Plot(label="Feature Scatter Plot")
188
 
189
+ scatter_plot_button.click(
190
+ fn=plot_interactive_feature_scatter,
191
+ inputs=[input_data, feature_x, feature_y, n_samples],
192
+ outputs=scatter_plot,
193
+ )
194
+
195
+ # Compare Anomaly Detection Algorithms
196
+ gr.Markdown("### 2. Compare Anomaly Detection Algorithms")
197
+ outliers_fraction = gr.Slider(minimum=0.001, maximum=0.999, step=0.1, value=0.2, label="Fraction of Outliers")
198
+ input_models = ["Robust covariance", "One-Class SVM", "One-Class SVM (SGD)", "Isolation Forest", "Local Outlier Factor"]
199
+ plots = []
200
+ with gr.Row():
201
+ for model_name in input_models:
202
+ plot = gr.Plot(label=model_name)
203
+ plots.append((model_name, plot))
204
+
205
+ def update_anomaly_comparison(input_data, outliers_fraction, n_samples):
206
+ results = []
207
+ for clf_name, plot in plots:
208
+ fig = train_models(input_data, outliers_fraction, n_samples, clf_name)
209
+ results.append(fig)
210
+ return results
211
+
212
+ anomaly_inputs = [input_data, outliers_fraction, n_samples]
213
+ anomaly_outputs = [plot for _, plot in plots]
214
+ input_data.change(fn=update_anomaly_comparison, inputs=anomaly_inputs, outputs=anomaly_outputs)
215
+ n_samples.change(fn=update_anomaly_comparison, inputs=anomaly_inputs, outputs=anomaly_outputs)
216
+ outliers_fraction.change(fn=update_anomaly_comparison, inputs=anomaly_inputs, outputs=anomaly_outputs)
217
+
218
+ # Example Anomaly Records
219
+ gr.Markdown("### 3. Example Anomaly Records")
220
+ model_dropdown = gr.Dropdown(choices=input_models, value="Isolation Forest", label="Select Model")
221
  top_table = gr.Dataframe(label="Top 10 Anomalies")
222
  middle_table = gr.Dataframe(label="Middle 10 Records")
223
  bottom_table = gr.Dataframe(label="Bottom 10 Normals")
224
  anomaly_samples_button = gr.Button("Show Anomaly Samples")
225
+
226
  anomaly_samples_button.click(
227
+ fn=get_anomaly_samples,
228
  inputs=[input_data, n_samples, outliers_fraction, model_dropdown],
229
  outputs=[top_table, middle_table, bottom_table],
230
  )