rtik007 committed on
Commit
ca7fd2c
·
verified ·
1 Parent(s): 0e10b41

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -74
app.py CHANGED
@@ -106,47 +106,41 @@ def detect_anomalies(input_data, n_samples, outliers_fraction, model_name):
106
 
107
  clf = NAME_CLF_MAPPING[model_name]
108
  if model_name == "Local Outlier Factor":
109
- scores = -clf.fit_predict(X) # Negative for LOF: higher is more anomalous
110
- anomaly_scores = clf.negative_outlier_factor_ # LOF specific
111
  else:
112
  clf.fit(X)
113
- scores = -clf.decision_function(X) # Higher score indicates greater anomaly
114
- anomaly_scores = scores
115
 
116
- # Normalize scores to [0, 1]
117
- normalized_scores = (anomaly_scores - anomaly_scores.min()) / (anomaly_scores.max() - anomaly_scores.min())
118
 
119
  # Create DataFrame
120
  df = pd.DataFrame({
121
  "Feature1": X[:, 0],
122
  "Feature2": X[:, 1],
123
- "Anomaly_Score": normalized_scores,
124
  "Anomaly_Label": labels,
125
  })
126
 
127
- # Sort by Anomaly Score in descending order
128
- df = df.sort_values("Anomaly_Score", ascending=False)
129
-
130
- # Round values to 3 decimal places
131
- df = df.round({"Feature1": 3, "Feature2": 3, "Anomaly_Score": 3})
132
 
133
  return df
134
 
135
  # Function to get anomaly samples
136
  def get_anomaly_samples(input_data, n_samples, outliers_fraction, model_name):
137
- # Detect anomalies
138
  df = detect_anomalies(input_data, n_samples, outliers_fraction, model_name)
139
 
140
  # Debugging: Check the distribution of anomaly labels
141
  print("Anomaly Label Counts:")
142
  print(df["Anomaly_Label"].value_counts())
143
 
144
- # Ensure filtering for anomalies works as expected
145
  top_10 = df[df["Anomaly_Label"] == "Anomaly"].head(10)
146
 
147
  # If no anomalies are found, show a message
148
  if top_10.empty:
149
- print("No anomalies found in the top 10 results.")
150
  top_10 = pd.DataFrame({"Message": ["No anomalies found"]})
151
 
152
  # Middle 10 (mixed)
@@ -158,79 +152,25 @@ def get_anomaly_samples(input_data, n_samples, outliers_fraction, model_name):
158
 
159
  return top_10, middle_10, bottom_10
160
 
161
- # Function to generate feature scatter plots
162
- def plot_interactive_feature_scatter(input_data, feature_x, feature_y, n_samples):
163
- data, _ = prepare_data(input_data, n_samples)
164
- x_data = data[:, 0] if feature_x == "Feature1" else data[:, 1]
165
- y_data = data[:, 1] if feature_y == "Feature2" else data[:, 0]
166
-
167
- # Generate scatter plot
168
- plt.figure(figsize=(6, 6))
169
- plt.scatter(x_data, y_data, alpha=0.8, c="blue", s=20, label="Features")
170
- plt.title(f"Feature Interaction Scatter Plot - {feature_x} vs {feature_y}")
171
- plt.xlabel(feature_x)
172
- plt.ylabel(feature_y)
173
- plt.legend()
174
- return plt.gcf()
175
-
176
  # Gradio Interface
177
  with gr.Blocks() as demo:
178
- # App Title and Description
179
- gr.Markdown("## 🕵️‍♀️ Anomaly Detection App 🕵️‍♂️")
180
- gr.Markdown("Explore anomaly detection models, feature interactions, and anomaly examples.")
181
-
182
- # Interactive Feature Scatter Plot
183
- gr.Markdown("### 1. Interactive Feature Scatter Plot")
184
  input_data = gr.Radio(
185
  choices=["Central Blob", "Two Blobs", "Blob with Noise", "Moons", "Noise"],
186
  value="Moons",
187
  label="Dataset"
188
  )
189
- feature_x = gr.Dropdown(choices=["Feature1", "Feature2"], value="Feature1", label="Feature 1")
190
- feature_y = gr.Dropdown(choices=["Feature1", "Feature2"], value="Feature2", label="Feature 2")
191
  n_samples = gr.Slider(minimum=10, maximum=10000, step=25, value=500, label="Number of Samples")
192
- scatter_plot_button = gr.Button("Generate Scatter Plot")
193
- scatter_plot = gr.Plot(label="Feature Scatter Plot")
194
-
195
- scatter_plot_button.click(
196
- fn=plot_interactive_feature_scatter,
197
- inputs=[input_data, feature_x, feature_y, n_samples],
198
- outputs=scatter_plot,
199
- )
200
-
201
- # Compare Anomaly Detection Algorithms
202
- gr.Markdown("### 2. Compare Anomaly Detection Algorithms")
203
  outliers_fraction = gr.Slider(minimum=0.001, maximum=0.999, step=0.1, value=0.2, label="Fraction of Outliers")
204
- input_models = ["Robust covariance", "One-Class SVM", "One-Class SVM (SGD)", "Isolation Forest", "Local Outlier Factor"]
205
- plots = []
206
- with gr.Row():
207
- for model_name in input_models:
208
- plot = gr.Plot(label=model_name)
209
- plots.append((model_name, plot))
210
-
211
- def update_anomaly_comparison(input_data, outliers_fraction, n_samples):
212
- results = []
213
- for clf_name, plot in plots:
214
- fig = train_models(input_data, outliers_fraction, n_samples, clf_name)
215
- results.append(fig)
216
- return results
217
-
218
- anomaly_inputs = [input_data, outliers_fraction, n_samples]
219
- anomaly_outputs = [plot for _, plot in plots]
220
- input_data.change(fn=update_anomaly_comparison, inputs=anomaly_inputs, outputs=anomaly_outputs)
221
- n_samples.change(fn=update_anomaly_comparison, inputs=anomaly_inputs, outputs=anomaly_outputs)
222
- outliers_fraction.change(fn=update_anomaly_comparison, inputs=anomaly_inputs, outputs=anomaly_outputs)
223
-
224
- # Anomaly Samples Tab
225
- gr.Markdown("### 3. Example Anomaly Records")
226
- model_dropdown = gr.Dropdown(choices=input_models, value="Isolation Forest", label="Select Model")
227
  top_table = gr.Dataframe(label="Top 10 Anomalies")
228
  middle_table = gr.Dataframe(label="Middle 10 Records")
229
  bottom_table = gr.Dataframe(label="Bottom 10 Normals")
230
  anomaly_samples_button = gr.Button("Show Anomaly Samples")
231
-
232
  anomaly_samples_button.click(
233
- fn=get_anomaly_samples,
234
  inputs=[input_data, n_samples, outliers_fraction, model_dropdown],
235
  outputs=[top_table, middle_table, bottom_table],
236
  )
 
106
 
107
  clf = NAME_CLF_MAPPING[model_name]
108
  if model_name == "Local Outlier Factor":
109
+ clf.fit(X)
110
+ scores = -clf.negative_outlier_factor_
111
  else:
112
  clf.fit(X)
113
+ scores = -clf.decision_function(X)
 
114
 
115
+ # Normalize scores to a consistent range
116
+ scores = (scores - scores.min()) / (scores.max() - scores.min())
117
 
118
  # Create DataFrame
119
  df = pd.DataFrame({
120
  "Feature1": X[:, 0],
121
  "Feature2": X[:, 1],
122
+ "Anomaly_Score": scores,
123
  "Anomaly_Label": labels,
124
  })
125
 
126
+ # Sort by anomaly score in descending order
127
+ df = df.sort_values("Anomaly_Score", ascending=False).reset_index(drop=True)
 
 
 
128
 
129
  return df
130
 
131
  # Function to get anomaly samples
132
  def get_anomaly_samples(input_data, n_samples, outliers_fraction, model_name):
 
133
  df = detect_anomalies(input_data, n_samples, outliers_fraction, model_name)
134
 
135
  # Debugging: Check the distribution of anomaly labels
136
  print("Anomaly Label Counts:")
137
  print(df["Anomaly_Label"].value_counts())
138
 
139
+ # Top 10 anomalies
140
  top_10 = df[df["Anomaly_Label"] == "Anomaly"].head(10)
141
 
142
  # If no anomalies are found, show a message
143
  if top_10.empty:
 
144
  top_10 = pd.DataFrame({"Message": ["No anomalies found"]})
145
 
146
  # Middle 10 (mixed)
 
152
 
153
  return top_10, middle_10, bottom_10
154
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
  # Gradio Interface
156
  with gr.Blocks() as demo:
157
+ gr.Markdown("## Anomaly Detection App")
 
 
 
 
 
158
  input_data = gr.Radio(
159
  choices=["Central Blob", "Two Blobs", "Blob with Noise", "Moons", "Noise"],
160
  value="Moons",
161
  label="Dataset"
162
  )
 
 
163
  n_samples = gr.Slider(minimum=10, maximum=10000, step=25, value=500, label="Number of Samples")
 
 
 
 
 
 
 
 
 
 
 
164
  outliers_fraction = gr.Slider(minimum=0.001, maximum=0.999, step=0.1, value=0.2, label="Fraction of Outliers")
165
+ model_dropdown = gr.Dropdown(choices=["Robust covariance", "One-Class SVM", "One-Class SVM (SGD)", "Isolation Forest", "Local Outlier Factor"], label="Select Model")
166
+
167
+ # Anomaly Samples Output
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  top_table = gr.Dataframe(label="Top 10 Anomalies")
169
  middle_table = gr.Dataframe(label="Middle 10 Records")
170
  bottom_table = gr.Dataframe(label="Bottom 10 Normals")
171
  anomaly_samples_button = gr.Button("Show Anomaly Samples")
 
172
  anomaly_samples_button.click(
173
+ fn=get_anomaly_samples,
174
  inputs=[input_data, n_samples, outliers_fraction, model_dropdown],
175
  outputs=[top_table, middle_table, bottom_table],
176
  )