rtik007 committed on
Commit
bc4ace7
·
verified ·
1 Parent(s): fbc6f20

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -56
app.py CHANGED
@@ -33,9 +33,9 @@ def prepare_data(input_data, n_samples, outliers_fraction=0.0):
33
  labels[-len(outliers):] = "Anomaly"
34
  return X, labels
35
 
36
- # Function to train models and generate plots
37
- def train_models(input_data, outliers_fraction, n_samples, clf_name):
38
- X, _ = prepare_data(input_data, n_samples, outliers_fraction)
39
 
40
  # Define classifiers
41
  NAME_CLF_MAPPING = {
@@ -55,62 +55,17 @@ def train_models(input_data, outliers_fraction, n_samples, clf_name):
55
  "Local Outlier Factor": LocalOutlierFactor(n_neighbors=35, contamination=outliers_fraction),
56
  }
57
 
58
- clf = NAME_CLF_MAPPING[clf_name]
59
- xx, yy = np.meshgrid(np.linspace(-7, 7, 150), np.linspace(-7, 7, 150))
60
-
61
- t0 = time.time()
62
- if clf_name == "Local Outlier Factor":
63
- y_pred = clf.fit_predict(X)
64
  else:
65
  clf.fit(X)
66
- y_pred = clf.predict(X)
67
- t1 = time.time()
68
-
69
- # Plotting
70
- plt.figure(figsize=(5, 5))
71
- if clf_name != "Local Outlier Factor":
72
- Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
73
- Z = Z.reshape(xx.shape)
74
- plt.contour(xx, yy, Z, levels=[0], linewidths=2, colors="black")
75
-
76
- colors = np.array(["#377eb8", "#ff7f00"])
77
- plt.scatter(X[:, 0], X[:, 1], s=30, color=colors[(y_pred + 1) // 2])
78
- plt.title(f"{clf_name} ({t1 - t0:.2f}s)")
79
- plt.xlim(-7, 7)
80
- plt.ylim(-7, 7)
81
- plt.xticks(())
82
- plt.yticks(())
83
- return plt.gcf()
84
-
85
- # Function to generate feature scatter plots
86
- def plot_interactive_feature_scatter(input_data, feature_x, feature_y, n_samples):
87
- data, _ = prepare_data(input_data, n_samples)
88
- x_data = data[:, 0] if feature_x == "Feature1" else data[:, 1]
89
- y_data = data[:, 1] if feature_y == "Feature2" else data[:, 0]
90
-
91
- # Generate scatter plot
92
- plt.figure(figsize=(6, 6))
93
- plt.scatter(x_data, y_data, alpha=0.8, c="blue", s=20, label="Features")
94
- plt.title(f"Feature Interaction Scatter Plot - {feature_x} vs {feature_y}")
95
- plt.xlabel(feature_x)
96
- plt.ylabel(feature_y)
97
- plt.legend()
98
- return plt.gcf()
99
-
100
- # Function to simulate anomaly samples
101
- def get_anomaly_samples(input_data, n_samples, outliers_fraction):
102
- # Prepare data with labels
103
- X, labels = prepare_data(input_data, n_samples, outliers_fraction)
104
 
105
- # Assign anomaly scores with higher values for anomalies
106
- rng = np.random.default_rng(42)
107
- scores = np.where(
108
- labels == "Anomaly",
109
- rng.uniform(0.7, 1.0, len(labels)), # Higher scores for anomalies
110
- rng.uniform(0.0, 0.7, len(labels)), # Lower scores for normals
111
- )
112
 
113
- # Create a DataFrame
114
  df = pd.DataFrame({
115
  "Feature1": X[:, 0],
116
  "Feature2": X[:, 1],
@@ -124,6 +79,12 @@ def get_anomaly_samples(input_data, n_samples, outliers_fraction):
124
  # Round values to 3 decimal places
125
  df = df.round({"Feature1": 3, "Feature2": 3, "Anomaly_Score": 3})
126
 
 
 
 
 
 
 
127
  # Top 10 anomalies
128
  top_10 = df[df["Anomaly_Label"] == "Anomaly"].head(10)
129
 
@@ -136,6 +97,21 @@ def get_anomaly_samples(input_data, n_samples, outliers_fraction):
136
 
137
  return top_10, middle_10, bottom_10
138
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  # Gradio Interface
140
  with gr.Blocks() as demo:
141
  # App Title and Description
@@ -186,6 +162,7 @@ with gr.Blocks() as demo:
186
 
187
  # Anomaly Samples Tab
188
  gr.Markdown("### 3. Example Anomaly Records")
 
189
  top_table = gr.Dataframe(label="Top 10 Anomalies")
190
  middle_table = gr.Dataframe(label="Middle 10 Records")
191
  bottom_table = gr.Dataframe(label="Bottom 10 Normals")
@@ -193,7 +170,7 @@ with gr.Blocks() as demo:
193
 
194
  anomaly_samples_button.click(
195
  fn=get_anomaly_samples,
196
- inputs=[input_data, n_samples, outliers_fraction],
197
  outputs=[top_table, middle_table, bottom_table],
198
  )
199
 
 
33
  labels[-len(outliers):] = "Anomaly"
34
  return X, labels
35
 
36
+ # Function to train and detect anomalies
37
+ def detect_anomalies(input_data, n_samples, outliers_fraction, model_name):
38
+ X, labels = prepare_data(input_data, n_samples, outliers_fraction)
39
 
40
  # Define classifiers
41
  NAME_CLF_MAPPING = {
 
55
  "Local Outlier Factor": LocalOutlierFactor(n_neighbors=35, contamination=outliers_fraction),
56
  }
57
 
58
+ clf = NAME_CLF_MAPPING[model_name]
59
+ if model_name == "Local Outlier Factor":
60
+ scores = -clf.fit_predict(X) # Negative for LOF: higher is more anomalous
 
 
 
61
  else:
62
  clf.fit(X)
63
+ scores = -clf.decision_function(X) # Higher score indicates greater anomaly
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
+ # Normalize scores to [0, 1]
66
+ scores = (scores - scores.min()) / (scores.max() - scores.min())
 
 
 
 
 
67
 
68
+ # Create DataFrame
69
  df = pd.DataFrame({
70
  "Feature1": X[:, 0],
71
  "Feature2": X[:, 1],
 
79
  # Round values to 3 decimal places
80
  df = df.round({"Feature1": 3, "Feature2": 3, "Anomaly_Score": 3})
81
 
82
+ return df
83
+
84
+ # Function to fetch anomaly records based on the selected model
85
+ def get_anomaly_samples(input_data, n_samples, outliers_fraction, model_name):
86
+ df = detect_anomalies(input_data, n_samples, outliers_fraction, model_name)
87
+
88
  # Top 10 anomalies
89
  top_10 = df[df["Anomaly_Label"] == "Anomaly"].head(10)
90
 
 
97
 
98
  return top_10, middle_10, bottom_10
99
 
100
+ # Function to generate feature scatter plots
101
+ def plot_interactive_feature_scatter(input_data, feature_x, feature_y, n_samples):
102
+ data, _ = prepare_data(input_data, n_samples)
103
+ x_data = data[:, 0] if feature_x == "Feature1" else data[:, 1]
104
+ y_data = data[:, 1] if feature_y == "Feature2" else data[:, 0]
105
+
106
+ # Generate scatter plot
107
+ plt.figure(figsize=(6, 6))
108
+ plt.scatter(x_data, y_data, alpha=0.8, c="blue", s=20, label="Features")
109
+ plt.title(f"Feature Interaction Scatter Plot - {feature_x} vs {feature_y}")
110
+ plt.xlabel(feature_x)
111
+ plt.ylabel(feature_y)
112
+ plt.legend()
113
+ return plt.gcf()
114
+
115
  # Gradio Interface
116
  with gr.Blocks() as demo:
117
  # App Title and Description
 
162
 
163
  # Anomaly Samples Tab
164
  gr.Markdown("### 3. Example Anomaly Records")
165
+ model_dropdown = gr.Dropdown(choices=input_models, value="Isolation Forest", label="Select Model")
166
  top_table = gr.Dataframe(label="Top 10 Anomalies")
167
  middle_table = gr.Dataframe(label="Middle 10 Records")
168
  bottom_table = gr.Dataframe(label="Bottom 10 Normals")
 
170
 
171
  anomaly_samples_button.click(
172
  fn=get_anomaly_samples,
173
+ inputs=[input_data, n_samples, outliers_fraction, model_dropdown],
174
  outputs=[top_table, middle_table, bottom_table],
175
  )
176