Spaces:

rtik007
/

AnomalyDetectionExample2

Sleeping

App Files Files Community

rtik007 committed on Nov 23, 2024

Commit

bc4ace7

verified ·

1 Parent(s): fbc6f20

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -56

app.py CHANGED Viewed

@@ -33,9 +33,9 @@ def prepare_data(input_data, n_samples, outliers_fraction=0.0):
     labels[-len(outliers):] = "Anomaly"
     return X, labels
-# Function to train models and generate plots
-def train_models(input_data, outliers_fraction, n_samples, clf_name):
-    X, _ = prepare_data(input_data, n_samples, outliers_fraction)
     # Define classifiers
     NAME_CLF_MAPPING = {
@@ -55,62 +55,17 @@ def train_models(input_data, outliers_fraction, n_samples, clf_name):
         "Local Outlier Factor": LocalOutlierFactor(n_neighbors=35, contamination=outliers_fraction),
     }
-    clf = NAME_CLF_MAPPING[clf_name]
-    xx, yy = np.meshgrid(np.linspace(-7, 7, 150), np.linspace(-7, 7, 150))
-    t0 = time.time()
-    if clf_name == "Local Outlier Factor":
-        y_pred = clf.fit_predict(X)
     else:
         clf.fit(X)
-        y_pred = clf.predict(X)
-    t1 = time.time()
-    # Plotting
-    plt.figure(figsize=(5, 5))
-    if clf_name != "Local Outlier Factor":
-        Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
-        Z = Z.reshape(xx.shape)
-        plt.contour(xx, yy, Z, levels=[0], linewidths=2, colors="black")
-    colors = np.array(["#377eb8", "#ff7f00"])
-    plt.scatter(X[:, 0], X[:, 1], s=30, color=colors[(y_pred + 1) // 2])
-    plt.title(f"{clf_name} ({t1 - t0:.2f}s)")
-    plt.xlim(-7, 7)
-    plt.ylim(-7, 7)
-    plt.xticks(())
-    plt.yticks(())
-    return plt.gcf()
-# Function to generate feature scatter plots
-def plot_interactive_feature_scatter(input_data, feature_x, feature_y, n_samples):
-    data, _ = prepare_data(input_data, n_samples)
-    x_data = data[:, 0] if feature_x == "Feature1" else data[:, 1]
-    y_data = data[:, 1] if feature_y == "Feature2" else data[:, 0]
-    # Generate scatter plot
-    plt.figure(figsize=(6, 6))
-    plt.scatter(x_data, y_data, alpha=0.8, c="blue", s=20, label="Features")
-    plt.title(f"Feature Interaction Scatter Plot - {feature_x} vs {feature_y}")
-    plt.xlabel(feature_x)
-    plt.ylabel(feature_y)
-    plt.legend()
-    return plt.gcf()
-# Function to simulate anomaly samples
-def get_anomaly_samples(input_data, n_samples, outliers_fraction):
-    # Prepare data with labels
-    X, labels = prepare_data(input_data, n_samples, outliers_fraction)
-    # Assign anomaly scores with higher values for anomalies
-    rng = np.random.default_rng(42)
-    scores = np.where(
-        labels == "Anomaly",
-        rng.uniform(0.7, 1.0, len(labels)),  # Higher scores for anomalies
-        rng.uniform(0.0, 0.7, len(labels)),  # Lower scores for normals
-    )
-    # Create a DataFrame
     df = pd.DataFrame({
         "Feature1": X[:, 0],
         "Feature2": X[:, 1],
@@ -124,6 +79,12 @@ def get_anomaly_samples(input_data, n_samples, outliers_fraction):
     # Round values to 3 decimal places
     df = df.round({"Feature1": 3, "Feature2": 3, "Anomaly_Score": 3})
     # Top 10 anomalies
     top_10 = df[df["Anomaly_Label"] == "Anomaly"].head(10)
@@ -136,6 +97,21 @@ def get_anomaly_samples(input_data, n_samples, outliers_fraction):
     return top_10, middle_10, bottom_10
 # Gradio Interface
 with gr.Blocks() as demo:
     # App Title and Description
@@ -186,6 +162,7 @@ with gr.Blocks() as demo:
     # Anomaly Samples Tab
     gr.Markdown("### 3. Example Anomaly Records")
     top_table = gr.Dataframe(label="Top 10 Anomalies")
     middle_table = gr.Dataframe(label="Middle 10 Records")
     bottom_table = gr.Dataframe(label="Bottom 10 Normals")
@@ -193,7 +170,7 @@ with gr.Blocks() as demo:
     anomaly_samples_button.click(
         fn=get_anomaly_samples,
-        inputs=[input_data, n_samples, outliers_fraction],
         outputs=[top_table, middle_table, bottom_table],
     )

     labels[-len(outliers):] = "Anomaly"
     return X, labels
+# Function to train and detect anomalies
+def detect_anomalies(input_data, n_samples, outliers_fraction, model_name):
+    X, labels = prepare_data(input_data, n_samples, outliers_fraction)
     # Define classifiers
     NAME_CLF_MAPPING = {
         "Local Outlier Factor": LocalOutlierFactor(n_neighbors=35, contamination=outliers_fraction),
     }
+    clf = NAME_CLF_MAPPING[model_name]
+    if model_name == "Local Outlier Factor":
+        scores = -clf.fit_predict(X)  # Negative for LOF: higher is more anomalous
     else:
         clf.fit(X)
+        scores = -clf.decision_function(X)  # Higher score indicates greater anomaly
+    # Normalize scores to [0, 1]
+    scores = (scores - scores.min()) / (scores.max() - scores.min())
+    # Create DataFrame
     df = pd.DataFrame({
         "Feature1": X[:, 0],
         "Feature2": X[:, 1],
     # Round values to 3 decimal places
     df = df.round({"Feature1": 3, "Feature2": 3, "Anomaly_Score": 3})
+    return df
+# Function to fetch anomaly records based on the selected model
+def get_anomaly_samples(input_data, n_samples, outliers_fraction, model_name):
+    df = detect_anomalies(input_data, n_samples, outliers_fraction, model_name)
     # Top 10 anomalies
     top_10 = df[df["Anomaly_Label"] == "Anomaly"].head(10)
     return top_10, middle_10, bottom_10
+# Function to generate feature scatter plots
+def plot_interactive_feature_scatter(input_data, feature_x, feature_y, n_samples):
+    data, _ = prepare_data(input_data, n_samples)
+    x_data = data[:, 0] if feature_x == "Feature1" else data[:, 1]
+    y_data = data[:, 1] if feature_y == "Feature2" else data[:, 0]
+    # Generate scatter plot
+    plt.figure(figsize=(6, 6))
+    plt.scatter(x_data, y_data, alpha=0.8, c="blue", s=20, label="Features")
+    plt.title(f"Feature Interaction Scatter Plot - {feature_x} vs {feature_y}")
+    plt.xlabel(feature_x)
+    plt.ylabel(feature_y)
+    plt.legend()
+    return plt.gcf()
 # Gradio Interface
 with gr.Blocks() as demo:
     # App Title and Description
     # Anomaly Samples Tab
     gr.Markdown("### 3. Example Anomaly Records")
+    model_dropdown = gr.Dropdown(choices=input_models, value="Isolation Forest", label="Select Model")
     top_table = gr.Dataframe(label="Top 10 Anomalies")
     middle_table = gr.Dataframe(label="Middle 10 Records")
     bottom_table = gr.Dataframe(label="Bottom 10 Normals")
     anomaly_samples_button.click(
         fn=get_anomaly_samples,
+        inputs=[input_data, n_samples, outliers_fraction, model_dropdown],
         outputs=[top_table, middle_table, bottom_table],
     )