Spaces:

rtik007
/

AnomalyDetectionExample2

Sleeping

App Files Community

rtik007 committed on Nov 23, 2024

Commit

ca7fd2c

verified ·

1 Parent(s): 0e10b41

Update app.py

Browse files

Files changed (1) hide show

app.py +14 -74

app.py CHANGED Viewed

@@ -106,47 +106,41 @@ def detect_anomalies(input_data, n_samples, outliers_fraction, model_name):
     clf = NAME_CLF_MAPPING[model_name]
     if model_name == "Local Outlier Factor":
-        scores = -clf.fit_predict(X)  # Negative for LOF: higher is more anomalous
-        anomaly_scores = clf.negative_outlier_factor_  # LOF specific
     else:
         clf.fit(X)
-        scores = -clf.decision_function(X)  # Higher score indicates greater anomaly
-        anomaly_scores = scores
-    # Normalize scores to [0, 1]
-    normalized_scores = (anomaly_scores - anomaly_scores.min()) / (anomaly_scores.max() - anomaly_scores.min())
     # Create DataFrame
     df = pd.DataFrame({
         "Feature1": X[:, 0],
         "Feature2": X[:, 1],
-        "Anomaly_Score": normalized_scores,
         "Anomaly_Label": labels,
     })
-    # Sort by Anomaly Score in descending order
-    df = df.sort_values("Anomaly_Score", ascending=False)
-    # Round values to 3 decimal places
-    df = df.round({"Feature1": 3, "Feature2": 3, "Anomaly_Score": 3})
     return df
 # Function to get anomaly samples
 def get_anomaly_samples(input_data, n_samples, outliers_fraction, model_name):
-    # Detect anomalies
     df = detect_anomalies(input_data, n_samples, outliers_fraction, model_name)
     # Debugging: Check the distribution of anomaly labels
     print("Anomaly Label Counts:")
     print(df["Anomaly_Label"].value_counts())
-    # Ensure filtering for anomalies works as expected
     top_10 = df[df["Anomaly_Label"] == "Anomaly"].head(10)
     # If no anomalies are found, show a message
     if top_10.empty:
-        print("No anomalies found in the top 10 results.")
         top_10 = pd.DataFrame({"Message": ["No anomalies found"]})
     # Middle 10 (mixed)
@@ -158,79 +152,25 @@ def get_anomaly_samples(input_data, n_samples, outliers_fraction, model_name):
     return top_10, middle_10, bottom_10
-# Function to generate feature scatter plots
-def plot_interactive_feature_scatter(input_data, feature_x, feature_y, n_samples):
-    data, _ = prepare_data(input_data, n_samples)
-    x_data = data[:, 0] if feature_x == "Feature1" else data[:, 1]
-    y_data = data[:, 1] if feature_y == "Feature2" else data[:, 0]
-    # Generate scatter plot
-    plt.figure(figsize=(6, 6))
-    plt.scatter(x_data, y_data, alpha=0.8, c="blue", s=20, label="Features")
-    plt.title(f"Feature Interaction Scatter Plot - {feature_x} vs {feature_y}")
-    plt.xlabel(feature_x)
-    plt.ylabel(feature_y)
-    plt.legend()
-    return plt.gcf()
 # Gradio Interface
 with gr.Blocks() as demo:
-    # App Title and Description
-    gr.Markdown("## 🕵️‍♀️ Anomaly Detection App 🕵️‍♂️")
-    gr.Markdown("Explore anomaly detection models, feature interactions, and anomaly examples.")
-    # Interactive Feature Scatter Plot
-    gr.Markdown("### 1. Interactive Feature Scatter Plot")
     input_data = gr.Radio(
         choices=["Central Blob", "Two Blobs", "Blob with Noise", "Moons", "Noise"],
         value="Moons",
         label="Dataset"
     )
-    feature_x = gr.Dropdown(choices=["Feature1", "Feature2"], value="Feature1", label="Feature 1")
-    feature_y = gr.Dropdown(choices=["Feature1", "Feature2"], value="Feature2", label="Feature 2")
     n_samples = gr.Slider(minimum=10, maximum=10000, step=25, value=500, label="Number of Samples")
-    scatter_plot_button = gr.Button("Generate Scatter Plot")
-    scatter_plot = gr.Plot(label="Feature Scatter Plot")
-    scatter_plot_button.click(
-        fn=plot_interactive_feature_scatter,
-        inputs=[input_data, feature_x, feature_y, n_samples],
-        outputs=scatter_plot,
-    )
-    # Compare Anomaly Detection Algorithms
-    gr.Markdown("### 2. Compare Anomaly Detection Algorithms")
     outliers_fraction = gr.Slider(minimum=0.001, maximum=0.999, step=0.1, value=0.2, label="Fraction of Outliers")
-    input_models = ["Robust covariance", "One-Class SVM", "One-Class SVM (SGD)", "Isolation Forest", "Local Outlier Factor"]
-    plots = []
-    with gr.Row():
-        for model_name in input_models:
-            plot = gr.Plot(label=model_name)
-            plots.append((model_name, plot))
-    def update_anomaly_comparison(input_data, outliers_fraction, n_samples):
-        results = []
-        for clf_name, plot in plots:
-            fig = train_models(input_data, outliers_fraction, n_samples, clf_name)
-            results.append(fig)
-        return results
-    anomaly_inputs = [input_data, outliers_fraction, n_samples]
-    anomaly_outputs = [plot for _, plot in plots]
-    input_data.change(fn=update_anomaly_comparison, inputs=anomaly_inputs, outputs=anomaly_outputs)
-    n_samples.change(fn=update_anomaly_comparison, inputs=anomaly_inputs, outputs=anomaly_outputs)
-    outliers_fraction.change(fn=update_anomaly_comparison, inputs=anomaly_inputs, outputs=anomaly_outputs)
-    # Anomaly Samples Tab
-    gr.Markdown("### 3. Example Anomaly Records")
-    model_dropdown = gr.Dropdown(choices=input_models, value="Isolation Forest", label="Select Model")
     top_table = gr.Dataframe(label="Top 10 Anomalies")
     middle_table = gr.Dataframe(label="Middle 10 Records")
     bottom_table = gr.Dataframe(label="Bottom 10 Normals")
     anomaly_samples_button = gr.Button("Show Anomaly Samples")
     anomaly_samples_button.click(
-        fn=get_anomaly_samples,
         inputs=[input_data, n_samples, outliers_fraction, model_dropdown],
         outputs=[top_table, middle_table, bottom_table],
     )

     clf = NAME_CLF_MAPPING[model_name]
     if model_name == "Local Outlier Factor":
+        clf.fit(X)
+        scores = -clf.negative_outlier_factor_
     else:
         clf.fit(X)
+        scores = -clf.decision_function(X)
+    # Normalize scores to [0, 1] with min-max scaling (higher = more anomalous)
+    scores = (scores - scores.min()) / (scores.max() - scores.min())
     # Create DataFrame
     df = pd.DataFrame({
         "Feature1": X[:, 0],
         "Feature2": X[:, 1],
+        "Anomaly_Score": scores,
         "Anomaly_Label": labels,
     })
+    # Sort by anomaly score in descending order
+    df = df.sort_values("Anomaly_Score", ascending=False).reset_index(drop=True)
     return df
 # Function to get anomaly samples
 def get_anomaly_samples(input_data, n_samples, outliers_fraction, model_name):
     df = detect_anomalies(input_data, n_samples, outliers_fraction, model_name)
     # Debugging: Check the distribution of anomaly labels
     print("Anomaly Label Counts:")
     print(df["Anomaly_Label"].value_counts())
+    # Top 10 anomalies
     top_10 = df[df["Anomaly_Label"] == "Anomaly"].head(10)
     # If no anomalies are found, show a message
     if top_10.empty:
         top_10 = pd.DataFrame({"Message": ["No anomalies found"]})
     # Middle 10 (mixed)
     return top_10, middle_10, bottom_10
 # Gradio Interface
 with gr.Blocks() as demo:
+    gr.Markdown("## Anomaly Detection App")
     input_data = gr.Radio(
         choices=["Central Blob", "Two Blobs", "Blob with Noise", "Moons", "Noise"],
         value="Moons",
         label="Dataset"
     )
     n_samples = gr.Slider(minimum=10, maximum=10000, step=25, value=500, label="Number of Samples")
     outliers_fraction = gr.Slider(minimum=0.001, maximum=0.999, step=0.1, value=0.2, label="Fraction of Outliers")
+    model_dropdown = gr.Dropdown(choices=["Robust covariance", "One-Class SVM", "One-Class SVM (SGD)", "Isolation Forest", "Local Outlier Factor"], label="Select Model")
+    # Anomaly Samples Output
     top_table = gr.Dataframe(label="Top 10 Anomalies")
     middle_table = gr.Dataframe(label="Middle 10 Records")
     bottom_table = gr.Dataframe(label="Bottom 10 Normals")
     anomaly_samples_button = gr.Button("Show Anomaly Samples")
     anomaly_samples_button.click(
+        fn=get_anomaly_samples,
         inputs=[input_data, n_samples, outliers_fraction, model_dropdown],
         outputs=[top_table, middle_table, bottom_table],
     )