Spaces:

rtik007
/

AnomalyDetectionExample2

Sleeping

App Files Files Community

rtik007 committed on Nov 23, 2024

Commit

de5a06f

verified ·

1 Parent(s): 13c5031

Update app.py

Browse files

Files changed (1) hide show

app.py +4 -51

app.py CHANGED Viewed

@@ -33,55 +33,6 @@ def prepare_data(input_data, n_samples, outliers_fraction=0.0):
     labels[-len(outliers):] = "Anomaly"
     return X, labels
-# Function to train models and generate plots
-def train_models(input_data, outliers_fraction, n_samples, clf_name):
-    X, _ = prepare_data(input_data, n_samples, outliers_fraction)
-    # Define classifiers
-    NAME_CLF_MAPPING = {
-        "Robust covariance": EllipticEnvelope(contamination=outliers_fraction),
-        "One-Class SVM": svm.OneClassSVM(nu=outliers_fraction, kernel="rbf", gamma=0.1),
-        "One-Class SVM (SGD)": make_pipeline(
-            Nystroem(gamma=0.1, random_state=42, n_components=150),
-            SGDOneClassSVM(
-                nu=outliers_fraction,
-                shuffle=True,
-                fit_intercept=True,
-                random_state=42,
-                tol=1e-6,
-            ),
-        ),
-        "Isolation Forest": IsolationForest(contamination=outliers_fraction, random_state=42),
-        "Local Outlier Factor": LocalOutlierFactor(n_neighbors=35, contamination=outliers_fraction),
-    }
-    clf = NAME_CLF_MAPPING[clf_name]
-    xx, yy = np.meshgrid(np.linspace(-7, 7, 150), np.linspace(-7, 7, 150))
-    t0 = time.time()
-    if clf_name == "Local Outlier Factor":
-        y_pred = clf.fit_predict(X)
-    else:
-        clf.fit(X)
-        y_pred = clf.predict(X)
-    t1 = time.time()
-    # Plotting
-    plt.figure(figsize=(5, 5))
-    if clf_name != "Local Outlier Factor":
-        Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
-        Z = Z.reshape(xx.shape)
-        plt.contour(xx, yy, Z, levels=[0], linewidths=2, colors="black")
-    colors = np.array(["#377eb8", "#ff7f00"])
-    plt.scatter(X[:, 0], X[:, 1], s=30, color=colors[(y_pred + 1) // 2])
-    plt.title(f"{clf_name} ({t1 - t0:.2f}s)")
-    plt.xlim(-7, 7)
-    plt.ylim(-7, 7)
-    plt.xticks(())
-    plt.yticks(())
-    return plt.gcf()
 # Function to detect anomalies and generate anomaly records
 def detect_anomalies(input_data, n_samples, outliers_fraction, model_name):
     X, labels = prepare_data(input_data, n_samples, outliers_fraction)
@@ -107,18 +58,20 @@ def detect_anomalies(input_data, n_samples, outliers_fraction, model_name):
     clf = NAME_CLF_MAPPING[model_name]
     if model_name == "Local Outlier Factor":
         scores = -clf.fit_predict(X)  # Negative for LOF: higher is more anomalous
     else:
         clf.fit(X)
         scores = -clf.decision_function(X)  # Higher score indicates greater anomaly
     # Normalize scores to [0, 1]
-    scores = (scores - scores.min()) / (scores.max() - scores.min())
     # Create DataFrame
     df = pd.DataFrame({
         "Feature1": X[:, 0],
         "Feature2": X[:, 1],
-        "Anomaly_Score": scores,
         "Anomaly_Label": labels,
     })

     labels[-len(outliers):] = "Anomaly"
     return X, labels
 # Function to detect anomalies and generate anomaly records
 def detect_anomalies(input_data, n_samples, outliers_fraction, model_name):
     X, labels = prepare_data(input_data, n_samples, outliers_fraction)
     clf = NAME_CLF_MAPPING[model_name]
     if model_name == "Local Outlier Factor":
         scores = -clf.fit_predict(X)  # Negative for LOF: higher is more anomalous
+        anomaly_scores = clf.negative_outlier_factor_  # LOF specific
     else:
         clf.fit(X)
         scores = -clf.decision_function(X)  # Higher score indicates greater anomaly
+        anomaly_scores = scores
     # Normalize scores to [0, 1]
+    normalized_scores = (anomaly_scores - anomaly_scores.min()) / (anomaly_scores.max() - anomaly_scores.min())
     # Create DataFrame
     df = pd.DataFrame({
         "Feature1": X[:, 0],
         "Feature2": X[:, 1],
+        "Anomaly_Score": normalized_scores,
         "Anomaly_Label": labels,
     })