rtik007 committed on
Commit
83ce25d
·
verified ·
1 Parent(s): c4e463e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -82
app.py CHANGED
@@ -1,55 +1,4 @@
1
- import numpy as np
2
- import pandas as pd
3
- from sklearn.datasets import make_classification, make_blobs, make_moons
4
- from sklearn.ensemble import IsolationForest
5
- from sklearn.metrics import roc_curve, auc
6
- import matplotlib.pyplot as plt
7
- import gradio as gr
8
- from sklearn.covariance import EllipticEnvelope
9
- from sklearn.neighbors import LocalOutlierFactor
10
- from sklearn.linear_model import SGDOneClassSVM
11
- from sklearn.pipeline import make_pipeline
12
- from sklearn.kernel_approximation import Nystroem
13
- from sklearn import svm
14
- import time
15
- from functools import partial
16
-
17
-
18
- # Generate synthetic data with 20 features
19
- np.random.seed(42)
20
- X, _ = make_classification(
21
- n_samples=500,
22
- n_features=20,
23
- n_informative=10,
24
- n_redundant=5,
25
- n_clusters_per_class=1,
26
- random_state=42
27
- )
28
- outliers = np.random.uniform(low=-6, high=6, size=(50, 20)) # Add outliers
29
- X = np.vstack([X, outliers])
30
-
31
- # Convert to DataFrame
32
- columns = [f"Feature{i+1}" for i in range(20)]
33
- df = pd.DataFrame(X, columns=columns)
34
-
35
- # Fit Isolation Forest
36
- iso_forest = IsolationForest(
37
- n_estimators=100,
38
- max_samples=256,
39
- contamination=0.1,
40
- random_state=42
41
- )
42
- iso_forest.fit(df)
43
-
44
- # Predict anomaly scores
45
- anomaly_scores = iso_forest.decision_function(df) # Negative values indicate anomalies
46
- anomaly_labels = iso_forest.predict(df) # -1 for anomaly, 1 for normal
47
-
48
- # Add results to DataFrame
49
- df["Anomaly_Score"] = anomaly_scores
50
- df["Anomaly_Label"] = np.where(anomaly_labels == -1, "Anomaly", "Normal")
51
-
52
- # Functions for Anomaly Detection Algorithms tab
53
  def train_models(input_data, outliers_fraction, n_samples, clf_name):
54
  """Train anomaly detection models and plot results."""
55
  n_outliers = int(outliers_fraction * n_samples)
@@ -85,6 +34,11 @@ def train_models(input_data, outliers_fraction, n_samples, clf_name):
85
  X = DATA_MAPPING[input_data]
86
  rng = np.random.RandomState(42)
87
  X = np.concatenate([X, rng.uniform(low=-6, high=6, size=(n_outliers, 2))], axis=0)
 
 
 
 
 
88
  t0 = time.time()
89
  clf.fit(X)
90
  t1 = time.time()
@@ -100,7 +54,7 @@ def train_models(input_data, outliers_fraction, n_samples, clf_name):
100
  plt.contour(xx, yy, Z, levels=[0], linewidths=2, colors="black")
101
 
102
  colors = np.array(["#377eb8", "#ff7f00"])
103
- plt.scatter(X[:, 0], X[:, 1], s=30, color=colors[(y_pred + 1) // 2])
104
 
105
  plt.xlim(-7, 7)
106
  plt.ylim(-7, 7)
@@ -108,32 +62,3 @@ def train_models(input_data, outliers_fraction, n_samples, clf_name):
108
  plt.yticks(())
109
  plt.title(f"{clf_name} (time: {t1 - t0:.2f}s)")
110
  return plt
111
-
112
-
113
- # Create Gradio interface
114
- with gr.Blocks() as demo:
115
- gr.Markdown("# Anomaly Detection Algorithms Comparison")
116
-
117
- input_models = [
118
- "Robust covariance", "One-Class SVM", "One-Class SVM (SGD)", "Isolation Forest", "Local Outlier Factor"
119
- ]
120
- input_data = gr.Radio(
121
- choices=["Central Blob", "Two Blobs", "Blob with Noise", "Moons", "Noise"],
122
- value="Moons",
123
- label="Dataset Type"
124
- )
125
- n_samples = gr.Slider(
126
- minimum=100, maximum=500, step=25, value=300, label="Number of Samples"
127
- )
128
- outliers_fraction = gr.Slider(
129
- minimum=0.1, maximum=0.9, step=0.1, value=0.2, label="Outlier Fraction"
130
- )
131
-
132
- for clf_name in input_models:
133
- plot = gr.Plot(label=clf_name)
134
- fn = partial(train_models, clf_name=clf_name)
135
- input_data.change(fn=fn, inputs=[input_data, outliers_fraction, n_samples], outputs=plot)
136
- n_samples.change(fn=fn, inputs=[input_data, outliers_fraction, n_samples], outputs=plot)
137
- outliers_fraction.change(fn=fn, inputs=[input_data, outliers_fraction, n_samples], outputs=plot)
138
-
139
- demo.launch()
 
1
+ # Updated train_models function with feature name compatibility for IsolationForest
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  def train_models(input_data, outliers_fraction, n_samples, clf_name):
3
  """Train anomaly detection models and plot results."""
4
  n_outliers = int(outliers_fraction * n_samples)
 
34
  X = DATA_MAPPING[input_data]
35
  rng = np.random.RandomState(42)
36
  X = np.concatenate([X, rng.uniform(low=-6, high=6, size=(n_outliers, 2))], axis=0)
37
+
38
+ # Convert X to DataFrame if using IsolationForest to ensure feature names
39
+ if clf_name == "Isolation Forest":
40
+ X = pd.DataFrame(X, columns=["Feature1", "Feature2"])
41
+
42
  t0 = time.time()
43
  clf.fit(X)
44
  t1 = time.time()
 
54
  plt.contour(xx, yy, Z, levels=[0], linewidths=2, colors="black")
55
 
56
  colors = np.array(["#377eb8", "#ff7f00"])
57
+ plt.scatter(X.iloc[:, 0], X.iloc[:, 1], s=30, color=colors[(y_pred + 1) // 2])
58
 
59
  plt.xlim(-7, 7)
60
  plt.ylim(-7, 7)
 
62
  plt.yticks(())
63
  plt.title(f"{clf_name} (time: {t1 - t0:.2f}s)")
64
  return plt