Ashar086 committed on
Commit
53c25fe
·
verified ·
1 Parent(s): cfe2f48

Create anomaly_detection.py

Browse files
Files changed (1) hide show
  1. anomaly_detection.py +37 -0
anomaly_detection.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from sklearn.ensemble import IsolationForest
3
+ from sklearn.preprocessing import StandardScaler
4
+
5
class AnomalyDetector:
    """Flag anomalous rows of a DataFrame using an Isolation Forest.

    Only the numeric columns of the input are used as features. They are
    standardized with a ``StandardScaler`` before being passed to the
    ``IsolationForest`` model.
    """

    def __init__(self, contamination=0.1, random_state=42):
        """Create the scaler and the Isolation Forest model.

        Args:
            contamination: Expected proportion of anomalies, forwarded to
                ``IsolationForest``. Defaults to 0.1.
            random_state: Seed forwarded to ``IsolationForest`` so repeated
                runs on the same data give the same result. Defaults to 42.
        """
        self.model = IsolationForest(
            contamination=contamination, random_state=random_state
        )
        self.scaler = StandardScaler()

    def detect(self, data):
        """Detect anomalous rows in ``data``.

        The scaler and the model are re-fitted on ``data`` on every call, so
        the result depends only on the frame passed in.

        Args:
            data: A pandas DataFrame. Non-numeric columns are ignored for
                detection but are kept in the returned frame.

        Returns:
            A ``(summary, anomalies)`` tuple. ``summary`` is a one-line,
            human-readable count of detected anomalies. ``anomalies`` is a
            copy of the anomalous rows of ``data`` with two extra columns,
            ``is_anomaly`` (bool) and ``anomaly_score`` (float), sorted so
            the most anomalous rows come first. When ``data`` has no rows or
            no numeric columns, nothing can be fitted: the summary reports
            zero anomalies and ``anomalies`` is an empty frame that still
            carries both extra columns.
        """
        # Select numeric columns; only these are used as model features.
        numeric_columns = data.select_dtypes(include=[np.number]).columns
        anomaly_data = data.copy()

        # Guard: the scaler and the forest both raise ValueError on input
        # with zero samples or zero features, so report "no anomalies"
        # instead of crashing. This runs before self.scaler/self.model are
        # touched.
        if len(data) == 0 or len(numeric_columns) == 0:
            anomaly_data['is_anomaly'] = np.zeros(len(data), dtype=bool)
            anomaly_data['anomaly_score'] = np.full(len(data), np.nan)
            summary = f"Detected 0 anomalies out of {len(data)} data points."
            return summary, anomaly_data.iloc[0:0]

        X = data[numeric_columns]

        # Scale the data so every feature contributes on a comparable scale.
        X_scaled = self.scaler.fit_transform(X)

        # Fit the model and predict; predict() labels outliers as -1.
        self.model.fit(X_scaled)
        anomaly_labels = self.model.predict(X_scaled)
        anomaly_data['is_anomaly'] = anomaly_labels == -1

        # decision_function() returns lower scores for more abnormal rows.
        anomaly_data['anomaly_score'] = self.model.decision_function(X_scaled)

        # Ascending sort therefore puts the most anomalous rows first.
        anomaly_data = anomaly_data.sort_values('anomaly_score')

        # Build the summary and return only the rows flagged as anomalies.
        n_anomalies = anomaly_data['is_anomaly'].sum()
        summary = f"Detected {n_anomalies} anomalies out of {len(data)} data points."

        return summary, anomaly_data[anomaly_data['is_anomaly']]