File size: 1,318 Bytes
53c25fe |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
import numpy as np
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
class AnomalyDetector:
    """Flag anomalous rows of a DataFrame with an Isolation Forest.

    Only the numeric columns of the input are used as features. They are
    standardized with a ``StandardScaler`` before being passed to the
    ``IsolationForest`` model. Both the scaler and the model are re-fitted
    on every call to :meth:`detect`.
    """

    def __init__(self, contamination=0.1, random_state=42):
        """Create the scaler and the Isolation Forest model.

        Args:
            contamination: Forwarded to ``IsolationForest`` as its
                ``contamination`` argument. Defaults to 0.1.
            random_state: Forwarded to ``IsolationForest`` as its
                ``random_state`` argument. Defaults to 42.
        """
        self.model = IsolationForest(
            contamination=contamination,
            random_state=random_state,
        )
        self.scaler = StandardScaler()

    def detect(self, data):
        """Fit on ``data`` and return the rows labelled as anomalies.

        Args:
            data: DataFrame to analyse. Non-numeric columns are ignored
                for fitting but kept in the returned frame.

        Returns:
            A ``(summary, anomalies)`` tuple. ``summary`` is a
            human-readable sentence with the anomaly count. ``anomalies``
            is a copy of the anomalous rows of ``data`` with two extra
            columns, ``is_anomaly`` and ``anomaly_score``, sorted by
            ascending ``anomaly_score`` (most anomalous first).
        """
        # Select numeric columns; only these are used as model features.
        numeric_columns = data.select_dtypes(include=[np.number]).columns
        X = data[numeric_columns]

        # With no rows or no numeric columns there is nothing to fit, and
        # the scaler would raise an opaque ValueError. Report zero anomalies
        # instead, keeping the same columns as the normal result.
        if X.empty:
            no_anomalies = data.iloc[:0].copy()
            no_anomalies['is_anomaly'] = np.zeros(0, dtype=bool)
            no_anomalies['anomaly_score'] = np.zeros(0, dtype=float)
            summary = f"Detected 0 anomalies out of {len(data)} data points."
            return summary, no_anomalies

        # Scale the data
        X_scaled = self.scaler.fit_transform(X)

        # Fit the model and predict; a label of -1 marks an anomaly.
        self.model.fit(X_scaled)
        anomaly_labels = self.model.predict(X_scaled)

        # Work on a copy so the caller's DataFrame is not mutated.
        anomaly_data = data.copy()
        anomaly_data['is_anomaly'] = anomaly_labels == -1
        anomaly_data['anomaly_score'] = self.model.decision_function(X_scaled)

        # Sort by anomaly score (most anomalous first)
        anomaly_data = anomaly_data.sort_values('anomaly_score')

        # Return summary of anomalies
        n_anomalies = anomaly_data['is_anomaly'].sum()
        summary = f"Detected {n_anomalies} anomalies out of {len(data)} data points."
        return summary, anomaly_data[anomaly_data['is_anomaly']]