File size: 1,779 Bytes
e573a4e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
import joblib
from sklearn.ensemble import IsolationForest
import os
def train_isolation_forest(df, sensor_cols, contamination=0.1,
                           n_estimators=100, random_state=42):
    """
    Train Isolation Forest model for anomaly detection

    The model is fitted on ``df[sensor_cols]`` and the same rows are then
    scored, so the returned frame carries the training-set predictions.

    Args:
        df: DataFrame holding the training rows. It is modified in place:
            the 'anomaly' and 'anomaly_score' columns are added (or
            overwritten if they already exist).
        sensor_cols: Column labels of ``df`` used as model features.
        contamination: Forwarded to ``IsolationForest(contamination=...)``.
        n_estimators: Forwarded to ``IsolationForest(n_estimators=...)``.
            Defaults to 100, the value that used to be hard-coded.
        random_state: Forwarded to ``IsolationForest(random_state=...)``.
            Defaults to 42, the value that used to be hard-coded.

    Returns:
        Tuple ``(model, df)``: the fitted estimator and the same ``df``
        object that was passed in, now with the two extra columns.
    """
    print("Training Isolation Forest model...")

    # Select the feature matrix once so fitting and both scoring calls
    # are guaranteed to see exactly the same data.
    features = df[sensor_cols]

    # Initialize and train the model
    iso_forest = IsolationForest(
        contamination=contamination,
        random_state=random_state,
        n_estimators=n_estimators,
    )
    iso_forest.fit(features)

    # Predict anomalies and scores for the training rows. scikit-learn
    # documents predict() as returning -1 for outliers and 1 for inliers,
    # and decision_function() as lower-is-more-abnormal.
    df['anomaly'] = iso_forest.predict(features)
    df['anomaly_score'] = iso_forest.decision_function(features)

    print("Model training completed!")
    return iso_forest, df
def save_model(model, filepath='isolation_forest_model.pkl'):
    """
    Save trained model to disk

    Args:
        model: Object to persist with ``joblib.dump``.
        filepath: Destination passed to ``joblib.dump``. When it is a
            string or path-like object, its parent directory is created
            if it does not exist yet.
    """
    # joblib.dump does not create missing directories, so a path such as
    # 'models/forest.pkl' would fail on a fresh checkout. Only path-like
    # arguments are inspected; anything else is handed to joblib as-is.
    if isinstance(filepath, (str, os.PathLike)):
        parent_dir = os.path.dirname(os.fspath(filepath))
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

    joblib.dump(model, filepath)
    print(f"Model saved to {filepath}")
def load_model(filepath='isolation_forest_model.pkl'):
    """
    Load trained model from disk

    Args:
        filepath: Location of a file previously written with joblib.

    Returns:
        The object returned by ``joblib.load``, or None when ``filepath``
        does not name an existing regular file.
    """
    # isfile() rather than exists(): a directory also "exists", and would
    # slip past the guard only to fail inside joblib.load instead of
    # producing the "no model available" None result.
    if not os.path.isfile(filepath):
        return None

    # SECURITY: joblib.load unpickles the file, which can execute
    # arbitrary code. Only load model files from trusted sources.
    model = joblib.load(filepath)
    print(f"Model loaded from {filepath}")
    return model
def add_anomaly_scores(df, model, sensor_cols):
    """
    Score ``df`` with an already-fitted model and attach the results.

    ``model.predict`` and ``model.decision_function`` are each called on
    ``df[sensor_cols]``; their outputs are stored in the 'anomaly' and
    'anomaly_score' columns respectively. ``df`` is modified in place and
    the same object is returned.
    """
    # Hard labels first ...
    labels = model.predict(df[sensor_cols])
    df['anomaly'] = labels

    # ... then the continuous score, computed from a fresh column selection.
    scores = model.decision_function(df[sensor_cols])
    df['anomaly_score'] = scores

    return df
if __name__ == "__main__":
    # Test the model trainer: load data, train, persist the model.
    try:
        # Imported here so the module can be imported without
        # data_processor being available; only the script run needs it.
        from data_processor import load_and_process_data

        df, sensor_cols = load_and_process_data()
        model, df_with_anomalies = train_isolation_forest(df, sensor_cols)
        save_model(model)
        print("Model training and saving completed successfully!")
    except Exception as e:
        print(f"Error in model training: {e}")
        # Exit non-zero so shells, schedulers and CI see the failure
        # instead of a "successful" run that only printed an error.
        raise SystemExit(1) from e
|