|
|
|
|
|
import joblib |
|
|
from sklearn.ensemble import IsolationForest |
|
|
import os |
|
|
|
|
|
def train_isolation_forest(df, sensor_cols, contamination=0.1, *,
                           n_estimators=100, random_state=42):
    """Fit an Isolation Forest on the sensor columns and score the training rows.

    Args:
        df: DataFrame holding the training rows. It is modified in place:
            the ``anomaly`` and ``anomaly_score`` columns are added (or
            overwritten if they already exist).
        sensor_cols: Column labels of ``df`` used as model features.
        contamination: Forwarded to ``IsolationForest(contamination=...)``.
        n_estimators: Forwarded to ``IsolationForest(n_estimators=...)``.
        random_state: Forwarded to ``IsolationForest(random_state=...)``;
            the fixed default keeps repeated runs reproducible.

    Returns:
        A ``(model, df)`` tuple: the fitted estimator and the same ``df``
        object, now carrying the two anomaly columns.
    """
    print("Training Isolation Forest model...")

    iso_forest = IsolationForest(
        contamination=contamination,
        random_state=random_state,
        n_estimators=n_estimators,
    )
    iso_forest.fit(df[sensor_cols])

    # Delegate to the module's scoring helper so the output column names and
    # scoring calls are defined in exactly one place.
    df = add_anomaly_scores(df, iso_forest, sensor_cols)

    print("Model training completed!")
    return iso_forest, df
|
|
|
|
|
def save_model(model, filepath='isolation_forest_model.pkl'):
    """Serialize a trained model to disk with joblib.

    Args:
        model: Object to persist; passed unchanged to ``joblib.dump``.
        filepath: Destination file path. Any missing parent directories
            are created before writing.
    """
    # Create the target directory first so a path such as
    # "models/isolation_forest_model.pkl" works on a fresh checkout instead
    # of failing because the directory is absent.
    parent_dir = os.path.dirname(filepath)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    joblib.dump(model, filepath)
    print(f"Model saved to {filepath}")
|
|
|
|
|
def load_model(filepath='isolation_forest_model.pkl'):
    """Load a previously saved model from disk.

    Args:
        filepath: Path of the file written by ``save_model``.

    Returns:
        The object returned by ``joblib.load``, or ``None`` when
        ``filepath`` does not refer to an existing regular file.
    """
    # isfile() rather than exists(): a directory at this path is not a model
    # and should yield None instead of an error from joblib.load.
    if not os.path.isfile(filepath):
        return None

    # NOTE: joblib files are pickle-based, so loading one can execute
    # arbitrary code. Only load model files that come from a trusted source.
    model = joblib.load(filepath)
    print(f"Model loaded from {filepath}")
    return model
|
|
|
|
|
def add_anomaly_scores(df, model, sensor_cols):
    """Add anomaly predictions and scores to a dataframe.

    Args:
        df: DataFrame to score. It is modified in place.
        model: Fitted estimator exposing ``predict`` and
            ``decision_function`` (e.g. the model returned by
            ``train_isolation_forest``).
        sensor_cols: Column labels of ``df`` passed to the model as features.

    Returns:
        The same ``df`` object with two columns added or overwritten:
        ``anomaly`` (output of ``model.predict``) and ``anomaly_score``
        (output of ``model.decision_function``). For a zero-row frame the
        columns are added empty and the model is not called.
    """
    # Select the feature columns once and reuse the selection for both calls.
    # A missing column still raises KeyError here, for empty frames too.
    features = df[sensor_cols]

    if len(features) == 0:
        # scikit-learn estimators raise ValueError on zero-sample input, so
        # skip scoring. Assigning a scalar to a zero-row frame creates an
        # empty column of the scalar's dtype (int / float), which keeps the
        # output schema the same as for scored frames.
        df['anomaly'] = 1
        df['anomaly_score'] = 0.0
        return df

    df['anomaly'] = model.predict(features)
    df['anomaly_score'] = model.decision_function(features)
    return df
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: load the processed data, train the model, and
    # persist it to the default model path.
    try:
        # NOTE(review): presumably imported here rather than at module top so
        # that importing this module does not require data_processor - confirm.
        from data_processor import load_and_process_data

        df, sensor_cols = load_and_process_data()
        # Only the fitted model is needed here; the scored frame is discarded.
        model, _ = train_isolation_forest(df, sensor_cols)
        save_model(model)
        print("Model training and saving completed successfully!")
    except Exception as e:
        print(f"Error in model training: {e}")
        # Exit with a non-zero status so shells, schedulers and CI can detect
        # the failure; previously the script reported the error and still
        # exited with status 0.
        raise SystemExit(1) from e
|
|
|