#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import joblib
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.ensemble import IsolationForest
from sklearn.model_selection import train_test_split
# Seed used for NumPy's global RNG, the train/test split and the model.
random_seed = 0

# Dataset name passed to ``fetch_openml`` (version 1 is requested in main()).
dataset_name = "cardiotocography"

# Number of trees in the Isolation Forest ensemble.
n_estimators = 100


def make_binary_labels(target, positive_label="3"):
    """Return 0/1 integer labels marking entries equal to *positive_label*.

    Args:
        target: Array-like of class labels; compared element-wise against
            ``positive_label``.
        positive_label: Value that maps to 1. Every other value maps to 0.

    Returns:
        A NumPy integer array with the same shape as ``target``.
    """
    return (np.asarray(target) == positive_label).astype(int)


def main():
    """Fetch the dataset, fit an Isolation Forest and save it to disk.

    The fitted model is written to ``isolation_forest.joblib`` in the
    current working directory.
    """
    np.random.seed(random_seed)

    dataset = fetch_openml(name=dataset_name, version=1, as_frame=False)
    X = dataset.data
    y = make_binary_labels(dataset.target)

    # Only the 75% training portion is used; the held-out 25% is discarded.
    X_train, _, y_train, _ = train_test_split(
        X, y, test_size=0.25, random_state=random_seed
    )

    model = IsolationForest(n_estimators=n_estimators, random_state=random_seed)
    # NOTE: IsolationForest is unsupervised and scikit-learn documents ``y``
    # as ignored by ``fit``; it is passed only to keep the original call.
    model.fit(X_train, y_train)

    joblib.dump(model, 'isolation_forest.joblib')


if __name__ == "__main__":
    main()