Spaces:

mboukabous
/

train_unsupervised

Sleeping

App Files Files Community

mboukabous committed on Jan 2, 2025

Commit

3977aa0

1 Parent(s): 0922d39

fix model path

Browse files

Files changed (3) hide show

scripts/train_anomaly_detection.py +9 -3
scripts/train_clustering_model.py +8 -3
scripts/train_dimred_model.py +8 -3

scripts/train_anomaly_detection.py CHANGED Viewed

@@ -43,9 +43,16 @@ def main(args):
     # Prepare results directory
     if args.results_path is None:
         args.results_path = os.path.join("results", f"{estimator.__class__.__name__}_Anomaly")
     os.makedirs(args.results_path, exist_ok=True)
     # Load data
     df = pd.read_csv(args.data_path)
     print(f"Data loaded from {args.data_path}, initial shape: {df.shape}")
@@ -84,8 +91,7 @@ def main(args):
     print(f"Anomaly detection training with {args.model_module} completed in {train_time:.2f} seconds.")
     # Save the model
-    model_output_path = os.path.join(args.results_path, "anomaly_model.pkl")
-    os.makedirs(args.model_path, exist_ok=True)
     joblib.dump(estimator, model_output_path)
     print(f"Model saved to {model_output_path}")
@@ -149,7 +155,7 @@ if __name__ == "__main__":
                         help='Name of the anomaly detection model (e.g. isolation_forest, one_class_svm).')
     parser.add_argument('--data_path', type=str, required=True,
                         help='Path to the CSV dataset file.')
-    parser.add_argument('--model_path', type=str, default='saved_models/Anomaly',
                         help='Path to save the trained model.')
     parser.add_argument('--results_path', type=str, default=None,
                         help='Directory to save results (predictions, plots).')

     # Prepare results directory
     if args.results_path is None:
+        # e.g., 'results/IsolationForest_Anomaly'
         args.results_path = os.path.join("results", f"{estimator.__class__.__name__}_Anomaly")
     os.makedirs(args.results_path, exist_ok=True)
+    # Prepare model directory
+    if args.model_path is None:
+        # e.g., 'saved_models/IsolationForest_Anomaly'
+        args.model_path = os.path.join('saved_models', f"{estimator.__class__.__name__}_Anomaly")
+    os.makedirs(args.model_path, exist_ok=True)
     # Load data
     df = pd.read_csv(args.data_path)
     print(f"Data loaded from {args.data_path}, initial shape: {df.shape}")
     print(f"Anomaly detection training with {args.model_module} completed in {train_time:.2f} seconds.")
     # Save the model
+    model_output_path = os.path.join(args.model_path, "anomaly_model.pkl")
     joblib.dump(estimator, model_output_path)
     print(f"Model saved to {model_output_path}")
                         help='Name of the anomaly detection model (e.g. isolation_forest, one_class_svm).')
     parser.add_argument('--data_path', type=str, required=True,
                         help='Path to the CSV dataset file.')
+    parser.add_argument('--model_path', type=str, default=None,
                         help='Path to save the trained model.')
     parser.add_argument('--results_path', type=str, default=None,
                         help='Directory to save results (predictions, plots).')

scripts/train_clustering_model.py CHANGED Viewed

@@ -45,6 +45,12 @@ def main(args):
         # e.g., 'results/KMeans_Clustering'
         args.results_path = os.path.join('results', f"{estimator.__class__.__name__}_Clustering")
     os.makedirs(args.results_path, exist_ok=True)
     # Load data from CSV
     df = pd.read_csv(args.data_path)
@@ -94,8 +100,7 @@ def main(args):
         print(f"Training time (no tuning): {end_time - start_time:.2f}s")
     # Ensure the model is fitted at this point
-    model_output_path = os.path.join(args.results_path, "best_model.pkl")
-    os.makedirs(args.model_path, exist_ok=True)  # ensure directory exists
     joblib.dump(estimator, model_output_path)
     print(f"Model saved to {model_output_path}")
@@ -165,7 +170,7 @@ if __name__ == "__main__":
                         help='Name of the clustering model module (e.g. kmeans, dbscan, etc.).')
     parser.add_argument('--data_path', type=str, required=True,
                         help='Path to the CSV dataset.')
-    parser.add_argument('--model_path', type=str, default='saved_models/Clustering',
                         help='Path to save the trained model.')
     parser.add_argument('--results_path', type=str, default=None,
                         help='Directory to save results (metrics, plots).')

         # e.g., 'results/KMeans_Clustering'
         args.results_path = os.path.join('results', f"{estimator.__class__.__name__}_Clustering")
     os.makedirs(args.results_path, exist_ok=True)
+    # Prepare model directory
+    if args.model_path is None:
+        # e.g., 'saved_models/KMeans_Clustering'
+        args.model_path = os.path.join('saved_models', f"{estimator.__class__.__name__}_Clustering")
+    os.makedirs(args.model_path, exist_ok=True)
     # Load data from CSV
     df = pd.read_csv(args.data_path)
         print(f"Training time (no tuning): {end_time - start_time:.2f}s")
     # Ensure the model is fitted at this point
+    model_output_path = os.path.join(args.model_path, "best_model.pkl")
     joblib.dump(estimator, model_output_path)
     print(f"Model saved to {model_output_path}")
                         help='Name of the clustering model module (e.g. kmeans, dbscan, etc.).')
     parser.add_argument('--data_path', type=str, required=True,
                         help='Path to the CSV dataset.')
+    parser.add_argument('--model_path', type=str, default=None,
                         help='Path to save the trained model.')
     parser.add_argument('--results_path', type=str, default=None,
                         help='Directory to save results (metrics, plots).')

scripts/train_dimred_model.py CHANGED Viewed

@@ -47,6 +47,12 @@ def main(args):
         # e.g., 'results/PCA_DimRed'
         args.results_path = os.path.join('results', f"{estimator.__class__.__name__}_DimRed")
     os.makedirs(args.results_path, exist_ok=True)
     # Load data from CSV
     df = pd.read_csv(args.data_path)
@@ -89,8 +95,7 @@ def main(args):
     print(f"Dimensionality reduction done using {args.model_module}. Output shape: {X_transformed.shape}")
     # Save the model
-    model_output_path = os.path.join(args.results_path, "dimred_model.pkl")
-    os.makedirs(args.model_path, exist_ok=True)  # ensure directory
     joblib.dump(estimator, model_output_path)
     print(f"Model saved to {model_output_path}")
@@ -135,7 +140,7 @@ if __name__ == "__main__":
                         help='Name of the dimred model module (e.g. pca, tsne, umap).')
     parser.add_argument('--data_path', type=str, required=True,
                         help='Path to the CSV dataset file.')
-    parser.add_argument('--model_path', type=str, default='saved_models/DimRed',
                         help='Where to save the fitted model.')
     parser.add_argument('--results_path', type=str, default=None,
                         help='Directory to store results (transformed data, plots).')

         # e.g., 'results/PCA_DimRed'
         args.results_path = os.path.join('results', f"{estimator.__class__.__name__}_DimRed")
     os.makedirs(args.results_path, exist_ok=True)
+    # Prepare model directory
+    if args.model_path is None:
+        # e.g., 'saved_models/PCA_DimRed'
+        args.model_path = os.path.join('saved_models', f"{estimator.__class__.__name__}_DimRed")
+    os.makedirs(args.model_path, exist_ok=True)
     # Load data from CSV
     df = pd.read_csv(args.data_path)
     print(f"Dimensionality reduction done using {args.model_module}. Output shape: {X_transformed.shape}")
     # Save the model
+    model_output_path = os.path.join(args.model_path, "dimred_model.pkl")
     joblib.dump(estimator, model_output_path)
     print(f"Model saved to {model_output_path}")
                         help='Name of the dimred model module (e.g. pca, tsne, umap).')
     parser.add_argument('--data_path', type=str, required=True,
                         help='Path to the CSV dataset file.')
+    parser.add_argument('--model_path', type=str, default=None,
                         help='Where to save the fitted model.')
     parser.add_argument('--results_path', type=str, default=None,
                         help='Directory to store results (transformed data, plots).')