| |
| """ |
| Clean up script for removing training runs without exported models. |
| Removes all directories in runs/ folder that don't have a corresponding exported model file. |
| """ |
|
|
import argparse
import glob
import json
import os
import shutil
from pathlib import Path
|
|
|
|
def find_exported_models():
    """Find exported model files (``*_YYYYMMDD_HHMMSS.joblib``) in the cwd.

    Returns:
        List of dicts, each with:
            "file": path to the .joblib file (as returned by glob)
            "timestamp": the "YYYYMMDD_HHMMSS" suffix parsed from the name
    """
    exported_models = []
    seen_files = set()

    # Filenames look like <name>_YYYYMMDD_HHMMSS.joblib; the date is pinned
    # to 20xx so unrelated .joblib files are not picked up.
    patterns = [
        "*_20[0-9][0-9][0-9][0-9][0-9][0-9]_[0-9][0-9][0-9][0-9][0-9][0-9].joblib"
    ]

    for pattern in patterns:
        for filepath in glob.glob(pattern):
            # Guard against the same file matching more than one pattern.
            if filepath in seen_files:
                continue
            seen_files.add(filepath)

            filename = os.path.basename(filepath)
            parts = filename.replace(".joblib", "").split("_")
            # A matching filename has at least <name>, <date>, <time> — i.e.
            # 3 parts.  The previous check (>= 4) silently dropped models
            # whose base name contains no underscore, even though they match
            # the glob pattern (e.g. "model_20240101_123456.joblib").
            if len(parts) >= 3:
                # Timestamp is the final "<date>_<time>" pair.
                timestamp = "_".join(parts[-2:])
                exported_models.append({
                    "file": filepath,
                    "timestamp": timestamp,
                })

    return exported_models
|
|
|
|
def find_all_runs():
    """Return every run directory found under ``runs/``.

    Each entry is a dict with "path" (a Path) and "timestamp" (the directory
    name, which doubles as the run's timestamp).  Returns an empty list when
    the ``runs`` directory does not exist.
    """
    runs_dir = Path("runs")
    if not runs_dir.exists():
        return []

    return [
        {"path": entry, "timestamp": entry.name}
        for entry in runs_dir.iterdir()
        if entry.is_dir()
    ]
|
|
|
|
def clean_runs(dry_run=False, verbose=False):
    """
    Remove all run directories that don't have exported models.

    A run is kept when its directory name equals the "YYYYMMDD_HHMMSS"
    timestamp embedded in some exported .joblib filename.

    Args:
        dry_run: If True, only show what would be deleted without actually deleting
        verbose: If True, show detailed information

    Returns:
        Tuple of (runs_to_keep, runs_to_delete)
    """
    exported_models = find_exported_models()
    exported_timestamps = {model["timestamp"] for model in exported_models}

    all_runs = find_all_runs()

    # Partition runs by whether an exported model shares their timestamp.
    runs_to_keep = [r for r in all_runs if r["timestamp"] in exported_timestamps]
    runs_to_delete = [r for r in all_runs if r["timestamp"] not in exported_timestamps]

    print(f"Found {len(all_runs)} total runs")
    print(f"Found {len(exported_models)} exported models")
    print(f"Runs to keep: {len(runs_to_keep)}")
    print(f"Runs to delete: {len(runs_to_delete)}")

    if verbose and exported_models:
        print("\nExported models found:")
        for model in exported_models:
            print(f"  - {model['file']} (timestamp: {model['timestamp']})")

    if verbose and runs_to_keep:
        print("\nRuns with exported models (will be kept):")
        for run in runs_to_keep:
            print(f"  - {run['path']}")

    if runs_to_delete:
        print("\nRuns without exported models (will be deleted):")
        for run in runs_to_delete:
            print(f"  - {run['path']}")
            if verbose:
                _print_run_metadata(run)

    # Report how much disk space the deletions would reclaim.
    total_size = sum(_run_size_bytes(run["path"]) for run in runs_to_delete)
    if total_size > 0:
        size_mb = total_size / (1024 * 1024)
        print(f"\nTotal space to be freed: {size_mb:.2f} MB")

    if not dry_run and runs_to_delete:
        deleted_count = 0
        for run in runs_to_delete:
            try:
                shutil.rmtree(run["path"])
                deleted_count += 1
                if verbose:
                    print(f"Deleted: {run['path']}")
            except OSError as e:
                # Keep going: one undeletable run shouldn't abort the rest.
                print(f"Error deleting {run['path']}: {e}")

        print(f"\nSuccessfully deleted {deleted_count} run(s)")
    elif dry_run and runs_to_delete:
        print("\nDry run mode - no files were deleted")
        print("Run without --dry-run to actually delete these directories")
    elif not runs_to_delete:
        print("\nNo runs to delete - all runs have exported models or no runs found")

    return runs_to_keep, runs_to_delete


def _run_size_bytes(run_path):
    """Return the total size in bytes of all regular files under *run_path*."""
    return sum(f.stat().st_size for f in run_path.rglob("*") if f.is_file())


def _print_run_metadata(run):
    """Best-effort print of model/dataset/accuracy from a run's metadata.json.

    Silently returns if the file is missing, unreadable, or not valid JSON.
    """
    metadata_path = run["path"] / "metadata.json"
    if not metadata_path.exists():
        return
    try:
        with open(metadata_path) as f:
            metadata = json.load(f)
    except (OSError, json.JSONDecodeError):
        return
    accuracy = metadata.get("test_accuracy")
    # Only float-format numeric values: the previous code applied ':.4f'
    # directly to the fallback string 'N/A', raising an uncaught ValueError
    # whenever 'test_accuracy' was absent from metadata.json.
    accuracy_text = f"{accuracy:.4f}" if isinstance(accuracy, (int, float)) else "N/A"
    print(f"      Model: {metadata.get('model_name', 'unknown')}, "
          f"Dataset: {metadata.get('dataset', 'unknown')}, "
          f"Accuracy: {accuracy_text}")
|
|
|
|
def main():
    """Command-line entry point.

    Analyzes the runs/ directory (always as a dry run first), asks for
    confirmation unless --yes or --dry-run was given, then performs the
    actual cleanup.
    """
    parser = argparse.ArgumentParser(
        description="Clean up training runs without exported models"
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be deleted without actually deleting",
    )
    parser.add_argument(
        "--verbose",
        "-v",
        action="store_true",
        help="Show detailed information about runs",
    )
    parser.add_argument(
        "--yes",
        "-y",
        action="store_true",
        help="Skip confirmation prompt",
    )
    args = parser.parse_args()

    # Nothing to do when there is no runs directory at all.
    if not Path("runs").exists():
        print("No 'runs' directory found. Nothing to clean.")
        return

    print("Analyzing runs directory...\n")

    # First pass is always a dry run: report without touching anything.
    _, runs_to_delete = clean_runs(dry_run=True, verbose=args.verbose)
    if not runs_to_delete:
        return

    # Interactive confirmation, unless explicitly skipped or dry-running.
    if not (args.dry_run or args.yes):
        print("\n" + "=" * 60)
        answer = input(f"Are you sure you want to delete {len(runs_to_delete)} run(s)? [y/N]: ")
        if answer.lower() != 'y':
            print("Cleanup cancelled")
            return

    if not args.dry_run:
        print("\nPerforming cleanup...")
        clean_runs(dry_run=False, verbose=args.verbose)
|
|
|
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()