Spaces:

CSI-4CAST
/

README

Running

App Files Files Community

SIKAI-C committed on Oct 13, 2025

Commit

aa28c84

verified ·

1 Parent(s): 078d201

Create reconstruction.py

Browse files

Files changed (1) hide show

reconstruction.py +146 -0

reconstruction.py ADDED Viewed

	@@ -0,0 +1,146 @@

+#!/usr/bin/env python3
+"""
+Reconstruction script for CSI-4CAST datasets.
+This script helps users reconstruct the original folder structure after downloading
+datasets from the CSI-4CAST Hugging Face organization.
+Usage:
+    python reconstruction.py [--input-dir INPUT_DIR] [--output-dir OUTPUT_DIR]
+If no arguments are provided, it looks for downloaded datasets in the current directory
+and reconstructs the structure in a 'data' folder.
+"""
+import argparse
+import shutil
+from pathlib import Path
+def create_directory_structure(base_path: Path):
+    """Create the original directory structure"""
+    dirs_to_create = [
+        "stats",
+        "test/regular",
+        "test/generalization",
+        "train/regular"
+    ]
+    for dir_path in dirs_to_create:
+        full_path = base_path / dir_path
+        full_path.mkdir(parents=True, exist_ok=True)
+        print(f"Created directory: {full_path}")
+def find_downloaded_datasets(input_dir: Path):
+    """Find all downloaded dataset folders"""
+    datasets = {
+        'stats': [],
+        'test_regular': [],
+        'test_generalization': [],
+        'train_regular': []
+    }
+    # Look for folders that match our naming patterns
+    for item in input_dir.iterdir():
+        if item.is_dir():
+            if item.name == "stats":
+                datasets['stats'].append(item.name)
+            elif item.name.startswith("test_regular_"):
+                datasets['test_regular'].append(item.name)
+            elif item.name.startswith("test_generalization_"):
+                datasets['test_generalization'].append(item.name)
+            elif item.name.startswith("train_regular_"):
+                datasets['train_regular'].append(item.name)
+    return datasets
+def reconstruct_dataset(dataset_name: str, source_path: Path, target_path: Path, prefix_to_remove: str) -> bool:
+    """Reconstruct a single dataset by removing prefix and moving to target location"""
+    if prefix_to_remove:
+        # Remove the prefix to get the original folder name
+        original_name = dataset_name[len(prefix_to_remove):]
+    else:
+        original_name = dataset_name
+    target_folder = target_path / original_name
+    if target_folder.exists():
+        print(f"Warning: {target_folder} already exists, skipping...")
+        return False
+    try:
+        shutil.copytree(str(source_path), str(target_folder))
+        print(f"Reconstructed: {dataset_name} -> {target_folder}")
+        return True
+    except Exception as e:
+        print(f"Error reconstructing {dataset_name}: {e}")
+        return False
+def main():
+    parser = argparse.ArgumentParser(description="Reconstruct CSI-4CAST dataset folder structure")
+    parser.add_argument("--input-dir", "-i", default=".",
+                       help="Directory containing downloaded datasets (default: current directory)")
+    parser.add_argument("--output-dir", "-o", default="data",
+                       help="Output directory for reconstructed structure (default: 'data')")
+    args = parser.parse_args()
+    input_dir = Path(args.input_dir).resolve()
+    output_dir = Path(args.output_dir).resolve()
+    print(f"Looking for datasets in: {input_dir}")
+    print(f"Reconstructing structure in: {output_dir}")
+    print()
+    # Create the directory structure
+    create_directory_structure(output_dir)
+    # Find all downloaded datasets
+    datasets = find_downloaded_datasets(input_dir)
+    total_reconstructed = 0
+    # Reconstruct stats
+    for dataset in datasets['stats']:
+        source_path = input_dir / dataset
+        target_path = output_dir / "stats"
+        if reconstruct_dataset(dataset, source_path, target_path, ""):
+            total_reconstructed += 1
+    # Reconstruct test/regular datasets
+    for dataset in datasets['test_regular']:
+        source_path = input_dir / dataset
+        target_path = output_dir / "test" / "regular"
+        if reconstruct_dataset(dataset, source_path, target_path, "test_regular_"):
+            total_reconstructed += 1
+    # Reconstruct test/generalization datasets
+    for dataset in datasets['test_generalization']:
+        source_path = input_dir / dataset
+        target_path = output_dir / "test" / "generalization"
+        if reconstruct_dataset(dataset, source_path, target_path, "test_generalization_"):
+            total_reconstructed += 1
+    # Reconstruct train/regular datasets
+    for dataset in datasets['train_regular']:
+        source_path = input_dir / dataset
+        target_path = output_dir / "train" / "regular"
+        if reconstruct_dataset(dataset, source_path, target_path, "train_regular_"):
+            total_reconstructed += 1
+    print()
+    print("✅ Reconstruction complete!")
+    print(f"Total datasets reconstructed: {total_reconstructed}")
+    print(f"Reconstructed structure available at: {output_dir}")
+    print()
+    print("Final structure:")
+    print("data/")
+    print("├── stats/")
+    print("├── test/")
+    print("│   ├── regular/")
+    print("│   └── generalization/")
+    print("└── train/")
+    print("    └── regular/")
+# Run the reconstruction only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()