SIKAI-C commited on
Commit
aa28c84
Β·
verified Β·
1 Parent(s): 078d201

Create reconstruction.py

Browse files
Files changed (1) hide show
  1. reconstruction.py +146 -0
reconstruction.py ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Reconstruction script for CSI-4CAST datasets.
4
+
5
+ This script helps users reconstruct the original folder structure after downloading
6
+ datasets from the CSI-4CAST Hugging Face organization.
7
+
8
+ Usage:
9
+ python reconstruction.py [--input-dir INPUT_DIR] [--output-dir OUTPUT_DIR]
10
+
11
+ If no arguments are provided, it will look for downloaded datasets in the current directory
12
+ and reconstruct the structure in a 'data' folder.
13
+ """
14
+
15
+ import argparse
16
+ import shutil
17
+ from pathlib import Path
18
+
19
+
20
def create_directory_structure(base_path: Path):
    """Create the original CSI-4CAST directory layout under *base_path*.

    Four sub-directories are created (parents included, existing ones
    left untouched): stats/, test/regular/, test/generalization/ and
    train/regular/.  Each created path is echoed to stdout.
    """
    subdirs = (
        "stats",
        "test/regular",
        "test/generalization",
        "train/regular",
    )
    for rel in subdirs:
        target = base_path / rel
        target.mkdir(parents=True, exist_ok=True)
        print(f"Created directory: {target}")
34
def find_downloaded_datasets(input_dir: Path):
    """Scan *input_dir* for downloaded dataset folders.

    A folder named exactly ``stats`` goes into the 'stats' bucket; folders
    whose names start with ``test_regular_``, ``test_generalization_`` or
    ``train_regular_`` go into the matching bucket.  Returns a dict mapping
    bucket name to a list of matching folder names.
    """
    buckets = {
        'stats': [],
        'test_regular': [],
        'test_generalization': [],
        'train_regular': [],
    }
    # (folder-name prefix, bucket key) — prefixes are mutually exclusive.
    prefix_table = (
        ("test_regular_", 'test_regular'),
        ("test_generalization_", 'test_generalization'),
        ("train_regular_", 'train_regular'),
    )

    for entry in input_dir.iterdir():
        if not entry.is_dir():
            continue
        if entry.name == "stats":
            buckets['stats'].append(entry.name)
            continue
        for prefix, key in prefix_table:
            if entry.name.startswith(prefix):
                buckets[key].append(entry.name)
                break

    return buckets
57
def reconstruct_dataset(dataset_name: str, source_path: Path, target_path: Path, prefix_to_remove: str) -> bool:
    """Copy one downloaded dataset folder into its original location.

    ``prefix_to_remove`` (e.g. ``"test_regular_"``) is stripped from
    ``dataset_name`` to recover the original folder name; an empty prefix
    keeps the name as-is.  Returns True on success, False when the target
    already exists or the copy fails (both cases print a message instead
    of raising, keeping the overall reconstruction best-effort).
    """
    original_name = dataset_name[len(prefix_to_remove):] if prefix_to_remove else dataset_name
    destination = target_path / original_name

    if destination.exists():
        print(f"Warning: {destination} already exists, skipping...")
        return False

    try:
        shutil.copytree(str(source_path), str(destination))
    except Exception as e:
        print(f"Error reconstructing {dataset_name}: {e}")
        return False
    print(f"Reconstructed: {dataset_name} -> {destination}")
    return True
79
def main():
    """CLI entry point: locate downloaded datasets and rebuild the layout.

    Parses ``--input-dir``/``--output-dir``, creates the target directory
    tree, finds all downloaded dataset folders and copies each into its
    original location, then prints a summary of the final structure.
    """
    parser = argparse.ArgumentParser(description="Reconstruct CSI-4CAST dataset folder structure")
    parser.add_argument("--input-dir", "-i", default=".",
                        help="Directory containing downloaded datasets (default: current directory)")
    parser.add_argument("--output-dir", "-o", default="data",
                        help="Output directory for reconstructed structure (default: 'data')")
    args = parser.parse_args()

    input_dir = Path(args.input_dir).resolve()
    output_dir = Path(args.output_dir).resolve()

    print(f"Looking for datasets in: {input_dir}")
    print(f"Reconstructing structure in: {output_dir}")
    print()

    # Build the empty target tree, then categorize what was downloaded.
    create_directory_structure(output_dir)
    datasets = find_downloaded_datasets(input_dir)

    # (bucket key, destination sub-path parts, prefix to strip) —
    # order matters only for the order of the progress messages.
    plan = (
        ('stats', ("stats",), ""),
        ('test_regular', ("test", "regular"), "test_regular_"),
        ('test_generalization', ("test", "generalization"), "test_generalization_"),
        ('train_regular', ("train", "regular"), "train_regular_"),
    )

    total_reconstructed = 0
    for key, subpath, prefix in plan:
        destination = output_dir.joinpath(*subpath)
        for name in datasets[key]:
            if reconstruct_dataset(name, input_dir / name, destination, prefix):
                total_reconstructed += 1

    print()
    print("βœ… Reconstruction complete!")
    print(f"Total datasets reconstructed: {total_reconstructed}")
    print(f"Reconstructed structure available at: {output_dir}")
    print()
    print("Final structure:")
    print("data/")
    print("β”œβ”€β”€ stats/")
    print("β”œβ”€β”€ test/")
    print("β”‚ β”œβ”€β”€ regular/")
    print("β”‚ └── generalization/")
    print("└── train/")
    print(" └── regular/")
145
# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()