christophschirninger committed on
Commit
8ce6086
·
1 Parent(s): 6a114fd

clear code for ITI preprocessing including normalization

Browse files
Files changed (1) hide show
  1. flaring/iti_data_processing.py +58 -82
flaring/iti_data_processing.py CHANGED
@@ -1,93 +1,69 @@
1
- from itipy.data.dataset import BaseDataset
2
- from itipy.data.editor import LoadMapEditor, NormalizeRadiusEditor, MapToDataEditor, AIAPrepEditor
 
 
 
 
 
 
 
 
 
3
  import os
4
  import glob
5
- from astropy.io import fits
6
- from astropy.io.fits import Header, PrimaryHDU
7
  import tqdm as tqdm
8
  import multiprocessing as mp
9
  from functools import partial
10
 
11
  # Configuration for all wavelengths to process
12
  wavelengths = [94, 131, 171, 193, 211, 304]
13
- base_input_folder = '/mnt/data/SDO-AIA'
14
- output_folder = '/mnt/data2/AIA_processed_data'
15
  os.makedirs(output_folder, exist_ok=True)
16
 
17
-
18
- def process_file(fits_file, wavelength, resolution=512):
19
- """Process a single FITS file with the specified wavelength and resolution."""
20
- try:
21
- editors = [
22
- LoadMapEditor(),
23
- NormalizeRadiusEditor(resolution),
24
- AIAPrepEditor(calibration='auto'),
25
- MapToDataEditor()
26
- ]
27
-
28
- dataset = BaseDataset([fits_file], editors=editors, ext='.fits',
29
- wavelength=wavelength, resolution=resolution)
30
-
31
- data, meta = dataset.convertData([fits_file])
32
- meta_header = meta['header']
33
- del meta_header['keycomments']
34
-
35
- # Create wavelength subfolder
36
- wavelength_folder = os.path.join(output_folder, str(wavelength))
37
- os.makedirs(wavelength_folder, exist_ok=True)
38
-
39
- output_file = os.path.join(wavelength_folder, os.path.basename(fits_file))
40
- fits.writeto(output_file, data, header=Header(meta_header), overwrite=True)
41
- return output_file
42
- except Exception as e:
43
- pass
44
-
45
-
46
- def process_wavelength(wavelength):
47
- """Process files for a specific wavelength."""
48
- input_folder = os.path.join(base_input_folder, str(wavelength))
49
-
50
- # 🔎 Collect all .fits files
51
- fits_files = glob.glob(os.path.join(input_folder, '*.fits'))
52
-
53
- files_to_process = []
54
- skipped_count = 0
55
-
56
- for fits_file in fits_files:
57
- # Generate expected output filename (adjust this logic based on your process_file function)
58
- base_name = os.path.splitext(os.path.basename(fits_file))[0]
59
- output_file = os.path.join(output_folder, str(wavelength), f"{base_name}.fits")
60
-
61
- # Check if output file already exists
62
- if os.path.exists(output_file):
63
- skipped_count += 1
64
  else:
65
- files_to_process.append(fits_file)
66
-
67
- print(f"Found {len(files_to_process)} files for wavelength {wavelength}")
68
- print(f"Skipping {skipped_count} already processed files")
69
- print(f"Processing {len(files_to_process)} remaining files...")
70
-
71
- if not files_to_process:
72
- print("All files already processed!")
73
- return []
74
-
75
- print(f"Processing {len(fits_files)} files for wavelength {wavelength}...")
76
-
77
- # Create partial function with wavelength parameter fixed
78
- process_func = partial(process_file, wavelength=wavelength)
79
-
80
- # Process files with multiprocessing
81
- with mp.Pool(processes=mp.cpu_count()) as pool:
82
- results = list(tqdm.tqdm(
83
- pool.imap(process_func, files_to_process),
84
- total=len(files_to_process),
85
- desc=f"Processing {wavelength}Å files"
86
- ))
87
-
88
- return results
89
-
90
-
91
- # Process all wavelengths
92
- for wavelength in wavelengths:
93
- process_wavelength(wavelength)
 
1
+ import collections.abc
2
+ collections.Iterable = collections.abc.Iterable
3
+ collections.Mapping = collections.abc.Mapping
4
+ collections.MutableSet = collections.abc.MutableSet
5
+ collections.MutableMapping = collections.abc.MutableMapping
6
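+ # The aliases above restore ABC names removed from `collections` in Python 3.10; the imports below presumably rely on them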
7
+ import numpy as np
8
+ from astropy.visualization import ImageNormalize, AsinhStretch
9
+ from itipy.data.dataset import SDODataset, StackDataset, get_intersecting_files, AIADataset
10
+ from itipy.data.editor import LoadMapEditor, NormalizeRadiusEditor, MapToDataEditor, AIAPrepEditor, \
11
+ BrightestPixelPatchEditor
12
  import os
13
  import glob
14
+ from multiprocessing import Pool, cpu_count
 
15
  import tqdm as tqdm
16
  import multiprocessing as mp
17
  from functools import partial
18
 
19
  # Configuration for all wavelengths to process
20
  wavelengths = [94, 131, 171, 193, 211, 304]
21
+ base_input_folder = '/mnt/data2/SDO-AIA'
22
+ output_folder = '/mnt/data2/AIA_processed'
23
  os.makedirs(output_folder, exist_ok=True)
24
 
25
+ sdo_norms = {
26
+ '94': ImageNormalize(vmin=0, vmax=np.float32(16.560747), stretch=AsinhStretch(0.005), clip=True),
27
+ '131': ImageNormalize(vmin=0, vmax=np.float32(75.84181), stretch=AsinhStretch(0.005), clip=True),
28
+ '171': ImageNormalize(vmin=0, vmax=np.float32(1536.1443), stretch=AsinhStretch(0.005), clip=True),
29
+ '193': ImageNormalize(vmin=0, vmax=np.float32(2288.1), stretch=AsinhStretch(0.005), clip=True),
30
+ '211': ImageNormalize(vmin=0, vmax=np.float32(1163.9178), stretch=AsinhStretch(0.005), clip=True),
31
+ '304': ImageNormalize(vmin=0, vmax=np.float32(401.82352), stretch=AsinhStretch(0.001), clip=True),
32
+ }
33
+
34
+ class SDODataset_flaring(StackDataset):
35
+ """
36
+ Dataset for SDO data
37
+
38
+ Args:
39
+ data: Data
40
+ patch_shape (tuple): Patch shape
41
+ wavelengths (list): List of wavelengths
42
+ resolution (int): Resolution
43
+ ext (str): File extension
44
+ **kwargs: Additional arguments
45
+ """
46
+ def __init__(self, data, patch_shape=None, wavelengths=None, resolution=2048, ext='.fits', **kwargs):
47
+ wavelengths = [171, 193, 211, 304, 6173, ] if wavelengths is None else wavelengths
48
+ if isinstance(data, list):
49
+ paths = data
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  else:
51
+ paths = get_intersecting_files(data, wavelengths, ext=ext, **kwargs)
52
+ ds_mapping = {94:AIADataset, 131: AIADataset, 171: AIADataset, 193: AIADataset, 211: AIADataset, 304: AIADataset}
53
+ data_sets = [ds_mapping[wl_id](files, wavelength=wl_id, resolution=resolution, ext=ext)
54
+ for wl_id, files in zip(wavelengths, paths)]
55
+ super().__init__(data_sets, **kwargs)
56
+ if patch_shape is not None:
57
+ self.addEditor(BrightestPixelPatchEditor(patch_shape))
58
+
59
+ aia_dataset = SDODataset_flaring(data=base_input_folder, wavelengths=wavelengths, resolution=512)
60
+
61
+ def save_sample(i):
62
+ data = aia_dataset[i]
63
+ file_path = os.path.join(output_folder, aia_dataset.getId(i)) + '.npy'
64
+ if os.path.exists(file_path):
65
+ return # Skip if file already exists
66
+ np.save(file_path, data)
67
+
68
+ with Pool(processes=90) as pool:
69
+ list(tqdm.tqdm(pool.imap(save_sample, range(len(aia_dataset))), total=len(aia_dataset)))