Chromaniquej1 committed on
Commit
203ac2f
·
1 Parent(s): 9f2159d

Update sxr_normalization.py

Browse files

Added docstrings to data_loaders/sxr_normalization.py

forecasting/data_loaders/sxr_normalization.py CHANGED
@@ -1,4 +1,3 @@
1
-
2
  import numpy as np
3
  from pathlib import Path
4
  import glob
@@ -6,13 +5,37 @@ import os
6
 
7
  def compute_sxr_norm(sxr_dir):
8
  """
9
- Compute mean and standard deviation of log10-transformed SXR values.
 
 
 
 
 
 
 
 
 
 
 
10
 
11
- Args:
12
- sxr_dir (str): Path to directory containing SXR .npy files.
 
 
 
13
 
14
- Returns:
15
- tuple: (mean, std) of log10(SXR + 1e-8) values.
 
 
 
 
 
 
 
 
 
 
16
  """
17
  sxr_dir = Path(sxr_dir).resolve()
18
  print(f"Checking SXR directory: {sxr_dir}")
@@ -23,7 +46,7 @@ def compute_sxr_norm(sxr_dir):
23
  sxr_files = sorted(glob.glob(os.path.join(sxr_dir, "*.npy")))
24
  print(f"Found {len(sxr_files)} SXR files in {sxr_dir}")
25
  if len(sxr_files) == 0:
26
- print(f"No files matching '*_sxr.npy' found. Listing directory contents:")
27
  print(os.listdir(sxr_dir)[:10]) # Show first 10 files
28
  raise ValueError(f"No SXR files found in {sxr_dir}")
29
 
@@ -49,9 +72,22 @@ def compute_sxr_norm(sxr_dir):
49
  print(f"Computed SXR normalization: mean={mean}, std={std}")
50
  return mean, std
51
 
 
52
  if __name__ == "__main__":
 
 
 
 
 
 
 
 
 
 
 
 
53
  # Update this path to your real data SXR directory
54
  sxr_dir = "/mnt/data/PAPER_DATA_B/SXR/train" # Replace with actual path
55
  sxr_norm = compute_sxr_norm(sxr_dir)
56
  np.save("/mnt/data/PAPER_DATA_B/SXR/normalized_sxr.npy", sxr_norm)
57
- #print(f"Saved SXR normalization to /mnt/data/ML-Ready-Data-No-Intensity-Cut/normalized_sxr")
 
 
1
  import numpy as np
2
  from pathlib import Path
3
  import glob
 
5
 
6
  def compute_sxr_norm(sxr_dir):
7
  """
8
+ Compute the mean and standard deviation of log10-transformed Soft X-Ray (SXR) flux values.
9
+
10
+ This function scans a given directory containing `.npy` SXR data files,
11
+ loads each file, filters out invalid or non-finite values, applies a logarithmic
12
+ transformation (`log10(SXR + 1e-8)`), and computes the mean and standard deviation
13
+ for normalization purposes. These normalization statistics are typically used
14
+ during model training and inference to ensure consistent SXR scaling.
15
+
16
+ Parameters
17
+ ----------
18
+ sxr_dir : str or Path
19
+ Path to the directory containing `.npy` SXR flux files.
20
 
21
+ Returns
22
+ -------
23
+ tuple of (float, float)
24
+ - mean : Mean of log10-transformed SXR flux values.
25
+ - std : Standard deviation of log10-transformed SXR flux values.
26
 
27
+ Raises
28
+ ------
29
+ FileNotFoundError
30
+ If the specified SXR directory does not exist.
31
+ ValueError
32
+ If no valid `.npy` files or no valid SXR values are found.
33
+
34
+ Notes
35
+ -----
36
+ - Files are expected to contain scalar SXR flux values in W/m².
37
+ - Invalid (non-finite or negative) values are automatically skipped.
38
+ - The logarithmic transform helps stabilize the variance and normalize scale differences.
39
  """
40
  sxr_dir = Path(sxr_dir).resolve()
41
  print(f"Checking SXR directory: {sxr_dir}")
 
46
  sxr_files = sorted(glob.glob(os.path.join(sxr_dir, "*.npy")))
47
  print(f"Found {len(sxr_files)} SXR files in {sxr_dir}")
48
  if len(sxr_files) == 0:
49
+ print(f"No files matching '*.npy' found. Listing directory contents:")
50
  print(os.listdir(sxr_dir)[:10]) # Show first 10 files
51
  raise ValueError(f"No SXR files found in {sxr_dir}")
52
 
 
72
  print(f"Computed SXR normalization: mean={mean}, std={std}")
73
  return mean, std
74
 
75
+
76
  if __name__ == "__main__":
77
+ """
78
+ Command-line entry point for computing and saving SXR normalization statistics.
79
+
80
+ This block allows the script to be executed directly. It:
81
+ 1. Computes log10(SXR) normalization parameters (mean, std) for the dataset.
82
+ 2. Saves the computed normalization values as a NumPy `.npy` file for later use.
83
+
84
+ Notes
85
+ -----
86
+ - Update the `sxr_dir` variable below to point to your actual SXR data directory.
87
+ - The resulting file `normalized_sxr.npy` is saved in the parent `SXR` directory (one level above `sxr_dir`), not inside `sxr_dir` itself.
88
+ """
89
  # Update this path to your real data SXR directory
90
  sxr_dir = "/mnt/data/PAPER_DATA_B/SXR/train" # Replace with actual path
91
  sxr_norm = compute_sxr_norm(sxr_dir)
92
  np.save("/mnt/data/PAPER_DATA_B/SXR/normalized_sxr.npy", sxr_norm)
93
+ # print(f"Saved SXR normalization to /mnt/data/ML-Ready-Data-No-Intensity-Cut/normalized_sxr")