Chromaniquej1 committed on
Commit ·
203ac2f
1
Parent(s): 9f2159d
Update sxr_normalization.py
Browse files — Added docstrings to data_loaders/sxr_normalization.py
forecasting/data_loaders/sxr_normalization.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
| 1 |
-
|
| 2 |
import numpy as np
|
| 3 |
from pathlib import Path
|
| 4 |
import glob
|
|
@@ -6,13 +5,37 @@ import os
|
|
| 6 |
|
| 7 |
def compute_sxr_norm(sxr_dir):
|
| 8 |
"""
|
| 9 |
-
Compute mean and standard deviation of log10-transformed SXR values.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
-
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
-
|
| 15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
"""
|
| 17 |
sxr_dir = Path(sxr_dir).resolve()
|
| 18 |
print(f"Checking SXR directory: {sxr_dir}")
|
|
@@ -23,7 +46,7 @@ def compute_sxr_norm(sxr_dir):
|
|
| 23 |
sxr_files = sorted(glob.glob(os.path.join(sxr_dir, "*.npy")))
|
| 24 |
print(f"Found {len(sxr_files)} SXR files in {sxr_dir}")
|
| 25 |
if len(sxr_files) == 0:
|
| 26 |
-
print(f"No files matching '*
|
| 27 |
print(os.listdir(sxr_dir)[:10]) # Show first 10 files
|
| 28 |
raise ValueError(f"No SXR files found in {sxr_dir}")
|
| 29 |
|
|
@@ -49,9 +72,22 @@ def compute_sxr_norm(sxr_dir):
|
|
| 49 |
print(f"Computed SXR normalization: mean={mean}, std={std}")
|
| 50 |
return mean, std
|
| 51 |
|
|
|
|
| 52 |
if __name__ == "__main__":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
# Update this path to your real data SXR directory
|
| 54 |
sxr_dir = "/mnt/data/PAPER_DATA_B/SXR/train" # Replace with actual path
|
| 55 |
sxr_norm = compute_sxr_norm(sxr_dir)
|
| 56 |
np.save("/mnt/data/PAPER_DATA_B/SXR/normalized_sxr.npy", sxr_norm)
|
| 57 |
-
#print(f"Saved SXR normalization to /mnt/data/ML-Ready-Data-No-Intensity-Cut/normalized_sxr")
|
|
|
|
|
|
|
| 1 |
import numpy as np
|
| 2 |
from pathlib import Path
|
| 3 |
import glob
|
|
|
|
| 5 |
|
| 6 |
def compute_sxr_norm(sxr_dir):
|
| 7 |
"""
|
| 8 |
+
Compute the mean and standard deviation of log10-transformed Soft X-Ray (SXR) flux values.
|
| 9 |
+
|
| 10 |
+
This function scans a given directory containing `.npy` SXR data files,
|
| 11 |
+
loads each file, filters out invalid or non-finite values, applies a logarithmic
|
| 12 |
+
transformation (`log10(SXR + 1e-8)`), and computes the mean and standard deviation
|
| 13 |
+
for normalization purposes. These normalization statistics are typically used
|
| 14 |
+
during model training and inference to ensure consistent SXR scaling.
|
| 15 |
+
|
| 16 |
+
Parameters
|
| 17 |
+
----------
|
| 18 |
+
sxr_dir : str or Path
|
| 19 |
+
Path to the directory containing `.npy` SXR flux files.
|
| 20 |
|
| 21 |
+
Returns
|
| 22 |
+
-------
|
| 23 |
+
tuple of (float, float)
|
| 24 |
+
- mean : Mean of log10-transformed SXR flux values.
|
| 25 |
+
- std : Standard deviation of log10-transformed SXR flux values.
|
| 26 |
|
| 27 |
+
Raises
|
| 28 |
+
------
|
| 29 |
+
FileNotFoundError
|
| 30 |
+
If the specified SXR directory does not exist.
|
| 31 |
+
ValueError
|
| 32 |
+
If no valid `.npy` files or no valid SXR values are found.
|
| 33 |
+
|
| 34 |
+
Notes
|
| 35 |
+
-----
|
| 36 |
+
- Files are expected to contain scalar SXR flux values in W/m².
|
| 37 |
+
- Invalid (non-finite or negative) values are automatically skipped.
|
| 38 |
+
- The logarithmic transform helps stabilize the variance and normalize scale differences.
|
| 39 |
"""
|
| 40 |
sxr_dir = Path(sxr_dir).resolve()
|
| 41 |
print(f"Checking SXR directory: {sxr_dir}")
|
|
|
|
| 46 |
sxr_files = sorted(glob.glob(os.path.join(sxr_dir, "*.npy")))
|
| 47 |
print(f"Found {len(sxr_files)} SXR files in {sxr_dir}")
|
| 48 |
if len(sxr_files) == 0:
|
| 49 |
+
print(f"No files matching '*.npy' found. Listing directory contents:")
|
| 50 |
print(os.listdir(sxr_dir)[:10]) # Show first 10 files
|
| 51 |
raise ValueError(f"No SXR files found in {sxr_dir}")
|
| 52 |
|
|
|
|
| 72 |
print(f"Computed SXR normalization: mean={mean}, std={std}")
|
| 73 |
return mean, std
|
| 74 |
|
| 75 |
+
|
| 76 |
if __name__ == "__main__":
|
| 77 |
+
"""
|
| 78 |
+
Command-line entry point for computing and saving SXR normalization statistics.
|
| 79 |
+
|
| 80 |
+
This block allows the script to be executed directly. It:
|
| 81 |
+
1. Computes log10(SXR) normalization parameters (mean, std) for the dataset.
|
| 82 |
+
2. Saves the computed normalization values as a NumPy `.npy` file for later use.
|
| 83 |
+
|
| 84 |
+
Notes
|
| 85 |
+
-----
|
| 86 |
+
- Update the `sxr_dir` variable below to point to your actual SXR data directory.
|
| 87 |
+
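- The resulting file `normalized_sxr.npy` is written to the hard-coded path passed to `np.save` below, which is the parent of the `train` directory rather than `sxr_dir` itself; update that path together with `sxr_dir`.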
|
| 88 |
+
"""
|
| 89 |
# Update this path to your real data SXR directory
|
| 90 |
sxr_dir = "/mnt/data/PAPER_DATA_B/SXR/train" # Replace with actual path
|
| 91 |
sxr_norm = compute_sxr_norm(sxr_dir)
|
| 92 |
np.save("/mnt/data/PAPER_DATA_B/SXR/normalized_sxr.npy", sxr_norm)
|
| 93 |
+
# print(f"Saved SXR normalization to /mnt/data/ML-Ready-Data-No-Intensity-Cut/normalized_sxr")
|