FOXES / data /sxr_normalization.py

made changes to directory locations... removed some unnecessary files

b6d3638 5 months ago

4.39 kB

	import numpy as np
	from pathlib import Path
	import glob
	import os
	import argparse

	def compute_sxr_norm(sxr_dir):
	"""
	Compute the mean and standard deviation of log10-transformed Soft X-Ray (SXR) flux values.

	This function scans a given directory containing `.npy` SXR data files,
	loads each file, filters out invalid or non-finite values, applies a logarithmic
	transformation (`log10(SXR + 1e-8)`), and computes the mean and standard deviation
	for normalization purposes. These normalization statistics are typically used
	during model training and inference to ensure consistent SXR scaling.

	Parameters
	----------
	sxr_dir : str or Path
	Path to the directory containing `.npy` SXR flux files.

	Returns
	-------
	tuple of (float, float)
	- mean : Mean of log10-transformed SXR flux values.
	- std : Standard deviation of log10-transformed SXR flux values.

	Raises
	------
	FileNotFoundError
	If the specified SXR directory does not exist.
	ValueError
	If no valid `.npy` files or no valid SXR values are found.

	Notes
	-----
	- Files are expected to contain scalar SXR flux values in W/m².
	- Invalid (non-finite or negative) values are automatically skipped.
	- The logarithmic transform helps stabilize the variance and normalize scale differences.
	"""
	sxr_dir = Path(sxr_dir).resolve()
	print(f"Checking SXR directory: {sxr_dir}")
	if not sxr_dir.is_dir():
	raise FileNotFoundError(f"SXR directory does not exist or is not a directory: {sxr_dir}")

	# Use glob for case-insensitive matching
	sxr_files = sorted(glob.glob(os.path.join(sxr_dir, "*.npy")))
	print(f"Found {len(sxr_files)} SXR files in {sxr_dir}")
	if len(sxr_files) == 0:
	print(f"No files matching '*.npy' found. Listing directory contents:")
	print(os.listdir(sxr_dir)[:10]) # Show first 10 files
	raise ValueError(f"No SXR files found in {sxr_dir}")

	sxr_values = []
	for f in sxr_files:
	try:
	sxr = np.load(f)
	sxr = np.atleast_1d(sxr).flatten()[0]
	if not np.isfinite(sxr) or sxr < 0:
	print(f"Skipping invalid SXR value in {f}: {sxr}")
	continue
	sxr_values.append(np.log10(sxr))
	except Exception as e:
	print(f"Failed to load SXR file {f}: {e}")
	continue

	sxr_values = np.array(sxr_values)
	if len(sxr_values) == 0:
	raise ValueError(f"No valid SXR values found in {sxr_dir}. All files failed to load or contained invalid data.")

	mean = np.mean(sxr_values)
	std = np.std(sxr_values)
	print(f"Computed SXR normalization: mean={mean}, std={std}")
	return mean, std


	if __name__ == "__main__":
	"""
	Command-line interface for calculating and saving SXR normalization parameters.

	When run as a script, this entry point:
	1. Computes the normalization statistics (mean and standard deviation of log10(SXR flux))
	for all valid `.npy` SXR files in the specified directory.
	2. Saves the resulting (mean, std) tuple as a NumPy `.npy` file at the user-specified output path.

	Usage
	-----
	python sxr_normalization.py --sxr_dir /path/to/SXR/dir --output_path /where/to/save/normalized_sxr.npy

	Arguments
	---------
	--sxr_dir : str
	Path to the directory containing SXR `.npy` data files.
	--output_path : str
	Path where the computed normalization statistics (mean, std) will be saved.

	Notes
	-----
	- By default, --sxr_dir and --output_path are set for the paper dataset example.
	- The output file `normalized_sxr.npy` contains a two-element array: [mean, std]
	- These statistics are expected to be used for normalizing GOES SXR flux data in the ML pipeline.
	"""

	parser = argparse.ArgumentParser(description='Compute and save SXR normalization statistics.')
	parser.add_argument('--sxr_dir', type=str, default='/mnt/data/PAPER/SXR/train', help='Path to the SXR data directory.')
	parser.add_argument('--output_path', type=str, default='/mnt/data/PAPER/SXR/normalized_sxr.npy', help='Path to save the normalized SXR data.')
	args = parser.parse_args()

	sxr_norm = compute_sxr_norm(args.sxr_dir)
	np.save(args.output_path, sxr_norm)
	print(f"Saved SXR normalization to {args.output_path}")