griffingoodwin04 committed on
Commit
e0c471e
·
1 Parent(s): ca8833c

additional bug fixes

Browse files
flaring/aligning_data.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import re
import warnings
from collections import defaultdict

import numpy as np
import pandas as pd
from astropy.io import fits

# NOTE(review): a blanket ignore suppresses *all* warnings (e.g. astropy FITS
# verification messages, pandas deprecations) — consider narrowing to the
# specific warning categories that are actually noisy.
warnings.filterwarnings('ignore')
12
# Directory that holds processed AIA images for each wavelength channel.
wavelength_dirs = {
    "94": "/mnt/data2/AIA_processed_data/94",
    "131": "/mnt/data2/AIA_processed_data/131",
    "171": "/mnt/data2/AIA_processed_data/171",
    "193": "/mnt/data2/AIA_processed_data/193",
    "211": "/mnt/data2/AIA_processed_data/211",
    "304": "/mnt/data2/AIA_processed_data/304"
}

# Pulls an ISO-like timestamp (YYYY-MM-DDTHH:MM:SS) out of a file name.
# Adjust this pattern to match your file naming scheme.
timestamp_pattern = re.compile(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}")

# timestamp -> set of wavelength channels whose folder contains a file for it.
timestamps_found = defaultdict(set)

for wave, folder in wavelength_dirs.items():
    try:
        entries = os.listdir(folder)
    except Exception as err:
        # Unreadable/missing folder: report it and keep scanning the rest.
        print(f"Could not read directory {folder}: {err}")
        continue
    for entry in entries:
        hit = timestamp_pattern.search(entry)
        if hit is not None:
            timestamps_found[hit.group(0)].add(wave)

# A timestamp is usable only when every wavelength folder has a file for it;
# otherwise record which channels are missing.
all_wavelengths = set(wavelength_dirs.keys())
common_timestamps = []
missing_files = {}
for ts, waves in timestamps_found.items():
    if waves == all_wavelengths:
        common_timestamps.append(ts)
    else:
        missing_files[ts] = list(all_wavelengths - waves)

print("Timestamps present in all wavelength folders:")
for ts in sorted(common_timestamps):
    print(ts)

print("\nTimestamps with missing wavelength files:")
for ts, missing in missing_files.items():
    print(f"{ts}: missing {', '.join(sorted(missing))}")
57
+
58
# GOES-18 1-minute averaged X-ray flux, used as the target paired with each
# AIA image stack.
goes = pd.read_csv("/mnt/data/goes_combined/combined_g18_avg1m_20230701_20230815.csv")
# Convert 'time' column to datetime so it can be compared with AIA timestamps.
goes['time'] = pd.to_datetime(goes['time'], format='%Y-%m-%d %H:%M:%S')

# One 512x512 image per wavelength channel, stacked along axis 0.
data_shape = (6, 512, 512)

# Map wavelengths to array indices within the stacked array.
wavelength_to_idx = {
    '94': 0,
    '131': 1,
    '171': 2,
    '193': 3,
    '211': 4,
    '304': 5
}

# Output directories, created up front so np.save below cannot fail on a
# missing folder.
aia_out = "/mnt/data2/ML-Ready-Data-No-Intensity-Cut/AIA-Data"
sxr_a_out = "/mnt/data2/ML-Ready-Data-No-Intensity-Cut/GOES-18-SXR-A"
sxr_b_out = "/mnt/data2/ML-Ready-Data-No-Intensity-Cut/GOES-18-SXR-B"
for out_dir in (aia_out, sxr_a_out, sxr_b_out):
    os.makedirs(out_dir, exist_ok=True)

# For each timestamp present in all six wavelength folders, stack the six
# FITS images into one (6, 512, 512) array and save it alongside the
# matching GOES SXR-A/SXR-B flux values.
for time_idx, timestamp in enumerate(common_timestamps):
    # Exact-match lookup of the GOES row for this AIA timestamp.
    sxr = goes[goes['time'] == pd.to_datetime(timestamp)]
    if sxr.empty:
        print(f"Missing SXR data for timestamp {timestamp}, skipping...")
        continue
    sxr_a = sxr['xrsa_flux'].values[0]
    sxr_b = sxr['xrsb_flux'].values[0]
    # NOTE(review): a matching row can still carry NaN flux — confirm whether
    # NaN rows should be skipped like missing rows.

    wavelength_data = np.zeros(data_shape, dtype=np.float32)
    sxr_a_data = np.array([sxr_a], dtype=np.float32)
    sxr_b_data = np.array([sxr_b], dtype=np.float32)

    print(f"Processing timestamp: {timestamp} (Index: {time_idx})")
    for wavelength, wave_idx in wavelength_to_idx.items():
        # assumes files are named exactly "<timestamp>.fits" — TODO confirm;
        # the scan above only requires the timestamp to appear *somewhere*
        # in the file name, so a prefixed name would raise FileNotFoundError.
        filepath = os.path.join(wavelength_dirs[wavelength], f"{timestamp}.fits")
        with fits.open(filepath) as hdul:
            wavelength_data[wave_idx] = hdul[0].data

    # Store the stacked AIA data and the two SXR targets for this timestamp.
    np.save(os.path.join(aia_out, f"{timestamp}.npy"), wavelength_data)
    np.save(os.path.join(sxr_a_out, f"{timestamp}.npy"), sxr_a_data)
    np.save(os.path.join(sxr_b_out, f"{timestamp}.npy"), sxr_b_data)
    print(f"Saved data for timestamp {timestamp} to disk.")
    print(f"Progress: {time_idx + 1} / {len(common_timestamps)}")
flaring/sxr_downloader.py CHANGED
@@ -111,10 +111,12 @@ class SXRDownloader:
111
 
112
  try:
113
  combined_ds = xr.concat(datasets, dim='time').sortby('time')
 
114
  if satellite_name in ['GOES-13', 'GOES-14', 'GOES-15']:
115
  combined_ds['xrsa_flux'] = combined_ds['xrsa_flux'] / .85
116
  combined_ds['xrsb_flux'] = combined_ds['xrsb_flux'] / .7
117
  df = combined_ds.to_dataframe().reset_index()
 
118
  if 'quad_diode' in df.columns:
119
  df = df[df['quad_diode'] == 0] # Filter out quad diode data
120
  df['time'] = pd.to_datetime(df['time'])
 
111
 
112
  try:
113
  combined_ds = xr.concat(datasets, dim='time').sortby('time')
114
+ #Scaling factors for GOES-13, GOES-14, and GOES-15
115
  if satellite_name in ['GOES-13', 'GOES-14', 'GOES-15']:
116
  combined_ds['xrsa_flux'] = combined_ds['xrsa_flux'] / .85
117
  combined_ds['xrsb_flux'] = combined_ds['xrsb_flux'] / .7
118
  df = combined_ds.to_dataframe().reset_index()
119
+ #
120
  if 'quad_diode' in df.columns:
121
  df = df[df['quad_diode'] == 0] # Filter out quad diode data
122
  df['time'] = pd.to_datetime(df['time'])