christophschirninger committed on
Commit
2bc3cac
·
1 Parent(s): ad8a86b

Select data from the latest available GOES satellite (highest G-number)

Browse files
Files changed (1) hide show
  1. flaring/data/align_data.py +24 -2
flaring/data/align_data.py CHANGED
@@ -8,6 +8,7 @@ from multiprocessing import Pool, cpu_count
8
  import numpy as np
9
  import pandas as pd
10
  from tqdm import tqdm
 
11
 
12
  warnings.filterwarnings('ignore')
13
 
@@ -64,7 +65,28 @@ def main():
64
 
65
  # Load GOES data
66
  print("Loading GOES data...")
67
- goes = pd.read_csv("/mnt/data2/goes_combined/combined_g18_avg1m_20230701_20230815.csv")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  goes['time'] = pd.to_datetime(goes['time'], format='%Y-%m-%d %H:%M:%S')
69
 
70
  # Create output directories if they don't exist
@@ -77,7 +99,7 @@ def main():
77
  aia_files_split.append(file.split('/')[4].split('.')[0])
78
 
79
  common_timestamps = [
80
- datetime.fromisoformat(date_str).strftime('%Y-%m-%d %H:%M:%S')
81
  for date_str in aia_files_split]
82
 
83
  # Use all available CPU cores
 
8
  import numpy as np
9
  import pandas as pd
10
  from tqdm import tqdm
11
+ import re
12
 
13
  warnings.filterwarnings('ignore')
14
 
 
65
 
66
  # Load GOES data
67
  print("Loading GOES data...")
68
+ # Directory containing GOES files
69
+ directory = "/mnt/data2/goes_combined"
70
+
71
+ # Regex to match filenames and extract G-number
72
+ pattern = re.compile(r"combined_g(\d+)_avg1m_\d+_\d+\.csv")
73
+
74
+ # Find all files matching the pattern and extract G-numbers
75
+ goes_files = []
76
+ for fname in os.listdir(directory):
77
+ match = pattern.match(fname)
78
+ if match:
79
+ g_number = int(match.group(1))
80
+ goes_files.append((g_number, fname))
81
+
82
+ if not goes_files:
83
+ raise FileNotFoundError("No GOES CSV files found in directory.")
84
+
85
+ # Select file with highest G-number
86
+ goes_files.sort(reverse=True) # Highest G-number first
87
+ _, selected_file = goes_files[0]
88
+
89
+ goes = pd.read_csv(os.path.join(directory, selected_file))
90
  goes['time'] = pd.to_datetime(goes['time'], format='%Y-%m-%d %H:%M:%S')
91
 
92
  # Create output directories if they don't exist
 
99
  aia_files_split.append(file.split('/')[4].split('.')[0])
100
 
101
  common_timestamps = [
102
+ datetime.fromisoformat(date_str).strftime('%Y-%m-%dT%H:%M:%S')
103
  for date_str in aia_files_split]
104
 
105
  # Use all available CPU cores