griffingoodwin04 committed on
Commit
e522a44
·
1 Parent(s): e91166a

Fixed normalization of timestamp names

Browse files
data/split_data.py CHANGED
@@ -5,6 +5,14 @@ import argparse
5
  from datetime import datetime
6
  from pathlib import Path
7
 
 
 
 
 
 
 
 
 
8
  def split_data(input_folder, output_dir, data_type, flare_events_csv=None, repartition=False,
9
  train_start=None, train_end=None, val_start=None, val_end=None,
10
  test_start=None, test_end=None, use_buffer_strategy=False, copy_files=False):
@@ -138,7 +146,7 @@ def split_data(input_folder, output_dir, data_type, flare_events_csv=None, repar
138
 
139
  try:
140
  # Extract timestamp from filename (assuming format like "2012-01-01T00:00:00.npy")
141
- file_time = pd.to_datetime(file.split(".")[0])
142
  except ValueError:
143
  print(f"Skipping file {file}: Invalid timestamp format")
144
  skipped_count += 1
 
5
  from datetime import datetime
6
  from pathlib import Path
7
 
8
+
9
+ def _normalize_timestamp(ts: str) -> str:
10
+ """Normalize timestamp strings with underscores instead of colons (cross-platform filenames)."""
11
+ if 'T' in ts:
12
+ date_part, time_part = ts.split('T', 1)
13
+ return f"{date_part}T{time_part.replace('_', ':')}"
14
+ return ts
15
+
16
  def split_data(input_folder, output_dir, data_type, flare_events_csv=None, repartition=False,
17
  train_start=None, train_end=None, val_start=None, val_end=None,
18
  test_start=None, test_end=None, use_buffer_strategy=False, copy_files=False):
 
146
 
147
  try:
148
  # Extract timestamp from filename (assuming format like "2012-01-01T00:00:00.npy")
149
+ file_time = pd.to_datetime(_normalize_timestamp(file.split(".")[0]))
150
  except ValueError:
151
  print(f"Skipping file {file}: Invalid timestamp format")
152
  skipped_count += 1
forecasting/data_loaders/SDOAIA_dataloader.py CHANGED
@@ -12,6 +12,14 @@ import glob
12
  import os
13
 
14
 
 
 
 
 
 
 
 
 
15
  class SXRLogNormTransform:
16
  """Picklable SXR log-normalization transform (replaces T.Lambda for spawn compatibility)."""
17
  def __init__(self, mean: float, std: float):
@@ -85,7 +93,7 @@ class AIA_GOESDataset(torch.utils.data.Dataset):
85
  valid_samples = []
86
  for f in aia_files:
87
  timestamp = f.stem
88
- timestamp_dt = pd.to_datetime(timestamp)
89
 
90
  if self.reference_time is None:
91
  self.reference_time = timestamp_dt
 
12
  import os
13
 
14
 
15
+ def _normalize_timestamp(ts: str) -> str:
16
+ """Normalize timestamp strings with underscores instead of colons (cross-platform filenames)."""
17
+ if 'T' in ts:
18
+ date_part, time_part = ts.split('T', 1)
19
+ return f"{date_part}T{time_part.replace('_', ':')}"
20
+ return ts
21
+
22
+
23
  class SXRLogNormTransform:
24
  """Picklable SXR log-normalization transform (replaces T.Lambda for spawn compatibility)."""
25
  def __init__(self, mean: float, std: float):
 
93
  valid_samples = []
94
  for f in aia_files:
95
  timestamp = f.stem
96
+ timestamp_dt = pd.to_datetime(_normalize_timestamp(timestamp))
97
 
98
  if self.reference_time is None:
99
  self.reference_time = timestamp_dt