Commit ·
e522a44
1
Parent(s): e91166a
Fixed normalization of timestamp names
Browse files
data/split_data.py
CHANGED
|
@@ -5,6 +5,14 @@ import argparse
|
|
| 5 |
from datetime import datetime
|
| 6 |
from pathlib import Path
|
| 7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
def split_data(input_folder, output_dir, data_type, flare_events_csv=None, repartition=False,
|
| 9 |
train_start=None, train_end=None, val_start=None, val_end=None,
|
| 10 |
test_start=None, test_end=None, use_buffer_strategy=False, copy_files=False):
|
|
@@ -138,7 +146,7 @@ def split_data(input_folder, output_dir, data_type, flare_events_csv=None, repar
|
|
| 138 |
|
| 139 |
try:
|
| 140 |
# Extract timestamp from filename (assuming format like "2012-01-01T00:00:00.npy")
|
| 141 |
-
file_time = pd.to_datetime(file.split(".")[0])
|
| 142 |
except ValueError:
|
| 143 |
print(f"Skipping file {file}: Invalid timestamp format")
|
| 144 |
skipped_count += 1
|
|
|
|
| 5 |
from datetime import datetime
|
| 6 |
from pathlib import Path
|
| 7 |
|
| 8 |
+
|
| 9 |
+
def _normalize_timestamp(ts: str) -> str:
|
| 10 |
+
"""Normalize timestamp strings with underscores instead of colons (cross-platform filenames)."""
|
| 11 |
+
if 'T' in ts:
|
| 12 |
+
date_part, time_part = ts.split('T', 1)
|
| 13 |
+
return f"{date_part}T{time_part.replace('_', ':')}"
|
| 14 |
+
return ts
|
| 15 |
+
|
| 16 |
def split_data(input_folder, output_dir, data_type, flare_events_csv=None, repartition=False,
|
| 17 |
train_start=None, train_end=None, val_start=None, val_end=None,
|
| 18 |
test_start=None, test_end=None, use_buffer_strategy=False, copy_files=False):
|
|
|
|
| 146 |
|
| 147 |
try:
|
| 148 |
# Extract timestamp from filename (assuming format like "2012-01-01T00:00:00.npy")
|
| 149 |
+
file_time = pd.to_datetime(_normalize_timestamp(file.split(".")[0]))
|
| 150 |
except ValueError:
|
| 151 |
print(f"Skipping file {file}: Invalid timestamp format")
|
| 152 |
skipped_count += 1
|
forecasting/data_loaders/SDOAIA_dataloader.py
CHANGED
|
@@ -12,6 +12,14 @@ import glob
|
|
| 12 |
import os
|
| 13 |
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
class SXRLogNormTransform:
|
| 16 |
"""Picklable SXR log-normalization transform (replaces T.Lambda for spawn compatibility)."""
|
| 17 |
def __init__(self, mean: float, std: float):
|
|
@@ -85,7 +93,7 @@ class AIA_GOESDataset(torch.utils.data.Dataset):
|
|
| 85 |
valid_samples = []
|
| 86 |
for f in aia_files:
|
| 87 |
timestamp = f.stem
|
| 88 |
-
timestamp_dt = pd.to_datetime(timestamp)
|
| 89 |
|
| 90 |
if self.reference_time is None:
|
| 91 |
self.reference_time = timestamp_dt
|
|
|
|
| 12 |
import os
|
| 13 |
|
| 14 |
|
| 15 |
+
def _normalize_timestamp(ts: str) -> str:
|
| 16 |
+
"""Normalize timestamp strings with underscores instead of colons (cross-platform filenames)."""
|
| 17 |
+
if 'T' in ts:
|
| 18 |
+
date_part, time_part = ts.split('T', 1)
|
| 19 |
+
return f"{date_part}T{time_part.replace('_', ':')}"
|
| 20 |
+
return ts
|
| 21 |
+
|
| 22 |
+
|
| 23 |
class SXRLogNormTransform:
|
| 24 |
"""Picklable SXR log-normalization transform (replaces T.Lambda for spawn compatibility)."""
|
| 25 |
def __init__(self, mean: float, std: float):
|
|
|
|
| 93 |
valid_samples = []
|
| 94 |
for f in aia_files:
|
| 95 |
timestamp = f.stem
|
| 96 |
+
timestamp_dt = pd.to_datetime(_normalize_timestamp(timestamp))
|
| 97 |
|
| 98 |
if self.reference_time is None:
|
| 99 |
self.reference_time = timestamp_dt
|