griffingoodwin04 committed on
Commit
d39cef0
·
1 Parent(s): f6e753d

right split_data.py

Browse files
Files changed (1) hide show
  1. flaring/split_data.py +62 -89
flaring/split_data.py CHANGED
@@ -1,82 +1,3 @@
1
- <<<<<<< HEAD
2
- import os
3
- import pandas as pd
4
- import shutil
5
- from datetime import datetime
6
-
7
-
8
- data_dir = "/mnt/data/ML-Ready-Data-No-Intensity-Cut/AIA-Data"
9
- flares_event_dir = "/mnt/data/ML-Ready-Data-No-Intensity-Cut/flares_event_dir"
10
- non_flares_event_dir = "/mnt/data/ML-Ready-Data-No-Intensity-Cut/non_flares_event_dir"
11
- flare_events_csv = "/mnt/data/flare_list/flare_events_2023-07-01_2023-08-15.csv"
12
-
13
- os.makedirs(flares_event_dir, exist_ok=True)
14
- os.makedirs(non_flares_event_dir, exist_ok=True)
15
-
16
- flare_event = pd.read_csv(flare_events_csv)
17
-
18
-
19
- flaring_eve_list = []
20
- for i, row in flare_event.iterrows():
21
- start_time = pd.to_datetime(row['event_starttime'])
22
- end_time = pd.to_datetime(row['event_endtime'])
23
- flaring_eve_list.append((start_time, end_time))
24
-
25
- data_list = os.listdir(data_dir)
26
-
27
- for file in data_list:
28
- try:
29
- aia_time = pd.to_datetime(file.split(".")[0])
30
- except ValueError:
31
- print(f"Skipping file {file}: Invalid timestamp format")
32
- continue
33
-
34
- # Check if the file's time falls within any flare event
35
- is_flaring = any(start <= aia_time <= end for start, end in flaring_eve_list)
36
- if is_flaring:
37
- src = os.path.join(data_dir, file)
38
- dst = os.path.join(flares_event_dir, file)
39
- shutil.copy(src, dst)
40
- print(f"Copied {file} to {flares_event_dir}")
41
- else:
42
- src = os.path.join(data_dir, file)
43
- dst = os.path.join(non_flares_event_dir, file)
44
- shutil.copy(src, dst)
45
- print(f"Copied {file} to {non_flares_event_dir}")
46
-
47
- train_range = (datetime(2023, 7, 1), datetime(2023, 7, 20))
48
- val_range = (datetime(2023, 7, 21), datetime(2023, 8, 5))
49
- test_range = (datetime(2023, 8, 6), datetime(2023, 8, 15))
50
-
51
- print(train_range[0],train_range[1])
52
- # Create train, val, test subdirectories under flaring and non-flaring
53
- for base_dir in [flares_event_dir, non_flares_event_dir]:
54
- os.makedirs(os.path.join(base_dir, "train"), exist_ok=True)
55
- os.makedirs(os.path.join(base_dir, "val"), exist_ok=True)
56
- os.makedirs(os.path.join(base_dir, "test"), exist_ok=True)
57
-
58
- # Get list of files in the current directory (flaring or non-flaring)
59
- file_list = os.listdir(base_dir)
60
-
61
-
62
- for file in file_list:
63
- try:
64
- aia_time = pd.to_datetime(file.split(".")[0])
65
- except ValueError:
66
- print(f"Skipping file {file} in {base_dir}: Invalid timestamp format")
67
- continue
68
-
69
- # Determine split based on date
70
- if train_range[0] <= aia_time <= train_range[1]:
71
- split_dir = "train"
72
- elif val_range[0] <= aia_time <= val_range[1]:
73
- split_dir = "val"
74
- elif test_range[0] <= aia_time <= test_range[1]:
75
- split_dir = "test"
76
- else:
77
- print(f"Skipping file {file} in {base_dir}: Outside date range")
78
- continue
79
- =======
80
  #
81
 
82
 
@@ -156,14 +77,66 @@ for base_dir in [flares_event_dir, non_flares_event_dir]:
156
  # print(f"Copied {file} to {dst}")
157
 
158
 
159
- >>>>>>> 22f4a17192a3a77fa4d4fe1ae3a2aa8c0bbdb539
160
 
161
- # Move file to appropriate split directory
162
- src = os.path.join(base_dir, file)
163
- dst = os.path.join(base_dir, split_dir, file)
164
- shutil.move(src, dst)
165
- <<<<<<< HEAD
166
- print(f"Moved {file} to {base_dir}/{split_dir}")
167
- =======
168
- print(f"Moved {file} to {base_dir}/{split_dir}")
169
- >>>>>>> 22f4a17192a3a77fa4d4fe1ae3a2aa8c0bbdb539
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  #
2
 
3
 
 
77
  # print(f"Copied {file} to {dst}")
78
 
79
 
 
80
 
import os
import pandas as pd
import shutil
from datetime import datetime

# Sort the files found in data_dir into
#   <flares_event_dir | non_flares_event_dir>/<train | val | test>/
# The text before the first "." of each file name is parsed as a timestamp.
# That timestamp decides both the class (inside any flare interval from the
# CSV -> flaring) and the split (which date range contains it). Files are
# copied, so data_dir itself is left unchanged.

data_dir = "/mnt/data/ML-Ready/AIA-Data/"
flares_event_dir = "/mnt/data/ML-Ready/flares_event_dir/"
non_flares_event_dir = "/mnt/data/ML-Ready/non_flares_event_dir/"
flare_events_csv = "/mnt/data/flare_list/flare_events_2023-07-01_2023-08-15.csv"

# Make sure every <class>/<split> destination exists before any copy happens.
for base_dir in (flares_event_dir, non_flares_event_dir):
    for split_name in ("train", "val", "test"):
        os.makedirs(os.path.join(base_dir, split_name), exist_ok=True)

# Load the flare catalogue and echo its first rows as a quick sanity check.
flare_event = pd.read_csv(flare_events_csv)
print(flare_event.head())

# One (start, end) timestamp pair per catalogue row.
flaring_eve_list = [
    (pd.to_datetime(event['event_starttime']), pd.to_datetime(event['event_endtime']))
    for _, event in flare_event.iterrows()
]

# Date windows for each split; both ends are compared inclusively below.
# NOTE(review): each upper bound is a midnight timestamp, so files stamped
# later on 2023-07-25, 2023-07-30 and 2023-08-15 match no window and are
# skipped (as is everything on 07-26 and 07-31). This may be a deliberate
# buffer between splits -- confirm.
train_range = (datetime(2023, 7, 1), datetime(2023, 7, 25))
val_range = (datetime(2023, 7, 27), datetime(2023, 7, 30))
test_range = (datetime(2023, 8, 1), datetime(2023, 8, 15))
split_ranges = (("train", train_range), ("val", val_range), ("test", test_range))

data_list = os.listdir(data_dir)

for fname in data_list:
    # The file name up to the first "." must parse as a timestamp.
    try:
        aia_time = pd.to_datetime(fname.split(".")[0])
    except ValueError:
        print(f"Skipping file {fname}: Invalid timestamp format")
        continue

    # Flaring when the timestamp lies inside any catalogue interval
    # (interval end points included).
    is_flaring = any(start <= aia_time <= end for start, end in flaring_eve_list)
    if is_flaring:
        base_dir = flares_event_dir
    else:
        base_dir = non_flares_event_dir

    # First window (checked in train, val, test order) containing the timestamp.
    split_dir = next(
        (name for name, (first, last) in split_ranges if first <= aia_time <= last),
        None,
    )
    if split_dir is None:
        print(f"Skipping file {fname}: Outside date range")
        continue

    # Copy rather than move so the source directory stays intact.
    dst = os.path.join(base_dir, split_dir, fname)
    shutil.copy(os.path.join(data_dir, fname), dst)
    print(f"Copied {fname} to {base_dir}/{split_dir}")