peterdudfield commited on
Commit
cbe6208
·
1 Parent(s): 3f81be8

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .github/workflows/release.yml +17 -0
  2. .github/workflows/test.yml +22 -0
  3. configs.example/callbacks/default.yaml +30 -0
  4. configs.example/callbacks/none.yaml +0 -0
  5. configs.example/callbacks/wandb.yaml +26 -0
  6. configs.example/config.yaml +45 -0
  7. configs.example/datamodule/configuration/example_configuration.yaml +288 -0
  8. configs.example/datamodule/premade_batches.yaml +10 -0
  9. configs.example/datamodule/streamed_batches.yaml +20 -0
  10. configs.example/experiment/baseline.yaml +21 -0
  11. configs.example/experiment/conv3d_sat_nwp.yaml +23 -0
  12. configs.example/experiment/example_simple.yaml +27 -0
  13. configs.example/experiment/test.yaml +33 -0
  14. configs.example/hparams_search/conv3d_optuna.yaml +49 -0
  15. configs.example/hydra/default.yaml +14 -0
  16. configs.example/logger/csv.yaml +9 -0
  17. configs.example/logger/many_loggers.yaml +7 -0
  18. configs.example/logger/neptune.yaml +8 -0
  19. configs.example/logger/tensorboard.yaml +11 -0
  20. configs.example/logger/wandb.yaml +17 -0
  21. configs.example/model/baseline.yaml +4 -0
  22. configs.example/model/multimodal.yaml +115 -0
  23. configs.example/model/nwp_dwsrf_weighting.yaml +21 -0
  24. configs.example/model/test.yaml +4 -0
  25. configs.example/model/wind_multimodal.yaml +83 -0
  26. configs.example/readme.md +5 -0
  27. configs.example/trainer/all_params.yaml +48 -0
  28. configs.example/trainer/default.yaml +14 -0
  29. experiments/india/001_v1/india_pv_wind.md +69 -0
  30. experiments/india/002_wind_meteomatics/india_windnet_v2.md +46 -0
  31. experiments/india/003_wind_plevels/MAE.png +3 -0
  32. experiments/india/003_wind_plevels/MAEvstimesteps.png +3 -0
  33. experiments/india/003_wind_plevels/p10.png +3 -0
  34. experiments/india/003_wind_plevels/p50.png +3 -0
  35. experiments/india/003_wind_plevels/plevel.md +54 -0
  36. experiments/india/004_n_training_samples/log-plot.py +14 -0
  37. experiments/india/004_n_training_samples/mae_samples.png +0 -0
  38. experiments/india/004_n_training_samples/mae_step.png +3 -0
  39. experiments/india/004_n_training_samples/readme.md +48 -0
  40. experiments/india/005_extra_nwp_variables/mae_steps.png +3 -0
  41. experiments/india/005_extra_nwp_variables/mae_steps_grouped.png +3 -0
  42. experiments/india/005_extra_nwp_variables/readmd.md +55 -0
  43. experiments/india/006_da_only/bad.png +3 -0
  44. experiments/india/006_da_only/da_only.md +37 -0
  45. experiments/india/006_da_only/good.png +3 -0
  46. experiments/india/006_da_only/mae_steps.png +3 -0
  47. experiments/india/007_different_seeds/mae_all_steps.png +3 -0
  48. experiments/india/007_different_seeds/mae_steps.png +3 -0
  49. experiments/india/007_different_seeds/readme.md +33 -0
  50. experiments/india/008_coarse4/mae_step.png +3 -0
.github/workflows/release.yml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Python Bump Version & release
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ paths-ignore:
8
+ - "configs.example/**" # ignores all files in configs.example
9
+ - "**/README.md" # ignores all README files
10
+ - "experiments/**" # ignores all files in experiments directory
11
+
12
+ jobs:
13
+ release:
14
+ uses: openclimatefix/.github/.github/workflows/python-release.yml@main
15
+ secrets:
16
+ token: ${{ secrets.PYPI_API_TOKEN }}
17
+ PAT_TOKEN: ${{ secrets.PAT_TOKEN }}
.github/workflows/test.yml ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Python package tests
2
+
3
+ on:
4
+ push:
5
+ pull_request:
6
+ types: [opened, reopened]
7
+ schedule:
8
+ - cron: "0 12 * * 1"
9
+ jobs:
10
+ call-run-python-tests:
11
+ uses: openclimatefix/.github/.github/workflows/python-test.yml@main
12
+ with:
13
+ # 0 means don't use pytest-xdist
14
+ pytest_numcpus: "4"
15
+ # pytest-cov looks at this folder
16
+ pytest_cov_dir: "pvnet"
17
+ # extra things to install
18
+ sudo_apt_install: "libgeos++-dev libproj-dev proj-data proj-bin"
19
+ # brew_install: "proj geos librttopo"
20
+ os_list: '["ubuntu-latest"]'
21
+ python-version: "['3.10', '3.11']"
22
+ extra_commands: "pip3 install -e '.[all]'"
configs.example/callbacks/default.yaml ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ early_stopping:
2
+ _target_: pvnet.callbacks.MainEarlyStopping
3
+ # name of the logged metric which determines when model is improving
4
+ monitor: "${resolve_monitor_loss:${model.output_quantiles}}"
5
+ mode: "min" # can be "max" or "min"
6
+ patience: 10 # how many epochs (or val check periods) of not improving until training stops
7
+ min_delta: 0 # minimum change in the monitored metric needed to qualify as an improvement
8
+
9
+ learning_rate_monitor:
10
+ _target_: lightning.pytorch.callbacks.LearningRateMonitor
11
+ logging_interval: "epoch"
12
+
13
+ model_summary:
14
+ _target_: lightning.pytorch.callbacks.ModelSummary
15
+ max_depth: 3
16
+
17
+ model_checkpoint:
18
+ _target_: lightning.pytorch.callbacks.ModelCheckpoint
19
+ # name of the logged metric which determines when model is improving
20
+ monitor: "${resolve_monitor_loss:${model.output_quantiles}}"
21
+ mode: "min" # can be "max" or "min"
22
+ save_top_k: 1 # save k best models (determined by above metric)
23
+ save_last: True # additionally always save model from last epoch
24
+ every_n_epochs: 1
25
+ verbose: False
26
+ filename: "epoch={epoch}-step={step}"
27
+ # The path to where the model checkpoints will be stored
28
+ dirpath: "PLACEHOLDER/${model_name}" #${..model_name}
29
+ auto_insert_metric_name: False
30
+ save_on_train_epoch_end: False
configs.example/callbacks/none.yaml ADDED
File without changes
configs.example/callbacks/wandb.yaml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ defaults:
2
+ - default.yaml
3
+
4
+ watch_model:
5
+ _target_: src.callbacks.wandb_callbacks.WatchModel
6
+ log: "all"
7
+ log_freq: 100
8
+
9
+ upload_code_as_artifact:
10
+ _target_: src.callbacks.wandb_callbacks.UploadCodeAsArtifact
11
+ code_dir: ${work_dir}/src
12
+
13
+ upload_ckpts_as_artifact:
14
+ _target_: src.callbacks.wandb_callbacks.UploadCheckpointsAsArtifact
15
+ ckpt_dir: "checkpoints/"
16
+ upload_best_only: True
17
+
18
+ log_f1_precision_recall_heatmap:
19
+ _target_: src.callbacks.wandb_callbacks.LogF1PrecRecHeatmap
20
+
21
+ log_confusion_matrix:
22
+ _target_: src.callbacks.wandb_callbacks.LogConfusionMatrix
23
+
24
+ log_image_predictions:
25
+ _target_: src.callbacks.wandb_callbacks.LogImagePredictions
26
+ num_samples: 8
configs.example/config.yaml ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # specify here default training configuration
4
+ defaults:
5
+ - _self_
6
+ - trainer: default.yaml
7
+ - model: multimodal.yaml
8
+ - datamodule: premade_samples.yaml
9
+ - callbacks: default.yaml # set this to null if you don't want to use callbacks
10
+ - logger: wandb.yaml # set logger here or use command line (e.g. `python run.py logger=wandb`)
11
+ - experiment: null
12
+ - hparams_search: null
13
+ - hydra: default.yaml
14
+
15
+ renewable: "pv_uk"
16
+
17
+ # enable color logging
18
+ # - override hydra/hydra_logging: colorlog
19
+ # - override hydra/job_logging: colorlog
20
+
21
+ # path to original working directory
22
+ # hydra hijacks working directory by changing it to the current log directory,
23
+ # so it's useful to have this path as a special variable
24
+ # learn more here: https://hydra.cc/docs/next/tutorials/basic/running_your_app/working_directory
25
+ work_dir: ${hydra:runtime.cwd}
26
+
27
+ model_name: "default"
28
+
29
+ # use `python run.py debug=true` for easy debugging!
30
+ # this will run 1 train, val and test loop with only 1 batch
31
+ # equivalent to running `python run.py trainer.fast_dev_run=true`
32
+ # (this is placed here just for easier access from command line)
33
+ debug: False
34
+
35
+ # pretty print config at the start of the run using Rich library
36
+ print_config: True
37
+
38
+ # disable python warnings if they annoy you
39
+ ignore_warnings: True
40
+
41
+ # check performance on test set, using the best model achieved during training
42
+ # lightning chooses best model based on metric specified in checkpoint callback
43
+ test_after_training: False
44
+
45
+ seed: 2727831
configs.example/datamodule/configuration/example_configuration.yaml ADDED
@@ -0,0 +1,288 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ general:
2
+ description: Example config for producing PVNet samples
3
+ name: example_config
4
+
5
+ input_data:
6
+
7
+ # Either use Site OR GSP configuration
8
+ site:
9
+ # Path to Site data in NetCDF format
10
+ file_path: PLACEHOLDER.nc
11
+ # Path to metadata in CSV format
12
+ metadata_file_path: PLACEHOLDER.csv
13
+ time_resolution_minutes: 15
14
+ interval_start_minutes: -60
15
+ # Specified for intraday currently
16
+ interval_end_minutes: 480
17
+ dropout_timedeltas_minutes: []
18
+ dropout_fraction: 0 # Fraction of samples with dropout
19
+
20
+ gsp:
21
+ # Path to GSP data in zarr format
22
+ # e.g. gs://solar-pv-nowcasting-data/PV/GSP/v7/pv_gsp.zarr
23
+ zarr_path: PLACEHOLDER.zarr
24
+ interval_start_minutes: -60
25
+ # Specified for intraday currently
26
+ interval_end_minutes: 480
27
+ time_resolution_minutes: 30
28
+ # Random value from the list below will be chosen as the delay when dropout is used
29
+ # If set to null no dropout is applied. Only values before t0 are dropped out for GSP.
30
+ # Values after t0 are assumed as targets and cannot be dropped.
31
+ dropout_timedeltas_minutes: []
32
+ dropout_fraction: 0 # Fraction of samples with dropout
33
+
34
+ nwp:
35
+
36
+ ecmwf:
37
+ provider: ecmwf
38
+ # Path to ECMWF NWP data in zarr format
39
+ # n.b. It is not necessary to use multiple or any NWP data. These entries can be removed
40
+ zarr_path: PLACEHOLDER.zarr
41
+ interval_start_minutes: -60
42
+ # Specified for intraday currently
43
+ interval_end_minutes: 480
44
+ time_resolution_minutes: 60
45
+ channels:
46
+ - t2m # 2-metre temperature
47
+ - dswrf # downwards short-wave radiation flux
48
+ - dlwrf # downwards long-wave radiation flux
49
+ - hcc # high cloud cover
50
+ - mcc # medium cloud cover
51
+ - lcc # low cloud cover
52
+ - tcc # total cloud cover
53
+ - sde # snow depth water equivalent
54
+ - sr # direct solar radiation
55
+ - duvrs # downwards UV radiation at surface
56
+ - prate # precipitation rate
57
+ - u10 # 10-metre U component of wind speed
58
+ - u100 # 100-metre U component of wind speed
59
+ - u200 # 200-metre U component of wind speed
60
+ - v10 # 10-metre V component of wind speed
61
+ - v100 # 100-metre V component of wind speed
62
+ - v200 # 200-metre V component of wind speed
63
+ # The following channels are accumulated and need to be diffed
64
+ accum_channels:
65
+ - dswrf # downwards short-wave radiation flux
66
+ - dlwrf # downwards long-wave radiation flux
67
+ - sr # direct solar radiation
68
+ - duvrs # downwards UV radiation at surface
69
+ image_size_pixels_height: 24
70
+ image_size_pixels_width: 24
71
+ dropout_timedeltas_minutes: [-360]
72
+ dropout_fraction: 1.0 # Fraction of samples with dropout
73
+ max_staleness_minutes: null
74
+ normalisation_constants:
75
+ t2m:
76
+ mean: 283.48333740234375
77
+ std: 3.692270040512085
78
+ dswrf:
79
+ mean: 11458988.0
80
+ std: 13025427.0
81
+ dlwrf:
82
+ mean: 27187026.0
83
+ std: 15855867.0
84
+ hcc:
85
+ mean: 0.3961029052734375
86
+ std: 0.42244860529899597
87
+ mcc:
88
+ mean: 0.3288780450820923
89
+ std: 0.38039860129356384
90
+ lcc:
91
+ mean: 0.44901806116104126
92
+ std: 0.3791404366493225
93
+ tcc:
94
+ mean: 0.7049227356910706
95
+ std: 0.37487083673477173
96
+ sde:
97
+ mean: 8.107526082312688e-05
98
+ std: 0.000913831521756947 # Mapped from "sd" in the Python file
99
+ sr:
100
+ mean: 12905302.0
101
+ std: 16294988.0
102
+ duvrs:
103
+ mean: 1305651.25
104
+ std: 1445635.25
105
+ prate:
106
+ mean: 3.108070450252853e-05
107
+ std: 9.81039775069803e-05
108
+ u10:
109
+ mean: 1.7677178382873535
110
+ std: 5.531515598297119
111
+ u100:
112
+ mean: 2.393547296524048
113
+ std: 7.2320556640625
114
+ u200:
115
+ mean: 2.7963004112243652
116
+ std: 8.049470901489258
117
+ v10:
118
+ mean: 0.985887885093689
119
+ std: 5.411230564117432
120
+ v100:
121
+ mean: 1.4244288206100464
122
+ std: 6.944501876831055
123
+ v200:
124
+ mean: 1.6010299921035767
125
+ std: 7.561611652374268
126
+ # Added diff_ keys for the channels under accum_channels:
127
+ diff_dlwrf:
128
+ mean: 1136464.0
129
+ std: 131942.03125
130
+ diff_dswrf:
131
+ mean: 420584.6875
132
+ std: 715366.3125
133
+ diff_duvrs:
134
+ mean: 48265.4765625
135
+ std: 81605.25
136
+ diff_sr:
137
+ mean: 469169.5
138
+ std: 818950.6875
139
+
140
+ ukv:
141
+ provider: ukv
142
+ # Path to UKV NWP data in zarr format
143
+ # e.g. gs://solar-pv-nowcasting-data/NWP/UK_Met_Office/UKV_intermediate_version_7.zarr
144
+ # n.b. It is not necessary to use multiple or any NWP data. These entries can be removed
145
+ zarr_path: PLACEHOLDER.zarr
146
+ interval_start_minutes: -60
147
+ # Specified for intraday currently
148
+ interval_end_minutes: 480
149
+ time_resolution_minutes: 60
150
+ channels:
151
+ - t # 2-metre temperature
152
+ - dswrf # downwards short-wave radiation flux
153
+ - dlwrf # downwards long-wave radiation flux
154
+ - hcc # high cloud cover
155
+ - mcc # medium cloud cover
156
+ - lcc # low cloud cover
157
+ - sde # snow depth water equivalent
158
+ - r # relative humidity
159
+ - vis # visibility
160
+ - si10 # 10-metre wind speed
161
+ - wdir10 # 10-metre wind direction
162
+ - prate # precipitation rate
163
+ # These variables exist in CEDA training data but not in the live MetOffice live service
164
+ - hcct # height of convective cloud top, meters above surface. NaN if no clouds
165
+ - cdcb # height of lowest cloud base > 3 oktas
166
+ - dpt # dew point temperature
167
+ - prmsl # mean sea level pressure
168
+ - h # geometrical? (maybe geopotential?) height
169
+ image_size_pixels_height: 24
170
+ image_size_pixels_width: 24
171
+ dropout_timedeltas_minutes: [-360]
172
+ dropout_fraction: 1.0 # Fraction of samples with dropout
173
+ max_staleness_minutes: null
174
+ normalisation_constants:
175
+ t:
176
+ mean: 283.64913206
177
+ std: 4.38818501
178
+ dswrf:
179
+ mean: 111.28265039
180
+ std: 190.47216887
181
+ dlwrf:
182
+ mean: 325.03130139
183
+ std: 39.45988077
184
+ hcc:
185
+ mean: 29.11949682
186
+ std: 38.07184418
187
+ mcc:
188
+ mean: 40.88984494
189
+ std: 41.91144559
190
+ lcc:
191
+ mean: 50.08362643
192
+ std: 39.33210726
193
+ sde:
194
+ mean: 0.00289545
195
+ std: 0.1029753
196
+ r:
197
+ mean: 81.79229501
198
+ std: 11.45012499
199
+ vis:
200
+ mean: 32262.03285118
201
+ std: 21578.97975625
202
+ si10:
203
+ mean: 6.88348448
204
+ std: 3.94718813
205
+ wdir10:
206
+ mean: 199.41891636
207
+ std: 94.08407495
208
+ prate:
209
+ mean: 3.45793433e-05
210
+ std: 0.00021497
211
+ hcct:
212
+ mean: -18345.97478167
213
+ std: 18382.63958991
214
+ cdcb:
215
+ mean: 1412.26599062
216
+ std: 2126.99350113
217
+ dpt:
218
+ mean: 280.54379901
219
+ std: 4.57250482
220
+ prmsl:
221
+ mean: 101321.61574029
222
+ std: 1252.71790539
223
+ h:
224
+ mean: 2096.51991356
225
+ std: 1075.77812282
226
+
227
+ satellite:
228
+ # Path to Satellite data (non-HRV) in zarr format
229
+ # e.g. gs://solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/v4/2020_nonhrv.zarr
230
+ zarr_path: PLACEHOLDER.zarr
231
+ interval_start_minutes: -30
232
+ interval_end_minutes: 0
233
+ time_resolution_minutes: 5
234
+ channels:
235
+ - IR_016 # Surface, cloud phase
236
+ - IR_039 # Surface, clouds, wind fields
237
+ - IR_087 # Surface, clouds, atmospheric instability
238
+ - IR_097 # Ozone
239
+ - IR_108 # Surface, clouds, wind fields, atmospheric instability
240
+ - IR_120 # Surface, clouds, atmospheric instability
241
+ - IR_134 # Cirrus cloud height, atmospheric instability
242
+ - VIS006 # Surface, clouds, wind fields
243
+ - VIS008 # Surface, clouds, wind fields
244
+ - WV_062 # Water vapor, high level clouds, upper air analysis
245
+ - WV_073 # Water vapor, atmospheric instability, upper-level dynamics
246
+ image_size_pixels_height: 24
247
+ image_size_pixels_width: 24
248
+ dropout_timedeltas_minutes: []
249
+ dropout_fraction: 0 # Fraction of samples with dropout
250
+ normalisation_constants:
251
+ IR_016:
252
+ mean: 0.17594202
253
+ std: 0.21462157
254
+ IR_039:
255
+ mean: 0.86167645
256
+ std: 0.04618041
257
+ IR_087:
258
+ mean: 0.7719318
259
+ std: 0.06687243
260
+ IR_097:
261
+ mean: 0.8014212
262
+ std: 0.0468558
263
+ IR_108:
264
+ mean: 0.71254843
265
+ std: 0.17482725
266
+ IR_120:
267
+ mean: 0.89058584
268
+ std: 0.06115861
269
+ IR_134:
270
+ mean: 0.944365
271
+ std: 0.04492306
272
+ VIS006:
273
+ mean: 0.09633306
274
+ std: 0.12184761
275
+ VIS008:
276
+ mean: 0.11426069
277
+ std: 0.13090034
278
+ WV_062:
279
+ mean: 0.7359355
280
+ std: 0.16111417
281
+ WV_073:
282
+ mean: 0.62479186
283
+ std: 0.12924142
284
+
285
+ solar_position:
286
+ interval_start_minutes: -60
287
+ interval_end_minutes: 480
288
+ time_resolution_minutes: 30
configs.example/datamodule/premade_batches.yaml ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ _target_: pvnet.data.DataModule
2
+ configuration: null
3
+
4
+ # The sample_dir is the location batches were saved to using the save_batches.py script
5
+ # The sample_dir should contain train and val subdirectories with batches
6
+
7
+ sample_dir: "PLACEHOLDER"
8
+ num_workers: 10
9
+ prefetch_factor: 2
10
+ batch_size: 8
configs.example/datamodule/streamed_batches.yaml ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _target_: pvnet.data.DataModule
2
+ # Path to the data configuration yaml file. You can find examples in the configuration subdirectory
3
+ # in configs.example/datamodule/configuration
4
+ # Use the full local path such as: /FULL/PATH/PVNet/configs/datamodule/configuration/gcp_configuration.yaml"
5
+
6
+ configuration: "PLACEHOLDER.yaml"
7
+ num_workers: 20
8
+ prefetch_factor: 2
9
+ batch_size: 8
10
+
11
+ sample_output_dir: "PLACEHOLDER"
12
+ num_train_samples: 2
13
+ num_val_samples: 1
14
+
15
+ train_period:
16
+ - null
17
+ - "2022-05-07"
18
+ val_period:
19
+ - "2022-05-08"
20
+ - "2023-05-08"
configs.example/experiment/baseline.yaml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # to execute this experiment run:
4
+ # python run.py experiment=example_simple.yaml
5
+
6
+ defaults:
7
+ - override /trainer: default.yaml # choose trainer from 'configs/trainer/'
8
+ - override /model: baseline.yaml
9
+ - override /datamodule: premade_samples.yaml
10
+ - override /callbacks: default.yaml
11
+ - override /logger: neptune.yaml
12
+
13
+ # all parameters below will be merged with parameters from default configurations set above
14
+ # this allows you to overwrite only specified parameters
15
+
16
+ seed: 518
17
+ validate_only: "1" # by putting this key in the config file, the model does not get trained.
18
+
19
+ trainer:
20
+ min_epochs: 1
21
+ max_epochs: 1
configs.example/experiment/conv3d_sat_nwp.yaml ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # to execute this experiment run:
4
+ # python run.py experiment=example_simple.yaml
5
+
6
+ defaults:
7
+ - override /trainer: default.yaml # choose trainer from 'configs/trainer/'
8
+ - override /model: conv3d_sat_nwp.yaml
9
+ - override /datamodule: premade_samples.yaml
10
+ - override /callbacks: default.yaml
11
+ # - override /logger: neptune.yaml
12
+
13
+ # all parameters below will be merged with parameters from default configurations set above
14
+ # this allows you to overwrite only specified parameters
15
+
16
+ seed: 518
17
+
18
+ trainer:
19
+ min_epochs: 1
20
+ max_epochs: 10
21
+
22
+ model:
23
+ conv3d_channels: 32
configs.example/experiment/example_simple.yaml ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # to execute this experiment run:
4
+ # python run.py experiment=example_simple.yaml
5
+
6
+ defaults:
7
+ - override /trainer: default.yaml # choose trainer from 'configs/trainer/'
8
+ - override /model: conv3d_sat_nwp.yaml
9
+ - override /datamodule: premade_samples.yaml
10
+ - override /callbacks: default.yaml
11
+ - override /logger: tensorboard.yaml
12
+ - override /hparams_search: null
13
+ - override /hydra: default.yaml
14
+
15
+ # all parameters below will be merged with parameters from default configurations set above
16
+ # this allows you to overwrite only specified parameters
17
+
18
+ seed: 518
19
+
20
+ trainer:
21
+ min_epochs: 1
22
+ max_epochs: 2
23
+
24
+ datamodule:
25
+ batch_size: 16
26
+
27
+ validate_only: "1" # by putting this key in the config file, the model does not get trained.
configs.example/experiment/test.yaml ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # to execute this experiment run:
4
+ # python run.py experiment=test.yaml
5
+
6
+ defaults:
7
+ - override /trainer: default.yaml # choose trainer from 'configs/trainer/'
8
+ - override /model: test.yaml
9
+ - override /datamodule: premade_samples.yaml
10
+ - override /callbacks: default.yaml
11
+
12
+ # all parameters below will be merged with parameters from default configurations set above
13
+ # this allows you to overwrite only specified parameters
14
+
15
+ seed: 518
16
+
17
+ trainer:
18
+ min_epochs: 0
19
+ max_epochs: 2
20
+ reload_dataloaders_every_n_epochs: 0
21
+ limit_train_batches: 2000
22
+ limit_val_batches: 100
23
+ limit_test_batches: 100
24
+ val_check_interval: 100
25
+ num_sanity_val_steps: 8
26
+ accumulate_grad_batches: 4
27
+ #fast_dev_run: 3
28
+
29
+ datamodule:
30
+ num_workers: 10
31
+ prefetch_factor: 2
32
+ batch_size: 8
33
+ #validate_only: '1' # by putting this key in the config file, the model does not get trained.
configs.example/hparams_search/conv3d_optuna.yaml ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # example hyperparameter optimization of some experiment with Optuna:
4
+ # python run.py -m hparams_search=conv3d_optuna experiment=conv3d_sat_nwp
5
+
6
+ defaults:
7
+ - override /hydra/sweeper: optuna
8
+
9
+ # choose metric which will be optimized by Optuna
10
+ optimized_metric: "MSE/Validation_epoch"
11
+
12
+ hydra:
13
+ # here we define Optuna hyperparameter search
14
+ # it optimizes for value returned from function with @hydra.main decorator
15
+ # learn more here: https://hydra.cc/docs/next/plugins/optuna_sweeper
16
+ sweeper:
17
+ _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
18
+ storage: null
19
+ study_name: null
20
+ n_jobs: 1
21
+
22
+ # 'minimize' or 'maximize' the objective
23
+ direction: minimize
24
+
25
+ # number of experiments that will be executed
26
+ n_trials: 20
27
+
28
+ # choose Optuna hyperparameter sampler
29
+ # learn more here: https://optuna.readthedocs.io/en/stable/reference/samplers.html
30
+ sampler:
31
+ _target_: optuna.samplers.TPESampler
32
+ seed: 12345
33
+ consider_prior: true
34
+ prior_weight: 1.0
35
+ consider_magic_clip: true
36
+ consider_endpoints: false
37
+ n_startup_trials: 10
38
+ n_ei_candidates: 24
39
+ multivariate: false
40
+ warn_independent_sampling: true
41
+
42
+ # define range of hyperparameters
43
+ search_space:
44
+ model.include_pv_yield_history:
45
+ type: categorical
46
+ choices: [true, false]
47
+ model.include_future_satellite:
48
+ type: categorical
49
+ choices: [true, false]
configs.example/hydra/default.yaml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # output paths for hydra logs
2
+ run:
3
+ # Local log directory for hydra
4
+ dir: PLACEHOLDER/runs/${now:%Y-%m-%d}/${now:%H-%M-%S}
5
+ sweep:
6
+ # Local log directory for hydra
7
+ dir: PLACEHOLDER/multiruns/${now:%Y-%m-%d_%H-%M-%S}
8
+ subdir: ${hydra.job.num}
9
+
10
+ # you can set here environment variables that are universal for all users
11
+ # for system specific variables (like data paths) it's better to use .env file!
12
+ job:
13
+ env_set:
14
+ EXAMPLE_VAR: "example_value"
configs.example/logger/csv.yaml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # csv logger built in lightning
2
+
3
+ csv:
4
+ _target_: pytorch_lightning.loggers.csv_logs.CSVLogger
5
+ # local path to log training process
6
+ save_dir: "PLACEHOLDER"
7
+ name: "csv/"
8
+ version: null
9
+ prefix: ""
configs.example/logger/many_loggers.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ # train with many loggers at once
2
+
3
+ defaults:
4
+ - csv.yaml
5
+ # - neptune.yaml
6
+ # - tensorboard.yaml
7
+ - wandb.yaml
configs.example/logger/neptune.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # https://neptune.ai
2
+
3
+ neptune:
4
+ _target_: pytorch_lightning.loggers.NeptuneLogger
5
+ api_key: ${oc.env:NEPTUNE_API_TOKEN} # api key is loaded from environment variable
6
+ # Neptune project placeholder
7
+ project: PLACEHOLDER
8
+ prefix: ""
configs.example/logger/tensorboard.yaml ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # https://www.tensorflow.org/tensorboard/
2
+
3
+ tensorboard:
4
+ _target_: pytorch_lightning.loggers.tensorboard.TensorBoardLogger
5
+ # Path to use for tensorboard logs
6
+ save_dir: "PLACEHOLDER"
7
+ name: "default"
8
+ version: "${model_name}"
9
+ log_graph: False
10
+ default_hp_metric: False
11
+ prefix: ""
configs.example/logger/wandb.yaml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # https://wandb.ai
2
+
3
+ wandb:
4
+ _target_: lightning.pytorch.loggers.wandb.WandbLogger
5
+ # wandb project to log to
6
+ project: "PLACEHOLDER"
7
+ name: "${model_name}"
8
+ # location to store the wandb local logs
9
+ save_dir: "PLACEHOLDER"
10
+ offline: False # set True to store all logs only locally
11
+ id: null # pass correct id to resume experiment!
12
+ # entity: "" # set to name of your wandb team or just remove it
13
+ log_model: False
14
+ prefix: ""
15
+ job_type: "train"
16
+ group: ""
17
+ tags: []
configs.example/model/baseline.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ _target_: pvnet.models.baseline.last_value.Model
2
+
3
+ forecast_minutes: 120
4
+ history_minutes: 30
configs.example/model/multimodal.yaml ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _target_: pvnet.models.multimodal.multimodal.Model
2
+
3
+ output_quantiles: [0.02, 0.1, 0.25, 0.5, 0.75, 0.9, 0.98]
4
+
5
+ #--------------------------------------------
6
+ # NWP encoder
7
+ #--------------------------------------------
8
+
9
+ nwp_encoders_dict:
10
+ ukv:
11
+ _target_: pvnet.models.multimodal.encoders.encoders3d.DefaultPVNet
12
+ _partial_: True
13
+ in_channels: 2
14
+ out_features: 256
15
+ number_of_conv3d_layers: 6
16
+ conv3d_channels: 32
17
+ image_size_pixels: 24
18
+ ecmwf:
19
+ _target_: pvnet.models.multimodal.encoders.encoders3d.DefaultPVNet
20
+ _partial_: True
21
+ in_channels: 12
22
+ out_features: 256
23
+ number_of_conv3d_layers: 4
24
+ conv3d_channels: 32
25
+ image_size_pixels: 12
26
+
27
+ #--------------------------------------------
28
+ # Sat encoder settings
29
+ #--------------------------------------------
30
+
31
+ sat_encoder:
32
+ _target_: pvnet.models.multimodal.encoders.encoders3d.DefaultPVNet
33
+ _partial_: True
34
+ in_channels: 11
35
+ out_features: 256
36
+ number_of_conv3d_layers: 6
37
+ conv3d_channels: 32
38
+ image_size_pixels: 24
39
+
40
+ add_image_embedding_channel: False
41
+
42
+ #--------------------------------------------
43
+ # PV encoder settings
44
+ #--------------------------------------------
45
+
46
+ pv_encoder:
47
+ _target_: pvnet.models.multimodal.site_encoders.encoders.SingleAttentionNetwork
48
+ _partial_: True
49
+ num_sites: 349
50
+ out_features: 40
51
+ num_heads: 4
52
+ kdim: 40
53
+ id_embed_dim: 20
54
+
55
+ #--------------------------------------------
56
+ # Tabular network settings
57
+ #--------------------------------------------
58
+
59
+ output_network:
60
+ _target_: pvnet.models.multimodal.linear_networks.networks.ResFCNet2
61
+ _partial_: True
62
+ fc_hidden_features: 128
63
+ n_res_blocks: 6
64
+ res_block_layers: 2
65
+ dropout_frac: 0.0
66
+
67
+ embedding_dim: 16
68
+ include_sun: True
69
+ include_gsp_yield_history: False
70
+ include_site_yield_history: False
71
+
72
+ # The mapping between the location IDs and their embedding indices
73
+ location_id_mapping:
74
+ 1: 1
75
+ 5: 2
76
+ 110: 3
77
+ # ...
78
+
79
+ #--------------------------------------------
80
+ # Times
81
+ #--------------------------------------------
82
+
83
+ # Foreast and time settings
84
+ forecast_minutes: 480
85
+ history_minutes: 120
86
+
87
+ min_sat_delay_minutes: 60
88
+
89
+ # These must also be set even if identical to forecast_minutes and history_minutes
90
+ sat_history_minutes: 90
91
+ pv_history_minutes: 180
92
+
93
+ # These must be set for each NWP encoder
94
+ nwp_history_minutes:
95
+ ukv: 120
96
+ ecmwf: 120
97
+ nwp_forecast_minutes:
98
+ ukv: 480
99
+ ecmwf: 480
100
+ # Optional; defaults to 60, so must be set for data with different time resolution
101
+ nwp_interval_minutes:
102
+ ukv: 60
103
+ ecmwf: 60
104
+
105
+ # ----------------------------------------------
106
+ # Optimizer
107
+ # ----------------------------------------------
108
+ optimizer:
109
+ _target_: pvnet.optimizers.EmbAdamWReduceLROnPlateau
110
+ lr: 0.0001
111
+ weight_decay: 0.01
112
+ amsgrad: True
113
+ patience: 5
114
+ factor: 0.1
115
+ threshold: 0.002
configs.example/model/nwp_dwsrf_weighting.yaml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _target_: pvnet.models.multimodal.nwp_weighting.Model
2
+
3
+ #--------------------------------------------
4
+ # Network settings
5
+ #--------------------------------------------
6
+
7
+ # Foreast and time settings
8
+ forecast_minutes: 480
9
+ history_minutes: 120
10
+
11
+ nwp_history_minutes: 120
12
+ nwp_forecast_minutes: 480
13
+
14
+ nwp_image_size_pixels: 24
15
+ dwsrf_channel: 1
16
+
17
+ # ----------------------------------------------
18
+
19
+ optimizer:
20
+ _target_: pvnet.optimizers.AdamW
21
+ lr: 0.0005
configs.example/model/test.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ _target_: pvnet.models.baseline.single_value.Model
2
+
3
+ history_minutes: 120
4
+ forecast_minutes: 360
configs.example/model/wind_multimodal.yaml ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _target_: pvnet.models.multimodal.multimodal.Model
2
+
3
+ output_quantiles: [0.02, 0.1, 0.25, 0.5, 0.75, 0.9, 0.98]
4
+
5
+ #--------------------------------------------
6
+ # NWP encoder
7
+ #--------------------------------------------
8
+ nwp_encoders_dict:
9
+ ecmwf:
10
+ _target_: pvnet.models.multimodal.encoders.encoders3d.DefaultPVNet
11
+ _partial_: True
12
+ in_channels: 14
13
+ out_features: 256
14
+ number_of_conv3d_layers: 6
15
+ conv3d_channels: 32
16
+ image_size_pixels: 16
17
+
18
+ #--------------------------------------------
19
+ # Sensor encoder settings
20
+ #--------------------------------------------
21
+
22
+ wind_encoder:
23
+ _target_: pvnet.models.multimodal.site_encoders.encoders.SingleAttentionNetwork
24
+ _partial_: True
25
+ num_sites: 19
26
+ out_features: 40
27
+ num_heads: 4
28
+ kdim: 40
29
+ id_embed_dim: 20
30
+
31
+ #--------------------------------------------
32
+ # Tabular network settings
33
+ #--------------------------------------------
34
+
35
+ output_network:
36
+ _target_: pvnet.models.multimodal.linear_networks.networks.ResFCNet2
37
+ _partial_: True
38
+ fc_hidden_features: 128
39
+ n_res_blocks: 6
40
+ res_block_layers: 2
41
+ dropout_frac: 0.0
42
+
43
+ embedding_dim: 16
44
+ include_sun: False
45
+ include_gsp_yield_history: False
46
+
47
+ # The mapping between the location IDs and their embedding indices
48
+ location_id_mapping:
49
+ 1: 1
50
+ 5: 2
51
+ 110: 3
52
+ # ...
53
+
54
+ #--------------------------------------------
55
+ # Times
56
+ #--------------------------------------------
57
+
58
+ # Forecast and time settings
59
+ forecast_minutes: 480
60
+ history_minutes: 120
61
+
62
+ min_sat_delay_minutes: 60
63
+
64
+ # --- set to null if same as history_minutes ---
65
+ sat_history_minutes: 90
66
+ nwp_history_minutes: 60
67
+ nwp_forecast_minutes: 2880
68
+ pv_history_minutes: 180
69
+ pv_interval_minutes: 15
70
+ sat_interval_minutes: 15
71
+
72
+ target_key: "sensor"
73
+ # ----------------------------------------------
74
+ # Optimizer
75
+ # ----------------------------------------------
76
+ optimizer:
77
+ _target_: pvnet.optimizers.EmbAdamWReduceLROnPlateau
78
+ lr: 0.0001
79
+ weight_decay: 0.01
80
+ amsgrad: True
81
+ patience: 5
82
+ factor: 0.1
83
+ threshold: 0.002
configs.example/readme.md ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ This directory contains example configuration files for the PVNet project. Many paths will need to be unique to each user. You can find these paths by searching for PLACEHOLDER within these files. Not all of
2
+ the values with a placeholder need to be set. For example in the logger subdirectory there are many different loggers with PLACEHOLDERS. If only one logger is used, then only that placeholder needs to be set.
3
+
4
+ run experiments by:
5
+ `python run.py experiment=example_simple `
configs.example/trainer/all_params.yaml ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _target_: pytorch_lightning.Trainer
2
+
3
+ # default values for all trainer parameters
4
+ checkpoint_callback: True
5
+ default_root_dir: null
6
+ gradient_clip_val: 0.0
7
+ process_position: 0
8
+ num_nodes: 1
9
+ num_processes: 1
10
+ gpus: null
11
+ auto_select_gpus: False
12
+ tpu_cores: null
13
+ log_gpu_memory: null
14
+ overfit_batches: 0.0
15
+ track_grad_norm: -1
16
+ check_val_every_n_epoch: 1
17
+ fast_dev_run: False
18
+ accumulate_grad_batches: 1
19
+ max_epochs: 1
20
+ min_epochs: 1
21
+ max_steps: null
22
+ min_steps: null
23
+ limit_train_batches: 1.0
24
+ limit_val_batches: 1.0
25
+ limit_test_batches: 1.0
26
+ val_check_interval: 1.0
27
+ flush_logs_every_n_steps: 100
28
+ log_every_n_steps: 50
29
+ accelerator: null
30
+ sync_batchnorm: False
31
+ precision: 32
32
+ weights_save_path: null
33
+ num_sanity_val_steps: 2
34
+ truncated_bptt_steps: null
35
+ resume_from_checkpoint: null
36
+ profiler: null
37
+ benchmark: False
38
+ deterministic: False
39
+ reload_dataloaders_every_epoch: False
40
+ auto_lr_find: False
41
+ replace_sampler_ddp: True
42
+ terminate_on_nan: False
43
+ auto_scale_batch_size: False
44
+ prepare_data_per_node: True
45
+ plugins: null
46
+ amp_backend: "native"
47
+ amp_level: "O2"
48
+ move_metrics_to_cpu: False
configs.example/trainer/default.yaml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _target_: lightning.pytorch.trainer.trainer.Trainer
2
+
3
+ # set `1` to train on GPU, `0` to train on CPU only
4
+ accelerator: auto
5
+ devices: auto
6
+
7
+ min_epochs: null
8
+ max_epochs: null
9
+ reload_dataloaders_every_n_epochs: 0
10
+ num_sanity_val_steps: 8
11
+ fast_dev_run: false
12
+
13
+ accumulate_grad_batches: 4
14
+ log_every_n_steps: 50
experiments/india/001_v1/india_pv_wind.md ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # PVNet for Wind and PV Sites in India
2
+
3
+ ## PVNet for sites
4
+
5
+ ### Data
6
+
7
+ We use PV generation data for India from April 2019-Nov 2022 for training
8
+ and Dec 2022- Nov 2023 for validation. This is only with ECMWF data, and PV generation history.
9
+
10
+ The forecast is every 15 minutes for 48 hours for PV generation.
11
+
12
+ The input NWP data is hourly, and 32x32 pixels (corresponding to around 320kmx320km) around a central
13
+ point in NW-India.
14
+
15
+ [WandB Link](https://wandb.ai/openclimatefix/pvnet_india2.1/runs/o4xpvzrc)
16
+
17
+ ### Results
18
+
19
+ Overall MAE is 4.9% on the validation set, and forecasts look overall good.
20
+
21
+ ![batch_idx_1_all_892_2ca7e12db5de2cf2e244](https://github.com/openclimatefix/PVNet/assets/7170359/07e8199a-11b5-4400-9897-37b7738a4f39)
22
+
23
+ ![W B Chart 05_02_2024, 10_07_12_pvnet](https://github.com/openclimatefix/PVNet/assets/7170359/abaefdc1-dedd-4a12-8a26-afaf36d7786b)
24
+
25
+ ## WindNet
26
+
27
+
28
+ ### April-29-2024 WindNet v1 Production Model
29
+
30
+ [WandB Link](https://wandb.ai/openclimatefix/india/runs/5llq8iw6)
31
+
32
+ Improvements: Larger input size (64x64), 7 hour delay for ECMWF NWP inputs, to match production.
33
+ New, much more efficient encoder for NWP, allowing for more filters and layers, with less parameters.
34
+ The 64x64 input size corresponds to 6.4 degrees x 6.4 degrees, which is around 700km x 700km. This allows for the
35
+ model to see the wind over the wind generation sites, which seems to be the biggest reason for the improvement in the model.
36
+
37
+
38
+
39
+ MAE is 7.6% with real improvements on the production side of things.
40
+
41
+
42
+ There were other experiments with slightly different numbers of filters, model parameters and the like, but generally no
43
+ improvements were seen.
44
+
45
+
46
+ ## WindNet v1 Results
47
+
48
+ ### Data
49
+
50
+ We use Wind generation data for India from April 2019-Nov 2022 for training
51
+ and Dec 2022- Nov 2023 for validation. This is only with ECMWF data, and Wind generation history.
52
+
53
+ The forecast is every 15 minutes for 48 hours for Wind generation.
54
+
55
+ The input NWP data is hourly, and 32x32 pixels (corresponding to around 320kmx320km) around a central
56
+ point in NW-India. Note: The majority of the wind generation is likely not covered in the 320kmx320km area.
57
+
58
+
59
+ [WandB Link](https://wandb.ai/openclimatefix/pvnet_india2.1/runs/otdx7axx)
60
+
61
+ ### Results
62
+
63
+ ![W B Chart 05_02_2024, 10_05_19](https://github.com/openclimatefix/PVNet/assets/7170359/6a8cd9c5-bdfe-41ab-996d-37fd1be2a07c)
64
+
65
+ ![W B Chart 05_02_2024, 10_06_51_windnet](https://github.com/openclimatefix/PVNet/assets/7170359/77554ef0-4411-4432-af95-8530aef4a701)
66
+
67
+ ![batch_idx_1_all_1730_379a9f881a7f01153f98](https://github.com/openclimatefix/PVNet/assets/7170359/243d9f3e-4cb9-405e-80c5-40c6c218c17f)
68
+
69
+ MAE is around 10% overall, although it doesn't seem to do very well on the ramps up and down.
experiments/india/002_wind_meteomatics/india_windnet_v2.md ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### WindNet v2 Meteomatics + ECMWF Model
2
+
3
+ [WandB Link](https://wandb.ai/openclimatefix/india/runs/v3mja33d)
4
+
5
+ This newest experiment uses Meteomatics data in addition to ECMWF data. The Meteomatics data is at specific locations corresponding
6
+ to the generation sites we know about. It is smartly downscaled ECMWF data, down to 15 minutes and at a few height levels we are
7
+ interested in, primarily 10m, 100m, and 200m. The Meteomatics data is a semi-reanalysis, with each block of 6 hours being from one forecast run.
8
+ For example, in one day, hours 00-06 are from the same, 00 forecast run, and hours 06-12 are from the 06 forecast run. This is important to note
9
+ as it is both not a real reanalysis, but we also can't have it exactly match the live data, as any forecast steps beyond 6 hours are thrown away.
10
+ This does mean that these results should be taken as a best case or better than best case scenario, as every 6 hour, observations from the future
11
+ are incorporated into the Meteomatics input data from the next NWP model run.
12
+
13
+ For the purposes of WindNet, Meteomatics data is treated as Sensor data that goes into the future.
14
+ The model encodes the sensor information the same way as for the historical PV, Wind, and GSP generation, and has
15
+ a simple, single attention head to encode the information. This is then concatenated along with the rest of the data, like in
16
+ previous experiments.
17
+
18
+ This model also has an even larger input size of ECMWF data, 81x81 pixels, corresponding to around 810kmx810km.
19
+ ![Screenshot_20240430_082855](https://github.com/openclimatefix/PVNet/assets/7170359/6981a088-8664-474b-bfea-c94c777fc119)
20
+
21
+ MAE is 7.0% on the validation set, showing a slight improvement over the previous model.
22
+
23
+ Comparison with the production model:
24
+
25
+ | Timestep | Prod MAE % | No Meteomatics MAE % | Meteomatics MAE % |
26
+ | --- | --- | --- | --- |
27
+ | 0-0 minutes | 7.586 | 5.920 | 2.475 |
28
+ | 15-15 minutes | 8.021 | 5.809 | 2.968 |
29
+ | 30-45 minutes | 7.233 | 5.742 | 3.472 |
30
+ | 45-60 minutes | 7.187 | 5.698 | 3.804 |
31
+ | 60-120 minutes | 7.231 | 5.816 | 4.650 |
32
+ | 120-240 minutes | 7.287 | 6.080 | 6.028 |
33
+ | 240-360 minutes | 7.319 | 6.375 | 6.738 |
34
+ | 360-480 minutes | 7.285 | 6.638 | 6.964 |
35
+ | 480-720 minutes | 7.143 | 6.747 | 6.906 |
36
+ | 720-1440 minutes | 7.380 | 7.207 | 6.962 |
37
+ | 1440-2880 minutes | 7.904 | 7.507 | 7.507 |
38
+
39
+ ![mae_per_timestep](https://github.com/openclimatefix/PVNet/assets/7170359/e3c942e8-65c6-4b95-8c51-f25d43e7a082)
40
+
41
+
42
+
43
+
44
+ Example plot
45
+
46
+ ![Screenshot_20240430_082937](https://github.com/openclimatefix/PVNet/assets/7170359/88db342e-bf82-414e-8255-5ad4af659fb8)
experiments/india/003_wind_plevels/MAE.png ADDED

Git LFS Details

  • SHA256: b06d6f85c2ee708e9555969afd622353b950a744f604d6c31d3c32d9b1543c23
  • Pointer size: 131 Bytes
  • Size of remote file: 174 kB
experiments/india/003_wind_plevels/MAEvstimesteps.png ADDED

Git LFS Details

  • SHA256: 3646fe682b4d13b2e00d68cf6d19dec9d00e6c56cc4d3995c3903920b35b8707
  • Pointer size: 131 Bytes
  • Size of remote file: 219 kB
experiments/india/003_wind_plevels/p10.png ADDED

Git LFS Details

  • SHA256: cce6f27ce1bafc89e9b5cb75cc2dad7c1053bea931ea4f5dfa5a1ef404d1042b
  • Pointer size: 131 Bytes
  • Size of remote file: 150 kB
experiments/india/003_wind_plevels/p50.png ADDED

Git LFS Details

  • SHA256: ceae23a3f91f6bc56cf688bdbcaf5172f1a54736e412c5f0e80d8c056f7d9754
  • Pointer size: 131 Bytes
  • Size of remote file: 229 kB
experiments/india/003_wind_plevels/plevel.md ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Running WindNet for RUVNL for different Plevels
2
+
3
+ https://wandb.ai/openclimatefix/india/runs/5llq8iw6 is the current production one
4
+ This has 7 plevels and a small patch size.
5
+
6
+ ## Experiments
7
+
8
+ 1. Only used plevel 50 (orange)
9
+ https://wandb.ai/openclimatefix/india/runs/ziudzweq/
10
+
11
+ 2. Use plevels of [2, 10, 25, 50, 75, 90, 98]. This is what is already used. (green)
12
+ https://wandb.ai/openclimatefix/india/runs/xdlew7ib
13
+
14
+ 3. Use plevels of [1, 2, 10, 20, 25, 30, 40, 50, 60, 70, 75, 80, 90, 98, 99] (brown)
16
+
16
+ https://wandb.ai/openclimatefix/india/runs/pcr2zsrc
17
+
18
+
19
+ ## Training
20
+
21
+ Each epoch took about ~4 hours, so the training runs took several days.
22
+
23
+ TODO add number of samples
24
+
25
+ ## Results
26
+
27
+ MAE results show that using the plevel of 50 only, gives better results
28
+ ![](MAE.png "MAE")
29
+
30
+ The p50 results are about the same
31
+ ![](p50.png "p50")
32
+
33
+ We can see that for p10 the results are not right, as they should converge to 0.1
34
+ ![](p10.png "p10")
35
+
36
+ Interestingly the more plevels you have the better the results are for before 4 hours
37
+ but the less plevels you have the better the results for >= 8 hours.
38
+
39
+ | Timestep | P50 only MAE % | 7 plevels MAE % | 15 plevel MAE % | 7 plevels small patch MAE % |
40
+ | --- | --- | --- | --- | --- |
41
+ | 0-0 minutes | 5.416 | 5.920 | 3.933 | 7.586 |
42
+ | 15-15 minutes | 5.458 | 5.809 | 4.003 | 8.021 |
43
+ | 30-45 minutes | 5.525 | 5.742 | 4.442 | 7.233 |
44
+ | 45-60 minutes | 5.595 | 5.698 | 4.772 | 7.187 |
45
+ | 60-120 minutes | 5.890 | 5.816 | 5.307 | 7.231 |
46
+ | 120-240 minutes | 6.423 | 6.080 | 6.275 | 7.287 |
47
+ | 240-360 minutes | 6.608 | 6.375 | 6.707 | 7.319 |
48
+ | 360-480 minutes | 6.728 | 6.638 | 6.904 | 7.285 |
49
+ | 480-720 minutes | 6.634 | 6.747 | 6.872 | 7.143 |
50
+ | 720-1440 minutes | 6.940 | 7.207 | 7.176 | 7.380 |
51
+ | 1440-2880 minutes | 7.446 | 7.507 | 7.735 | 7.904 |
52
+
53
+
54
+ ![](MAEvstimesteps.png "MAEvstimesteps")
experiments/india/004_n_training_samples/log-plot.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Small script to make a MAE vs number of training batches plot.

Plots the overall validation MAE (%) from the N-samples experiments
against the number of training batches used, on a log-scaled x axis.
"""

# NOTE: original code did `import pandas as df`, which aliased the pandas
# module to `df` and then shadowed it with the DataFrame variable of the
# same name — use the conventional `pd` alias instead.
import pandas as pd
import plotly.graph_objects as go

# (n_samples, MAE %) pairs taken from the N-samples experiment results.
data = [[100, 7.779], [300, 7.441], [1000, 7.181], [3000, 7.180], [6711, 7.151]]
df = pd.DataFrame(data, columns=["n_samples", "MAE [%]"])

fig = go.Figure()
fig.add_trace(go.Scatter(x=df["n_samples"], y=df["MAE [%]"], mode="lines+markers"))
fig.update_layout(title="MAE % for N samples", xaxis_title="N Samples", yaxis_title="MAE %")
# Log-scale x axis so the sample counts (100 to 6711) are spread out evenly.
fig.update_xaxes(type="log")
fig.show(renderer="browser")
experiments/india/004_n_training_samples/mae_samples.png ADDED
experiments/india/004_n_training_samples/mae_step.png ADDED

Git LFS Details

  • SHA256: 3a3180a382e4b2c1534524f92a633d488912475a1e8a4effb0b28caf44368834
  • Pointer size: 131 Bytes
  • Size of remote file: 325 kB
experiments/india/004_n_training_samples/readme.md ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # N samples experiments
2
+
3
+ Kicked off an experiment that uses N samples
4
+ This is done by adding `limit_train_batches` to the `trainer/default.yaml`.
5
+
6
+ I checked that when limiting the batches, the same batches are shown to model for each epoch.
7
+
8
+ ## Experiments
9
+
10
+ Original is 6711 batches
11
+
12
+ - 100: 3p6scx2r
13
+ - 300: am46tno1
14
+ - 1000: u04xlb6p
15
+ - 3000: p11lhreo
16
+
17
+ ## Results
18
+
19
+ Overall
20
+
21
+ | Experiment | MAE % |
22
+ |------------|-------|
23
+ | 100 | 7.779 |
24
+ | 300 | 7.441 |
25
+ | 1000 | 7.181 |
26
+ | 3000 | 7.180 |
27
+ | 6711 | 7.151 |
28
+
29
+ Results by timestamps
30
+
31
+
32
+ | Timestep | 100 MAE % | 300 MAE % | 1000 MAE % | 3000 MAE % | 6711 MAE % |
33
+ | --- | --- | --- | --- | --- | --- |
34
+ | 0-0 minutes | 7.985 | 7.453 | 7.155 | 5.553 | 5.920 |
35
+ | 15-15 minutes | 7.953 | 7.055 | 6.923 | 5.453 | 5.809 |
36
+ | 30-45 minutes | 8.043 | 7.172 | 6.907 | 5.764 | 5.742 |
37
+ | 45-60 minutes | 7.850 | 7.070 | 6.790 | 5.815 | 5.698 |
38
+ | 60-120 minutes | 7.698 | 6.809 | 6.597 | 5.890 | 5.816 |
39
+ | 120-240 minutes | 7.355 | 6.629 | 6.495 | 6.221 | 6.080 |
40
+ | 240-360 minutes | 7.230 | 6.729 | 6.559 | 6.541 | 6.375 |
41
+ | 360-480 minutes | 7.415 | 6.997 | 6.770 | 6.855 | 6.638 |
42
+ | 480-720 minutes | 7.258 | 7.037 | 6.668 | 6.876 | 6.747 |
43
+ | 720-1440 minutes | 7.659 | 7.362 | 7.038 | 7.142 | 7.207 |
44
+ | 1440-2880 minutes | 8.027 | 7.745 | 7.518 | 7.535 | 7.507 |
45
+
46
+ ![](mae_step.png "mae_steps")
47
+
48
+ ![](mae_samples.png "mae_samples")
experiments/india/005_extra_nwp_variables/mae_steps.png ADDED

Git LFS Details

  • SHA256: 0ef7f7af4dafe38aac5a5df6cc74acc606cb4f0a1a9fc78972b09d68dd7574ad
  • Pointer size: 131 Bytes
  • Size of remote file: 215 kB
experiments/india/005_extra_nwp_variables/mae_steps_grouped.png ADDED

Git LFS Details

  • SHA256: 547d3aafbb1658602fe03ea1677589de4e208467756e9ce9cd1d8727f364dffa
  • Pointer size: 131 Bytes
  • Size of remote file: 133 kB
experiments/india/005_extra_nwp_variables/readmd.md ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Adding extra nwp variables
2
+
3
+ I wanted to run Windnet but testing some new nwp variables from ecmwf
4
+
5
+ General conclusion, although more experiments could be done.
6
+ The current nwp variables are about right.
7
+ If you add lots it makes it worse.
8
+ If you take some away, it makes it worse.
9
+
10
+ ## Bugs
11
+
12
+ Ran into a problem where I found that some examples have
13
+ `d.__getitem__('nwp-ecmwf__init_time_utc').values` had size 50, where it should be just one value. I removed these examples. This might be worth investigating further.
14
+
15
+ ## Experiments
16
+
17
+ The number of samples were 8000 when training.
18
+
19
+ ### 15 variables
20
+ Run windnet with `'hcc', 'lcc', 'mcc', 'prate', 'sde', 'sr', 't2m', 'tcc', 'u10',
21
+ 'v10', 'u100', 'v100', 'u200', 'v200', 'dlwrf', 'dswrf'`.
22
+
23
+ The experiment on wandb is [here](https://wandb.ai/openclimatefix/india/runs/k91rdffo)
24
+
25
+ ### 7 variables
26
+ Run windnet with the original 7 variables.
27
+ `t2m, u10, u100, u200, v10, v100, v200 `
28
+
29
+ The experiment on wandb is [here](https://wandb.ai/openclimatefix/india/runs/miszfep5)
30
+
31
+ ### 3 variables
32
+ Run windnet with only `t, u10, v100`
33
+
34
+ The experiment on wandb is [here](https://wandb.ai/openclimatefix/india/runs/22v3a39g)
35
+
36
+ ## Results
37
+
38
+ | Timestep | 15 MAE % | 7 MAE % | 3 MAE % |
39
+ | --- | --- | --- | --- |
40
+ | 0-0 minutes | 7.450 | 6.623 | 7.529 |
41
+ | 15-15 minutes | 7.348 | 6.441 | 7.408 |
42
+ | 30-45 minutes | 7.242 | 6.544 | 7.294 |
43
+ | 45-60 minutes | 7.134 | 6.567 | 7.185 |
44
+ | 60-120 minutes | 7.058 | 6.295 | 7.009 |
45
+ | 120-240 minutes | 6.965 | 6.290 | 6.800 |
46
+ | 240-360 minutes | 6.807 | 6.374 | 6.580 |
47
+ | 360-480 minutes | 6.749 | 6.482 | 6.548 |
48
+ | 480-720 minutes | 6.892 | 6.686 | 6.685 |
49
+ | 720-1440 minutes | 7.020 | 6.756 | 6.780 |
50
+ | 1440-2880 minutes | 7.445 | 7.095 | 7.214 |
51
+
52
+ ![](mae_steps_grouped.png "mae_steps")
53
+
54
+ The raw data is here
55
+ ![](mae_steps.png "mae_steps")
experiments/india/006_da_only/bad.png ADDED

Git LFS Details

  • SHA256: 37cbbf51e7fa7dceb8b2074419267b4bde8186ddcd40b4a49c085735fdf72e43
  • Pointer size: 131 Bytes
  • Size of remote file: 358 kB
experiments/india/006_da_only/da_only.md ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## DA forecasts only
2
+
3
+ The idea was to create a forecast for DA (day-ahead) only for Windnet.
4
+ We hope this would bring down the DA MAE values.
5
+
6
+ We do this by not forecasting the first X hours.
7
+
8
+ Unfortunately, it does not look like ignoring the first X hours makes the DA forecast better.
9
+
10
+ ## Experiments
11
+
12
+ 1. Baseline - [here](https://wandb.ai/openclimatefix/india/runs/miszfep5)
13
+ 2. Ignore first 6 hours - [here](https://wandb.ai/openclimatefix/india/runs/uosk0qug)
14
+ 3. Ignore first 12 hours - [here](https://wandb.ai/openclimatefix/india/runs/s9cnn4ei)
15
+
16
+ ## Results
17
+
18
+ | Timestep | all MAE % | 6 MAE % | 12 MAE % |
19
+ | --- | --- |---------|---------|
20
+ | 0-0 minutes | nan | nan | nan |
21
+ | 15-15 minutes | nan | nan | nan |
22
+ | 30-45 minutes | 0.065 | nan | nan |
23
+ | 45-60 minutes | 0.066 | nan | nan |
24
+ | 60-120 minutes | 0.063 | nan | nan |
25
+ | 120-240 minutes | 0.063 | nan | nan |
26
+ | 240-360 minutes | 0.064 | nan | nan |
27
+ | 360-480 minutes | 0.065 | 0.068 | nan |
28
+ | 480-720 minutes | 0.067 | 0.065 | nan |
29
+ | 720-1440 minutes | 0.068 | 0.065 | 0.065 |
30
+ | 1440-2880 minutes | 0.071 | 0.071 | 0.071 |
31
+
32
+ ![](mae_steps.png "mae_steps")
33
+
34
+ Here are two examples from the 6-hour ignore model: one that it forecast well, and one that it didn't.
35
+
36
+ ![](bad.png "bad")
37
+ ![](good.png "good")
experiments/india/006_da_only/good.png ADDED

Git LFS Details

  • SHA256: 5f4b6a11ac1560dbea1214ce381602b9eab7334a74110052dda072f0f53c3de8
  • Pointer size: 131 Bytes
  • Size of remote file: 424 kB
experiments/india/006_da_only/mae_steps.png ADDED

Git LFS Details

  • SHA256: 5ca49fbc24530c3d75d0ec5cd2ba6345082c1747a600143afc40faf7bade0cd6
  • Pointer size: 131 Bytes
  • Size of remote file: 122 kB
experiments/india/007_different_seeds/mae_all_steps.png ADDED

Git LFS Details

  • SHA256: b06eaa2f75d645185bea5b874d6020bae3bccd7de25ec519cf348cde511f27c6
  • Pointer size: 131 Bytes
  • Size of remote file: 203 kB
experiments/india/007_different_seeds/mae_steps.png ADDED

Git LFS Details

  • SHA256: 3adfaa5394e9f45c684812e47e385c25d1796a6c772d04f4e7a3cbcbeffafda3
  • Pointer size: 131 Bytes
  • Size of remote file: 130 kB
experiments/india/007_different_seeds/readme.md ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Training models with different seeds
2
+
3
+ Want to see the effect of training a model with different seeds.
4
+
5
+ We can see that the results for different seeds can vary by 0.5%,
6
+ and some models being better at different time horizons than others
7
+
8
+ ## Experiments
9
+ - seed 1 - [miszfep5](https://wandb.ai/openclimatefix/india/runs/miszfep5)
10
+ - seed 2 - [cxshv2q4](https://wandb.ai/openclimatefix/india/runs/cxshv2q4)
11
+ - seed 3 - [m46wdrr7](https://wandb.ai/openclimatefix/india/runs/m46wdrr7)
12
+
13
+ These were trained with 1000 batches, and 300 batches for validation
14
+
15
+ ## Results
16
+
17
+ | Timestep | s1 MAE % | s2 MAE % | s3 MAE % |
18
+ | --- | --- | --- | --- |
19
+ | 0-0 minutes | 0.066 | 0.061 | 0.066 |
20
+ | 15-15 minutes | 0.064 | 0.058 | 0.064 |
21
+ | 30-45 minutes | 0.065 | 0.060 | 0.063 |
22
+ | 45-60 minutes | 0.066 | 0.060 | 0.063 |
23
+ | 60-120 minutes | 0.063 | 0.060 | 0.063 |
24
+ | 120-240 minutes | 0.063 | 0.063 | 0.065 |
25
+ | 240-360 minutes | 0.064 | 0.066 | 0.065 |
26
+ | 360-480 minutes | 0.065 | 0.066 | 0.066 |
27
+ | 480-720 minutes | 0.067 | 0.066 | 0.065 |
28
+ | 720-1440 minutes | 0.068 | 0.068 | 0.066 |
29
+ | 1440-2880 minutes | 0.071 | 0.072 | 0.071 |
30
+
31
+ ![](mae_steps.png "mae_steps")
32
+
33
+ ![](mae_all_steps.png "mae_steps")
experiments/india/008_coarse4/mae_step.png ADDED

Git LFS Details

  • SHA256: 52e85df6c2ed7865e0f6f412ae47e7e5f0a1b12550b72702ebe7e166dec53636
  • Pointer size: 131 Bytes
  • Size of remote file: 179 kB