peterdudfield commited on
Commit
cbe6208
·
1 Parent(s): 3f81be8

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .github/workflows/release.yml +17 -0
  2. .github/workflows/test.yml +22 -0
  3. configs.example/callbacks/default.yaml +30 -0
  4. configs.example/callbacks/none.yaml +0 -0
  5. configs.example/callbacks/wandb.yaml +26 -0
  6. configs.example/config.yaml +45 -0
  7. configs.example/datamodule/configuration/example_configuration.yaml +288 -0
  8. configs.example/datamodule/premade_batches.yaml +10 -0
  9. configs.example/datamodule/streamed_batches.yaml +20 -0
  10. configs.example/experiment/baseline.yaml +21 -0
  11. configs.example/experiment/conv3d_sat_nwp.yaml +23 -0
  12. configs.example/experiment/example_simple.yaml +27 -0
  13. configs.example/experiment/test.yaml +33 -0
  14. configs.example/hparams_search/conv3d_optuna.yaml +49 -0
  15. configs.example/hydra/default.yaml +14 -0
  16. configs.example/logger/csv.yaml +9 -0
  17. configs.example/logger/many_loggers.yaml +7 -0
  18. configs.example/logger/neptune.yaml +8 -0
  19. configs.example/logger/tensorboard.yaml +11 -0
  20. configs.example/logger/wandb.yaml +17 -0
  21. configs.example/model/baseline.yaml +4 -0
  22. configs.example/model/multimodal.yaml +115 -0
  23. configs.example/model/nwp_dwsrf_weighting.yaml +21 -0
  24. configs.example/model/test.yaml +4 -0
  25. configs.example/model/wind_multimodal.yaml +83 -0
  26. configs.example/readme.md +5 -0
  27. configs.example/trainer/all_params.yaml +48 -0
  28. configs.example/trainer/default.yaml +14 -0
  29. experiments/india/001_v1/india_pv_wind.md +69 -0
  30. experiments/india/002_wind_meteomatics/india_windnet_v2.md +46 -0
  31. experiments/india/003_wind_plevels/MAE.png +3 -0
  32. experiments/india/003_wind_plevels/MAEvstimesteps.png +3 -0
  33. experiments/india/003_wind_plevels/p10.png +3 -0
  34. experiments/india/003_wind_plevels/p50.png +3 -0
  35. experiments/india/003_wind_plevels/plevel.md +54 -0
  36. experiments/india/004_n_training_samples/log-plot.py +14 -0
  37. experiments/india/004_n_training_samples/mae_samples.png +0 -0
  38. experiments/india/004_n_training_samples/mae_step.png +3 -0
  39. experiments/india/004_n_training_samples/readme.md +48 -0
  40. experiments/india/005_extra_nwp_variables/mae_steps.png +3 -0
  41. experiments/india/005_extra_nwp_variables/mae_steps_grouped.png +3 -0
  42. experiments/india/005_extra_nwp_variables/readmd.md +55 -0
  43. experiments/india/006_da_only/bad.png +3 -0
  44. experiments/india/006_da_only/da_only.md +37 -0
  45. experiments/india/006_da_only/good.png +3 -0
  46. experiments/india/006_da_only/mae_steps.png +3 -0
  47. experiments/india/007_different_seeds/mae_all_steps.png +3 -0
  48. experiments/india/007_different_seeds/mae_steps.png +3 -0
  49. experiments/india/007_different_seeds/readme.md +33 -0
  50. experiments/india/008_coarse4/mae_step.png +3 -0
.github/workflows/release.yml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Python Bump Version & release
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ paths-ignore:
8
+ - "configs.example/**" # ignores all files in configs.example
9
+ - "**/README.md" # ignores all README files
10
+ - "experiments/**" # ignores all files in experiments directory
11
+
12
+ jobs:
13
+ release:
14
+ uses: openclimatefix/.github/.github/workflows/python-release.yml@main
15
+ secrets:
16
+ token: ${{ secrets.PYPI_API_TOKEN }}
17
+ PAT_TOKEN: ${{ secrets.PAT_TOKEN }}
.github/workflows/test.yml ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Python package tests
2
+
3
+ on:
4
+ push:
5
+ pull_request:
6
+ types: [opened, reopened]
7
+ schedule:
8
+ - cron: "0 12 * * 1"
9
+ jobs:
10
+ call-run-python-tests:
11
+ uses: openclimatefix/.github/.github/workflows/python-test.yml@main
12
+ with:
13
+ # 0 means don't use pytest-xdist
14
+ pytest_numcpus: "4"
15
+ # pytest-cov looks at this folder
16
+ pytest_cov_dir: "pvnet"
17
+ # extra things to install
18
+ sudo_apt_install: "libgeos++-dev libproj-dev proj-data proj-bin"
19
+ # brew_install: "proj geos librttopo"
20
+ os_list: '["ubuntu-latest"]'
21
+ python-version: "['3.10', '3.11']"
22
+ extra_commands: "pip3 install -e '.[all]'"
configs.example/callbacks/default.yaml ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ early_stopping:
2
+ _target_: pvnet.callbacks.MainEarlyStopping
3
+ # name of the logged metric which determines when model is improving
4
+ monitor: "${resolve_monitor_loss:${model.output_quantiles}}"
5
+ mode: "min" # can be "max" or "min"
6
+ patience: 10 # how many epochs (or val check periods) of not improving until training stops
7
+ min_delta: 0 # minimum change in the monitored metric needed to qualify as an improvement
8
+
9
+ learning_rate_monitor:
10
+ _target_: lightning.pytorch.callbacks.LearningRateMonitor
11
+ logging_interval: "epoch"
12
+
13
+ model_summary:
14
+ _target_: lightning.pytorch.callbacks.ModelSummary
15
+ max_depth: 3
16
+
17
+ model_checkpoint:
18
+ _target_: lightning.pytorch.callbacks.ModelCheckpoint
19
+ # name of the logged metric which determines when model is improving
20
+ monitor: "${resolve_monitor_loss:${model.output_quantiles}}"
21
+ mode: "min" # can be "max" or "min"
22
+ save_top_k: 1 # save k best models (determined by above metric)
23
+ save_last: True # additionally always save model from last epoch
24
+ every_n_epochs: 1
25
+ verbose: False
26
+ filename: "epoch={epoch}-step={step}"
27
+ # The path to where the model checkpoints will be stored
28
+ dirpath: "PLACEHOLDER/${model_name}" #${..model_name}
29
+ auto_insert_metric_name: False
30
+ save_on_train_epoch_end: False
configs.example/callbacks/none.yaml ADDED
File without changes
configs.example/callbacks/wandb.yaml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ defaults:
2
+ - default.yaml
3
+
4
+ watch_model:
5
+ _target_: src.callbacks.wandb_callbacks.WatchModel
6
+ log: "all"
7
+ log_freq: 100
8
+
9
+ upload_code_as_artifact:
10
+ _target_: src.callbacks.wandb_callbacks.UploadCodeAsArtifact
11
+ code_dir: ${work_dir}/src
12
+
13
+ upload_ckpts_as_artifact:
14
+ _target_: src.callbacks.wandb_callbacks.UploadCheckpointsAsArtifact
15
+ ckpt_dir: "checkpoints/"
16
+ upload_best_only: True
17
+
18
+ log_f1_precision_recall_heatmap:
19
+ _target_: src.callbacks.wandb_callbacks.LogF1PrecRecHeatmap
20
+
21
+ log_confusion_matrix:
22
+ _target_: src.callbacks.wandb_callbacks.LogConfusionMatrix
23
+
24
+ log_image_predictions:
25
+ _target_: src.callbacks.wandb_callbacks.LogImagePredictions
26
+ num_samples: 8
configs.example/config.yaml ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # specify here default training configuration
4
+ defaults:
5
+ - _self_
6
+ - trainer: default.yaml
7
+ - model: multimodal.yaml
8
+ - datamodule: premade_samples.yaml
9
+ - callbacks: default.yaml # set this to null if you don't want to use callbacks
10
+ - logger: wandb.yaml # set logger here or use command line (e.g. `python run.py logger=wandb`)
11
+ - experiment: null
12
+ - hparams_search: null
13
+ - hydra: default.yaml
14
+
15
+ renewable: "pv_uk"
16
+
17
+ # enable color logging
18
+ # - override hydra/hydra_logging: colorlog
19
+ # - override hydra/job_logging: colorlog
20
+
21
+ # path to original working directory
22
+ # hydra hijacks working directory by changing it to the current log directory,
23
+ # so it's useful to have this path as a special variable
24
+ # learn more here: https://hydra.cc/docs/next/tutorials/basic/running_your_app/working_directory
25
+ work_dir: ${hydra:runtime.cwd}
26
+
27
+ model_name: "default"
28
+
29
+ # use `python run.py debug=true` for easy debugging!
30
+ # this will run 1 train, val and test loop with only 1 batch
31
+ # equivalent to running `python run.py trainer.fast_dev_run=true`
32
+ # (this is placed here just for easier access from command line)
33
+ debug: False
34
+
35
+ # pretty print config at the start of the run using Rich library
36
+ print_config: True
37
+
38
+ # disable python warnings if they annoy you
39
+ ignore_warnings: True
40
+
41
+ # check performance on test set, using the best model achieved during training
42
+ # lightning chooses best model based on metric specified in checkpoint callback
43
+ test_after_training: False
44
+
45
+ seed: 2727831
configs.example/datamodule/configuration/example_configuration.yaml ADDED
@@ -0,0 +1,288 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ general:
2
+ description: Example config for producing PVNet samples
3
+ name: example_config
4
+
5
+ input_data:
6
+
7
+ # Either use Site OR GSP configuration
8
+ site:
9
+ # Path to Site data in NetCDF format
10
+ file_path: PLACEHOLDER.nc
11
+ # Path to metadata in CSV format
12
+ metadata_file_path: PLACEHOLDER.csv
13
+ time_resolution_minutes: 15
14
+ interval_start_minutes: -60
15
+ # Specified for intraday currently
16
+ interval_end_minutes: 480
17
+ dropout_timedeltas_minutes: []
18
+ dropout_fraction: 0 # Fraction of samples with dropout
19
+
20
+ gsp:
21
+ # Path to GSP data in zarr format
22
+ # e.g. gs://solar-pv-nowcasting-data/PV/GSP/v7/pv_gsp.zarr
23
+ zarr_path: PLACEHOLDER.zarr
24
+ interval_start_minutes: -60
25
+ # Specified for intraday currently
26
+ interval_end_minutes: 480
27
+ time_resolution_minutes: 30
28
+ # Random value from the list below will be chosen as the delay when dropout is used
29
+ # If set to null no dropout is applied. Only values before t0 are dropped out for GSP.
30
+ # Values after t0 are assumed as targets and cannot be dropped.
31
+ dropout_timedeltas_minutes: []
32
+ dropout_fraction: 0 # Fraction of samples with dropout
33
+
34
+ nwp:
35
+
36
+ ecmwf:
37
+ provider: ecmwf
38
+ # Path to ECMWF NWP data in zarr format
39
+ # n.b. It is not necessary to use multiple or any NWP data. These entries can be removed
40
+ zarr_path: PLACEHOLDER.zarr
41
+ interval_start_minutes: -60
42
+ # Specified for intraday currently
43
+ interval_end_minutes: 480
44
+ time_resolution_minutes: 60
45
+ channels:
46
+ - t2m # 2-metre temperature
47
+ - dswrf # downwards short-wave radiation flux
48
+ - dlwrf # downwards long-wave radiation flux
49
+ - hcc # high cloud cover
50
+ - mcc # medium cloud cover
51
+ - lcc # low cloud cover
52
+ - tcc # total cloud cover
53
+ - sde # snow depth water equivalent
54
+ - sr # direct solar radiation
55
+ - duvrs # downwards UV radiation at surface
56
+ - prate # precipitation rate
57
+ - u10 # 10-metre U component of wind speed
58
+ - u100 # 100-metre U component of wind speed
59
+ - u200 # 200-metre U component of wind speed
60
+ - v10 # 10-metre V component of wind speed
61
+ - v100 # 100-metre V component of wind speed
62
+ - v200 # 200-metre V component of wind speed
63
+ # The following channels are accumulated and need to be diffed
64
+ accum_channels:
65
+ - dswrf # downwards short-wave radiation flux
66
+ - dlwrf # downwards long-wave radiation flux
67
+ - sr # direct solar radiation
68
+ - duvrs # downwards UV radiation at surface
69
+ image_size_pixels_height: 24
70
+ image_size_pixels_width: 24
71
+ dropout_timedeltas_minutes: [-360]
72
+ dropout_fraction: 1.0 # Fraction of samples with dropout
73
+ max_staleness_minutes: null
74
+ normalisation_constants:
75
+ t2m:
76
+ mean: 283.48333740234375
77
+ std: 3.692270040512085
78
+ dswrf:
79
+ mean: 11458988.0
80
+ std: 13025427.0
81
+ dlwrf:
82
+ mean: 27187026.0
83
+ std: 15855867.0
84
+ hcc:
85
+ mean: 0.3961029052734375
86
+ std: 0.42244860529899597
87
+ mcc:
88
+ mean: 0.3288780450820923
89
+ std: 0.38039860129356384
90
+ lcc:
91
+ mean: 0.44901806116104126
92
+ std: 0.3791404366493225
93
+ tcc:
94
+ mean: 0.7049227356910706
95
+ std: 0.37487083673477173
96
+ sde:
97
+ mean: 8.107526082312688e-05
98
+ std: 0.000913831521756947 # Mapped from "sd" in the Python file
99
+ sr:
100
+ mean: 12905302.0
101
+ std: 16294988.0
102
+ duvrs:
103
+ mean: 1305651.25
104
+ std: 1445635.25
105
+ prate:
106
+ mean: 3.108070450252853e-05
107
+ std: 9.81039775069803e-05
108
+ u10:
109
+ mean: 1.7677178382873535
110
+ std: 5.531515598297119
111
+ u100:
112
+ mean: 2.393547296524048
113
+ std: 7.2320556640625
114
+ u200:
115
+ mean: 2.7963004112243652
116
+ std: 8.049470901489258
117
+ v10:
118
+ mean: 0.985887885093689
119
+ std: 5.411230564117432
120
+ v100:
121
+ mean: 1.4244288206100464
122
+ std: 6.944501876831055
123
+ v200:
124
+ mean: 1.6010299921035767
125
+ std: 7.561611652374268
126
+ # Added diff_ keys for the channels under accum_channels:
127
+ diff_dlwrf:
128
+ mean: 1136464.0
129
+ std: 131942.03125
130
+ diff_dswrf:
131
+ mean: 420584.6875
132
+ std: 715366.3125
133
+ diff_duvrs:
134
+ mean: 48265.4765625
135
+ std: 81605.25
136
+ diff_sr:
137
+ mean: 469169.5
138
+ std: 818950.6875
139
+
140
+ ukv:
141
+ provider: ukv
142
+ # Path to UKV NWP data in zarr format
143
+ # e.g. gs://solar-pv-nowcasting-data/NWP/UK_Met_Office/UKV_intermediate_version_7.zarr
144
+ # n.b. It is not necessary to use multiple or any NWP data. These entries can be removed
145
+ zarr_path: PLACEHOLDER.zarr
146
+ interval_start_minutes: -60
147
+ # Specified for intraday currently
148
+ interval_end_minutes: 480
149
+ time_resolution_minutes: 60
150
+ channels:
151
+ - t # 2-metre temperature
152
+ - dswrf # downwards short-wave radiation flux
153
+ - dlwrf # downwards long-wave radiation flux
154
+ - hcc # high cloud cover
155
+ - mcc # medium cloud cover
156
+ - lcc # low cloud cover
157
+ - sde # snow depth water equivalent
158
+ - r # relative humidity
159
+ - vis # visibility
160
+ - si10 # 10-metre wind speed
161
+ - wdir10 # 10-metre wind direction
162
+ - prate # precipitation rate
163
+ # These variables exist in CEDA training data but not in the live MetOffice live service
164
+ - hcct # height of convective cloud top, meters above surface. NaN if no clouds
165
+ - cdcb # height of lowest cloud base > 3 oktas
166
+ - dpt # dew point temperature
167
+ - prmsl # mean sea level pressure
168
+ - h # geometrical? (maybe geopotential?) height
169
+ image_size_pixels_height: 24
170
+ image_size_pixels_width: 24
171
+ dropout_timedeltas_minutes: [-360]
172
+ dropout_fraction: 1.0 # Fraction of samples with dropout
173
+ max_staleness_minutes: null
174
+ normalisation_constants:
175
+ t:
176
+ mean: 283.64913206
177
+ std: 4.38818501
178
+ dswrf:
179
+ mean: 111.28265039
180
+ std: 190.47216887
181
+ dlwrf:
182
+ mean: 325.03130139
183
+ std: 39.45988077
184
+ hcc:
185
+ mean: 29.11949682
186
+ std: 38.07184418
187
+ mcc:
188
+ mean: 40.88984494
189
+ std: 41.91144559
190
+ lcc:
191
+ mean: 50.08362643
192
+ std: 39.33210726
193
+ sde:
194
+ mean: 0.00289545
195
+ std: 0.1029753
196
+ r:
197
+ mean: 81.79229501
198
+ std: 11.45012499
199
+ vis:
200
+ mean: 32262.03285118
201
+ std: 21578.97975625
202
+ si10:
203
+ mean: 6.88348448
204
+ std: 3.94718813
205
+ wdir10:
206
+ mean: 199.41891636
207
+ std: 94.08407495
208
+ prate:
209
+ mean: 3.45793433e-05
210
+ std: 0.00021497
211
+ hcct:
212
+ mean: -18345.97478167
213
+ std: 18382.63958991
214
+ cdcb:
215
+ mean: 1412.26599062
216
+ std: 2126.99350113
217
+ dpt:
218
+ mean: 280.54379901
219
+ std: 4.57250482
220
+ prmsl:
221
+ mean: 101321.61574029
222
+ std: 1252.71790539
223
+ h:
224
+ mean: 2096.51991356
225
+ std: 1075.77812282
226
+
227
+ satellite:
228
+ # Path to Satellite data (non-HRV) in zarr format
229
+ # e.g. gs://solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/v4/2020_nonhrv.zarr
230
+ zarr_path: PLACEHOLDER.zarr
231
+ interval_start_minutes: -30
232
+ interval_end_minutes: 0
233
+ time_resolution_minutes: 5
234
+ channels:
235
+ - IR_016 # Surface, cloud phase
236
+ - IR_039 # Surface, clouds, wind fields
237
+ - IR_087 # Surface, clouds, atmospheric instability
238
+ - IR_097 # Ozone
239
+ - IR_108 # Surface, clouds, wind fields, atmospheric instability
240
+ - IR_120 # Surface, clouds, atmospheric instability
241
+ - IR_134 # Cirrus cloud height, atmospheric instability
242
+ - VIS006 # Surface, clouds, wind fields
243
+ - VIS008 # Surface, clouds, wind fields
244
+ - WV_062 # Water vapor, high level clouds, upper air analysis
245
+ - WV_073 # Water vapor, atmospheric instability, upper-level dynamics
246
+ image_size_pixels_height: 24
247
+ image_size_pixels_width: 24
248
+ dropout_timedeltas_minutes: []
249
+ dropout_fraction: 0 # Fraction of samples with dropout
250
+ normalisation_constants:
251
+ IR_016:
252
+ mean: 0.17594202
253
+ std: 0.21462157
254
+ IR_039:
255
+ mean: 0.86167645
256
+ std: 0.04618041
257
+ IR_087:
258
+ mean: 0.7719318
259
+ std: 0.06687243
260
+ IR_097:
261
+ mean: 0.8014212
262
+ std: 0.0468558
263
+ IR_108:
264
+ mean: 0.71254843
265
+ std: 0.17482725
266
+ IR_120:
267
+ mean: 0.89058584
268
+ std: 0.06115861
269
+ IR_134:
270
+ mean: 0.944365
271
+ std: 0.04492306
272
+ VIS006:
273
+ mean: 0.09633306
274
+ std: 0.12184761
275
+ VIS008:
276
+ mean: 0.11426069
277
+ std: 0.13090034
278
+ WV_062:
279
+ mean: 0.7359355
280
+ std: 0.16111417
281
+ WV_073:
282
+ mean: 0.62479186
283
+ std: 0.12924142
284
+
285
+ solar_position:
286
+ interval_start_minutes: -60
287
+ interval_end_minutes: 480
288
+ time_resolution_minutes: 30
configs.example/datamodule/premade_batches.yaml ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ _target_: pvnet.data.DataModule
2
+ configuration: null
3
+
4
+ # The sample_dir is the location batches were saved to using the save_batches.py script
5
+ # The sample_dir should contain train and val subdirectories with batches
6
+
7
+ sample_dir: "PLACEHOLDER"
8
+ num_workers: 10
9
+ prefetch_factor: 2
10
+ batch_size: 8
configs.example/datamodule/streamed_batches.yaml ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _target_: pvnet.data.DataModule
2
+ # Path to the data configuration yaml file. You can find examples in the configuration subdirectory
3
+ # in configs.example/datamodule/configuration
4
+ # Use the full local path such as: /FULL/PATH/PVNet/configs/datamodule/configuration/gcp_configuration.yaml"
5
+
6
+ configuration: "PLACEHOLDER.yaml"
7
+ num_workers: 20
8
+ prefetch_factor: 2
9
+ batch_size: 8
10
+
11
+ sample_output_dir: "PLACEHOLDER"
12
+ num_train_samples: 2
13
+ num_val_samples: 1
14
+
15
+ train_period:
16
+ - null
17
+ - "2022-05-07"
18
+ val_period:
19
+ - "2022-05-08"
20
+ - "2023-05-08"
configs.example/experiment/baseline.yaml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # to execute this experiment run:
4
+ # python run.py experiment=example_simple.yaml
5
+
6
+ defaults:
7
+ - override /trainer: default.yaml # choose trainer from 'configs/trainer/'
8
+ - override /model: baseline.yaml
9
+ - override /datamodule: premade_samples.yaml
10
+ - override /callbacks: default.yaml
11
+ - override /logger: neptune.yaml
12
+
13
+ # all parameters below will be merged with parameters from default configurations set above
14
+ # this allows you to overwrite only specified parameters
15
+
16
+ seed: 518
17
+ validate_only: "1" # by putting this key in the config file, the model does not get trained.
18
+
19
+ trainer:
20
+ min_epochs: 1
21
+ max_epochs: 1
configs.example/experiment/conv3d_sat_nwp.yaml ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # to execute this experiment run:
4
+ # python run.py experiment=example_simple.yaml
5
+
6
+ defaults:
7
+ - override /trainer: default.yaml # choose trainer from 'configs/trainer/'
8
+ - override /model: conv3d_sat_nwp.yaml
9
+ - override /datamodule: premade_samples.yaml
10
+ - override /callbacks: default.yaml
11
+ # - override /logger: neptune.yaml
12
+
13
+ # all parameters below will be merged with parameters from default configurations set above
14
+ # this allows you to overwrite only specified parameters
15
+
16
+ seed: 518
17
+
18
+ trainer:
19
+ min_epochs: 1
20
+ max_epochs: 10
21
+
22
+ model:
23
+ conv3d_channels: 32
configs.example/experiment/example_simple.yaml ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # to execute this experiment run:
4
+ # python run.py experiment=example_simple.yaml
5
+
6
+ defaults:
7
+ - override /trainer: default.yaml # choose trainer from 'configs/trainer/'
8
+ - override /model: conv3d_sat_nwp.yaml
9
+ - override /datamodule: premade_samples.yaml
10
+ - override /callbacks: default.yaml
11
+ - override /logger: tensorboard.yaml
12
+ - override /hparams_search: null
13
+ - override /hydra: default.yaml
14
+
15
+ # all parameters below will be merged with parameters from default configurations set above
16
+ # this allows you to overwrite only specified parameters
17
+
18
+ seed: 518
19
+
20
+ trainer:
21
+ min_epochs: 1
22
+ max_epochs: 2
23
+
24
+ datamodule:
25
+ batch_size: 16
26
+
27
+ validate_only: "1" # by putting this key in the config file, the model does not get trained.
configs.example/experiment/test.yaml ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # to execute this experiment run:
4
+ # python run.py experiment=test.yaml
5
+
6
+ defaults:
7
+ - override /trainer: default.yaml # choose trainer from 'configs/trainer/'
8
+ - override /model: test.yaml
9
+ - override /datamodule: premade_samples.yaml
10
+ - override /callbacks: default.yaml
11
+
12
+ # all parameters below will be merged with parameters from default configurations set above
13
+ # this allows you to overwrite only specified parameters
14
+
15
+ seed: 518
16
+
17
+ trainer:
18
+ min_epochs: 0
19
+ max_epochs: 2
20
+ reload_dataloaders_every_n_epochs: 0
21
+ limit_train_batches: 2000
22
+ limit_val_batches: 100
23
+ limit_test_batches: 100
24
+ val_check_interval: 100
25
+ num_sanity_val_steps: 8
26
+ accumulate_grad_batches: 4
27
+ #fast_dev_run: 3
28
+
29
+ datamodule:
30
+ num_workers: 10
31
+ prefetch_factor: 2
32
+ batch_size: 8
33
+ #validate_only: '1' # by putting this key in the config file, the model does not get trained.
configs.example/hparams_search/conv3d_optuna.yaml ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # @package _global_
2
+
3
+ # example hyperparameter optimization of some experiment with Optuna:
4
+ # python run.py -m hparams_search=conv3d_optuna experiment=conv3d_sat_nwp
5
+
6
+ defaults:
7
+ - override /hydra/sweeper: optuna
8
+
9
+ # choose metric which will be optimized by Optuna
10
+ optimized_metric: "MSE/Validation_epoch"
11
+
12
+ hydra:
13
+ # here we define Optuna hyperparameter search
14
+ # it optimizes for value returned from function with @hydra.main decorator
15
+ # learn more here: https://hydra.cc/docs/next/plugins/optuna_sweeper
16
+ sweeper:
17
+ _target_: hydra_plugins.hydra_optuna_sweeper.optuna_sweeper.OptunaSweeper
18
+ storage: null
19
+ study_name: null
20
+ n_jobs: 1
21
+
22
+ # 'minimize' or 'maximize' the objective
23
+ direction: minimize
24
+
25
+ # number of experiments that will be executed
26
+ n_trials: 20
27
+
28
+ # choose Optuna hyperparameter sampler
29
+ # learn more here: https://optuna.readthedocs.io/en/stable/reference/samplers.html
30
+ sampler:
31
+ _target_: optuna.samplers.TPESampler
32
+ seed: 12345
33
+ consider_prior: true
34
+ prior_weight: 1.0
35
+ consider_magic_clip: true
36
+ consider_endpoints: false
37
+ n_startup_trials: 10
38
+ n_ei_candidates: 24
39
+ multivariate: false
40
+ warn_independent_sampling: true
41
+
42
+ # define range of hyperparameters
43
+ search_space:
44
+ model.include_pv_yield_history:
45
+ type: categorical
46
+ choices: [true, false]
47
+ model.include_future_satellite:
48
+ type: categorical
49
+ choices: [true, false]
configs.example/hydra/default.yaml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # output paths for hydra logs
2
+ run:
3
+ # Local log directory for hydra
4
+ dir: PLACEHOLDER/runs/${now:%Y-%m-%d}/${now:%H-%M-%S}
5
+ sweep:
6
+ # Local log directory for hydra
7
+ dir: PLACEHOLDER/multiruns/${now:%Y-%m-%d_%H-%M-%S}
8
+ subdir: ${hydra.job.num}
9
+
10
+ # you can set here environment variables that are universal for all users
11
+ # for system specific variables (like data paths) it's better to use .env file!
12
+ job:
13
+ env_set:
14
+ EXAMPLE_VAR: "example_value"
configs.example/logger/csv.yaml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # csv logger built in lightning
2
+
3
+ csv:
4
+ _target_: pytorch_lightning.loggers.csv_logs.CSVLogger
5
+ # local path to log training process
6
+ save_dir: "PLACEHOLDER"
7
+ name: "csv/"
8
+ version: null
9
+ prefix: ""
configs.example/logger/many_loggers.yaml ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ # train with many loggers at once
2
+
3
+ defaults:
4
+ - csv.yaml
5
+ # - neptune.yaml
6
+ # - tensorboard.yaml
7
+ - wandb.yaml
configs.example/logger/neptune.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # https://neptune.ai
2
+
3
+ neptune:
4
+ _target_: pytorch_lightning.loggers.NeptuneLogger
5
+ api_key: ${oc.env:NEPTUNE_API_TOKEN} # api key is loaded from environment variable
6
+ # Neptune project placeholder
7
+ project: PLACEHOLDER
8
+ prefix: ""
configs.example/logger/tensorboard.yaml ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # https://www.tensorflow.org/tensorboard/
2
+
3
+ tensorboard:
4
+ _target_: pytorch_lightning.loggers.tensorboard.TensorBoardLogger
5
+ # Path to use for tensorboard logs
6
+ save_dir: "PLACEHOLDER"
7
+ name: "default"
8
+ version: "${model_name}"
9
+ log_graph: False
10
+ default_hp_metric: False
11
+ prefix: ""
configs.example/logger/wandb.yaml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # https://wandb.ai
2
+
3
+ wandb:
4
+ _target_: lightning.pytorch.loggers.wandb.WandbLogger
5
+ # wandb project to log to
6
+ project: "PLACEHOLDER"
7
+ name: "${model_name}"
8
+ # location to store the wandb local logs
9
+ save_dir: "PLACEHOLDER"
10
+ offline: False # set True to store all logs only locally
11
+ id: null # pass correct id to resume experiment!
12
+ # entity: "" # set to name of your wandb team or just remove it
13
+ log_model: False
14
+ prefix: ""
15
+ job_type: "train"
16
+ group: ""
17
+ tags: []
configs.example/model/baseline.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ _target_: pvnet.models.baseline.last_value.Model
2
+
3
+ forecast_minutes: 120
4
+ history_minutes: 30
configs.example/model/multimodal.yaml ADDED
@@ -0,0 +1,115 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _target_: pvnet.models.multimodal.multimodal.Model
2
+
3
+ output_quantiles: [0.02, 0.1, 0.25, 0.5, 0.75, 0.9, 0.98]
4
+
5
+ #--------------------------------------------
6
+ # NWP encoder
7
+ #--------------------------------------------
8
+
9
+ nwp_encoders_dict:
10
+ ukv:
11
+ _target_: pvnet.models.multimodal.encoders.encoders3d.DefaultPVNet
12
+ _partial_: True
13
+ in_channels: 2
14
+ out_features: 256
15
+ number_of_conv3d_layers: 6
16
+ conv3d_channels: 32
17
+ image_size_pixels: 24
18
+ ecmwf:
19
+ _target_: pvnet.models.multimodal.encoders.encoders3d.DefaultPVNet
20
+ _partial_: True
21
+ in_channels: 12
22
+ out_features: 256
23
+ number_of_conv3d_layers: 4
24
+ conv3d_channels: 32
25
+ image_size_pixels: 12
26
+
27
+ #--------------------------------------------
28
+ # Sat encoder settings
29
+ #--------------------------------------------
30
+
31
+ sat_encoder:
32
+ _target_: pvnet.models.multimodal.encoders.encoders3d.DefaultPVNet
33
+ _partial_: True
34
+ in_channels: 11
35
+ out_features: 256
36
+ number_of_conv3d_layers: 6
37
+ conv3d_channels: 32
38
+ image_size_pixels: 24
39
+
40
+ add_image_embedding_channel: False
41
+
42
+ #--------------------------------------------
43
+ # PV encoder settings
44
+ #--------------------------------------------
45
+
46
+ pv_encoder:
47
+ _target_: pvnet.models.multimodal.site_encoders.encoders.SingleAttentionNetwork
48
+ _partial_: True
49
+ num_sites: 349
50
+ out_features: 40
51
+ num_heads: 4
52
+ kdim: 40
53
+ id_embed_dim: 20
54
+
55
+ #--------------------------------------------
56
+ # Tabular network settings
57
+ #--------------------------------------------
58
+
59
+ output_network:
60
+ _target_: pvnet.models.multimodal.linear_networks.networks.ResFCNet2
61
+ _partial_: True
62
+ fc_hidden_features: 128
63
+ n_res_blocks: 6
64
+ res_block_layers: 2
65
+ dropout_frac: 0.0
66
+
67
+ embedding_dim: 16
68
+ include_sun: True
69
+ include_gsp_yield_history: False
70
+ include_site_yield_history: False
71
+
72
+ # The mapping between the location IDs and their embedding indices
73
+ location_id_mapping:
74
+ 1: 1
75
+ 5: 2
76
+ 110: 3
77
+ # ...
78
+
79
+ #--------------------------------------------
80
+ # Times
81
+ #--------------------------------------------
82
+
83
+ # Foreast and time settings
84
+ forecast_minutes: 480
85
+ history_minutes: 120
86
+
87
+ min_sat_delay_minutes: 60
88
+
89
+ # These must also be set even if identical to forecast_minutes and history_minutes
90
+ sat_history_minutes: 90
91
+ pv_history_minutes: 180
92
+
93
+ # These must be set for each NWP encoder
94
+ nwp_history_minutes:
95
+ ukv: 120
96
+ ecmwf: 120
97
+ nwp_forecast_minutes:
98
+ ukv: 480
99
+ ecmwf: 480
100
+ # Optional; defaults to 60, so must be set for data with different time resolution
101
+ nwp_interval_minutes:
102
+ ukv: 60
103
+ ecmwf: 60
104
+
105
+ # ----------------------------------------------
106
+ # Optimizer
107
+ # ----------------------------------------------
108
+ optimizer:
109
+ _target_: pvnet.optimizers.EmbAdamWReduceLROnPlateau
110
+ lr: 0.0001
111
+ weight_decay: 0.01
112
+ amsgrad: True
113
+ patience: 5
114
+ factor: 0.1
115
+ threshold: 0.002
configs.example/model/nwp_dwsrf_weighting.yaml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _target_: pvnet.models.multimodal.nwp_weighting.Model
2
+
3
+ #--------------------------------------------
4
+ # Network settings
5
+ #--------------------------------------------
6
+
7
+ # Foreast and time settings
8
+ forecast_minutes: 480
9
+ history_minutes: 120
10
+
11
+ nwp_history_minutes: 120
12
+ nwp_forecast_minutes: 480
13
+
14
+ nwp_image_size_pixels: 24
15
+ dwsrf_channel: 1
16
+
17
+ # ----------------------------------------------
18
+
19
+ optimizer:
20
+ _target_: pvnet.optimizers.AdamW
21
+ lr: 0.0005
configs.example/model/test.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ _target_: pvnet.models.baseline.single_value.Model
2
+
3
+ history_minutes: 120
4
+ forecast_minutes: 360
configs.example/model/wind_multimodal.yaml ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _target_: pvnet.models.multimodal.multimodal.Model
2
+
3
+ output_quantiles: [0.02, 0.1, 0.25, 0.5, 0.75, 0.9, 0.98]
4
+
5
+ #--------------------------------------------
6
+ # NWP encoder
7
+ #--------------------------------------------
8
+ nwp_encoders_dict:
9
+ ecmwf:
10
+ _target_: pvnet.models.multimodal.encoders.encoders3d.DefaultPVNet
11
+ _partial_: True
12
+ in_channels: 14
13
+ out_features: 256
14
+ number_of_conv3d_layers: 6
15
+ conv3d_channels: 32
16
+ image_size_pixels: 16
17
+
18
+ #--------------------------------------------
19
+ # Sensor encoder settings
20
+ #--------------------------------------------
21
+
22
+ wind_encoder:
23
+ _target_: pvnet.models.multimodal.site_encoders.encoders.SingleAttentionNetwork
24
+ _partial_: True
25
+ num_sites: 19
26
+ out_features: 40
27
+ num_heads: 4
28
+ kdim: 40
29
+ id_embed_dim: 20
30
+
31
+ #--------------------------------------------
32
+ # Tabular network settings
33
+ #--------------------------------------------
34
+
35
+ output_network:
36
+ _target_: pvnet.models.multimodal.linear_networks.networks.ResFCNet2
37
+ _partial_: True
38
+ fc_hidden_features: 128
39
+ n_res_blocks: 6
40
+ res_block_layers: 2
41
+ dropout_frac: 0.0
42
+
43
+ embedding_dim: 16
44
+ include_sun: False
45
+ include_gsp_yield_history: False
46
+
47
+ # The mapping between the location IDs and their embedding indices
48
+ location_id_mapping:
49
+ 1: 1
50
+ 5: 2
51
+ 110: 3
52
+ # ...
53
+
54
+ #--------------------------------------------
55
+ # Times
56
+ #--------------------------------------------
57
+
58
+ # Forecast and time settings
59
+ forecast_minutes: 480
60
+ history_minutes: 120
61
+
62
+ min_sat_delay_minutes: 60
63
+
64
+ # --- set to null if same as history_minutes ---
65
+ sat_history_minutes: 90
66
+ nwp_history_minutes: 60
67
+ nwp_forecast_minutes: 2880
68
+ pv_history_minutes: 180
69
+ pv_interval_minutes: 15
70
+ sat_interval_minutes: 15
71
+
72
+ target_key: "sensor"
73
+ # ----------------------------------------------
74
+ # Optimizer
75
+ # ----------------------------------------------
76
+ optimizer:
77
+ _target_: pvnet.optimizers.EmbAdamWReduceLROnPlateau
78
+ lr: 0.0001
79
+ weight_decay: 0.01
80
+ amsgrad: True
81
+ patience: 5
82
+ factor: 0.1
83
+ threshold: 0.002
configs.example/readme.md ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ This directory contains example configuration files for the PVNet project. Many paths will need to be unique to each user. You can find these paths by searching for PLACEHOLDER within these files. Not all of
2
+ the values with a placeholder need to be set. For example in the logger subdirectory there are many different loggers with PLACEHOLDERS. If only one logger is used, then only that placeholder needs to be set.
3
+
4
+ run experiments by:
5
+ `python run.py experiment=example_simple `
configs.example/trainer/all_params.yaml ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _target_: pytorch_lightning.Trainer
2
+
3
+ # default values for all trainer parameters
4
+ checkpoint_callback: True
5
+ default_root_dir: null
6
+ gradient_clip_val: 0.0
7
+ process_position: 0
8
+ num_nodes: 1
9
+ num_processes: 1
10
+ gpus: null
11
+ auto_select_gpus: False
12
+ tpu_cores: null
13
+ log_gpu_memory: null
14
+ overfit_batches: 0.0
15
+ track_grad_norm: -1
16
+ check_val_every_n_epoch: 1
17
+ fast_dev_run: False
18
+ accumulate_grad_batches: 1
19
+ max_epochs: 1
20
+ min_epochs: 1
21
+ max_steps: null
22
+ min_steps: null
23
+ limit_train_batches: 1.0
24
+ limit_val_batches: 1.0
25
+ limit_test_batches: 1.0
26
+ val_check_interval: 1.0
27
+ flush_logs_every_n_steps: 100
28
+ log_every_n_steps: 50
29
+ accelerator: null
30
+ sync_batchnorm: False
31
+ precision: 32
32
+ weights_save_path: null
33
+ num_sanity_val_steps: 2
34
+ truncated_bptt_steps: null
35
+ resume_from_checkpoint: null
36
+ profiler: null
37
+ benchmark: False
38
+ deterministic: False
39
+ reload_dataloaders_every_epoch: False
40
+ auto_lr_find: False
41
+ replace_sampler_ddp: True
42
+ terminate_on_nan: False
43
+ auto_scale_batch_size: False
44
+ prepare_data_per_node: True
45
+ plugins: null
46
+ amp_backend: "native"
47
+ amp_level: "O2"
48
+ move_metrics_to_cpu: False
configs.example/trainer/default.yaml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _target_: lightning.pytorch.trainer.trainer.Trainer
2
+
3
+ # set `1` to train on GPU, `0` to train on CPU only
4
+ accelerator: auto
5
+ devices: auto
6
+
7
+ min_epochs: null
8
+ max_epochs: null
9
+ reload_dataloaders_every_n_epochs: 0
10
+ num_sanity_val_steps: 8
11
+ fast_dev_run: false
12
+
13
+ accumulate_grad_batches: 4
14
+ log_every_n_steps: 50
experiments/india/001_v1/india_pv_wind.md ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # PVNet for Wind and PV Sites in India
2
+
3
+ ## PVNet for sites
4
+
5
+ ### Data
6
+
7
+ We use PV generation data for India from April 2019-Nov 2022 for training
8
+ and Dec 2022- Nov 2023 for validation. This is only with ECMWF data, and PV generation history.
9
+
10
+ The forecast is every 15 minutes for 48 hours for PV generation.
11
+
12
+ The input NWP data is hourly, and 32x32 pixels (corresponding to around 320kmx320km) around a central
13
+ point in NW-India.
14
+
15
+ [WandB Link](https://wandb.ai/openclimatefix/pvnet_india2.1/runs/o4xpvzrc)
16
+
17
+ ### Results
18
+
19
+ Overall MAE is 4.9% on the validation set, and forecasts look overall good.
20
+
21
+ ![batch_idx_1_all_892_2ca7e12db5de2cf2e244](https://github.com/openclimatefix/PVNet/assets/7170359/07e8199a-11b5-4400-9897-37b7738a4f39)
22
+
23
+ ![W B Chart 05_02_2024, 10_07_12_pvnet](https://github.com/openclimatefix/PVNet/assets/7170359/abaefdc1-dedd-4a12-8a26-afaf36d7786b)
24
+
25
+ ## WindNet
26
+
27
+
28
+ ### April-29-2024 WindNet v1 Production Model
29
+
30
+ [WandB Link](https://wandb.ai/openclimatefix/india/runs/5llq8iw6)
31
+
32
+ Improvements: Larger input size (64x64), 7 hour delay for ECMWF NWP inputs, to match production.
33
+ New, much more efficient encoder for NWP, allowing for more filters and layers, with less parameters.
34
+ The 64x64 input size corresponds to 6.4 degrees x 6.4 degrees, which is around 700km x 700km. This allows for the
35
+ model to see the wind over the wind generation sites, which seems to be the biggest reason for the improvement in the model.
36
+
37
+
38
+
39
+ MAE is 7.6% with real improvements on the production side of things.
40
+
41
+
42
+ There were other experiments with slightly different numbers of filters, model parameters and the like, but generally no
43
+ improvements were seen.
44
+
45
+
46
+ ## WindNet v1 Results
47
+
48
+ ### Data
49
+
50
+ We use Wind generation data for India from April 2019-Nov 2022 for training
51
+ and Dec 2022- Nov 2023 for validation. This is only with ECMWF data, and Wind generation history.
52
+
53
+ The forecast is every 15 minutes for 48 hours for Wind generation.
54
+
55
+ The input NWP data is hourly, and 32x32 pixels (corresponding to around 320kmx320km) around a central
56
+ point in NW-India. Note: The majority of the wind generation is likely not covered in the 320kmx320km area.
57
+
58
+
59
+ [WandB Link](https://wandb.ai/openclimatefix/pvnet_india2.1/runs/otdx7axx)
60
+
61
+ ### Results
62
+
63
+ ![W B Chart 05_02_2024, 10_05_19](https://github.com/openclimatefix/PVNet/assets/7170359/6a8cd9c5-bdfe-41ab-996d-37fd1be2a07c)
64
+
65
+ ![W B Chart 05_02_2024, 10_06_51_windnet](https://github.com/openclimatefix/PVNet/assets/7170359/77554ef0-4411-4432-af95-8530aef4a701)
66
+
67
+ ![batch_idx_1_all_1730_379a9f881a7f01153f98](https://github.com/openclimatefix/PVNet/assets/7170359/243d9f3e-4cb9-405e-80c5-40c6c218c17f)
68
+
69
+ MAE is around 10% overall, although it doesn't seem to do very well on the ramps up and down.
experiments/india/002_wind_meteomatics/india_windnet_v2.md ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### WindNet v2 Meteomatics + ECMWF Model
2
+
3
+ [WandB Link](https://wandb.ai/openclimatefix/india/runs/v3mja33d)
4
+
5
+ This newest experiment uses Meteomatics data in addition to ECMWF data. The Meteomatics data is at specific locations corresponding
6
+ to the generation sites we know about. It is smartly downscaled ECMWF data, down to 15 minutes and at a few height levels we are
7
+ interested in, primarily 10m, 100m, and 200m. The Meteomatics data is a semi-reanalysis, with each block of 6 hours being from one forecast run.
8
+ For example, in one day, hours 00-06 are from the same, 00 forecast run, and hours 06-12 are from the 06 forecast run. This is important to note
9
+ as it is both not a real reanalysis, but we also can't have it exactly match the live data, as any forecast steps beyond 6 hours are thrown away.
10
+ This does mean that these results should be taken as a best case or better than best case scenario, as every 6 hour, observations from the future
11
+ are incorporated into the Meteomatics input data from the next NWP model run.
12
+
13
+ For the purposes of WindNet, Meteomatics data is treated as Sensor data that goes into the future.
14
+ The model encodes the sensor information the same way as for the historical PV, Wind, and GSP generation, and has
15
+ a simple, single attention head to encode the information. This is then concatenated along with the rest of the data, like in
16
+ previous experiments.
17
+
18
+ This model also has an even larger input size of ECMWF data, 81x81 pixels, corresponding to around 810kmx810km.
19
+ ![Screenshot_20240430_082855](https://github.com/openclimatefix/PVNet/assets/7170359/6981a088-8664-474b-bfea-c94c777fc119)
20
+
21
+ MAE is 7.0% on the validation set, showing a slight improvement over the previous model.
22
+
23
+ Comparison with the production model:
24
+
25
+ | Timestep | Prod MAE % | No Meteomatics MAE % | Meteomatics MAE % |
26
+ | --- | --- | --- | --- |
27
+ | 0-0 minutes | 7.586 | 5.920 | 2.475 |
28
+ | 15-15 minutes | 8.021 | 5.809 | 2.968 |
29
+ | 30-45 minutes | 7.233 | 5.742 | 3.472 |
30
+ | 45-60 minutes | 7.187 | 5.698 | 3.804 |
31
+ | 60-120 minutes | 7.231 | 5.816 | 4.650 |
32
+ | 120-240 minutes | 7.287 | 6.080 | 6.028 |
33
+ | 240-360 minutes | 7.319 | 6.375 | 6.738 |
34
+ | 360-480 minutes | 7.285 | 6.638 | 6.964 |
35
+ | 480-720 minutes | 7.143 | 6.747 | 6.906 |
36
+ | 720-1440 minutes | 7.380 | 7.207 | 6.962 |
37
+ | 1440-2880 minutes | 7.904 | 7.507 | 7.507 |
38
+
39
+ ![mae_per_timestep](https://github.com/openclimatefix/PVNet/assets/7170359/e3c942e8-65c6-4b95-8c51-f25d43e7a082)
40
+
41
+
42
+
43
+
44
+ Example plot
45
+
46
+ ![Screenshot_20240430_082937](https://github.com/openclimatefix/PVNet/assets/7170359/88db342e-bf82-414e-8255-5ad4af659fb8)
experiments/india/003_wind_plevels/MAE.png ADDED

Git LFS Details

  • SHA256: b06d6f85c2ee708e9555969afd622353b950a744f604d6c31d3c32d9b1543c23
  • Pointer size: 131 Bytes
  • Size of remote file: 174 kB
experiments/india/003_wind_plevels/MAEvstimesteps.png ADDED

Git LFS Details

  • SHA256: 3646fe682b4d13b2e00d68cf6d19dec9d00e6c56cc4d3995c3903920b35b8707
  • Pointer size: 131 Bytes
  • Size of remote file: 219 kB
experiments/india/003_wind_plevels/p10.png ADDED

Git LFS Details

  • SHA256: cce6f27ce1bafc89e9b5cb75cc2dad7c1053bea931ea4f5dfa5a1ef404d1042b
  • Pointer size: 131 Bytes
  • Size of remote file: 150 kB
experiments/india/003_wind_plevels/p50.png ADDED

Git LFS Details

  • SHA256: ceae23a3f91f6bc56cf688bdbcaf5172f1a54736e412c5f0e80d8c056f7d9754
  • Pointer size: 131 Bytes
  • Size of remote file: 229 kB
experiments/india/003_wind_plevels/plevel.md ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Running WindNet for RUVNL for different Plevels
2
+
3
+ https://wandb.ai/openclimatefix/india/runs/5llq8iw6 is the current production one
4
+ This has 7 plevels and a small patch size.
5
+
6
+ ## Experiments
7
+
8
+ 1. Only used plevel 50 (orange)
9
+ https://wandb.ai/openclimatefix/india/runs/ziudzweq/
10
+
11
+ 2. Use plevels of [2, 10, 25, 50, 75, 90, 98]. This is what is already used. (green)
12
+ https://wandb.ai/openclimatefix/india/runs/xdlew7ib
13
+
14
+ 3. Use plevels of [1, 2, 10, 20, 25, 30, 40, 50, 60, 70, 75, 80, 90, 98, 99] (brown)
16
+
16
+ https://wandb.ai/openclimatefix/india/runs/pcr2zsrc
17
+
18
+
19
+ ## Training
20
+
21
+ Each epoch took about ~4 hours, so the training runs took several days.
22
+
23
+ TODO add number of samples
24
+
25
+ ## Results
26
+
27
+ MAE results show that using the plevel of 50 only, gives better results
28
+ ![](MAE.png "MAE")
29
+
30
+ The p50 results are about the same
31
+ ![](p50.png "p50")
32
+
33
+ We can see that for p10 the results are not right, as they should converge to 0.1
34
+ ![](p10.png "p10")
35
+
36
+ Interestingly the more plevels you have the better the results are for before 4 hours
37
+ but the less plevels you have the better the results for >= 8 hours.
38
+
39
+ | Timestep | P50 only MAE % | 7 plevels MAE % | 15 plevel MAE % | 7 plevels small patch MAE % |
40
+ | --- | --- | --- | --- | --- |
41
+ | 0-0 minutes | 5.416 | 5.920 | 3.933 | 7.586 |
42
+ | 15-15 minutes | 5.458 | 5.809 | 4.003 | 8.021 |
43
+ | 30-45 minutes | 5.525 | 5.742 | 4.442 | 7.233 |
44
+ | 45-60 minutes | 5.595 | 5.698 | 4.772 | 7.187 |
45
+ | 60-120 minutes | 5.890 | 5.816 | 5.307 | 7.231 |
46
+ | 120-240 minutes | 6.423 | 6.080 | 6.275 | 7.287 |
47
+ | 240-360 minutes | 6.608 | 6.375 | 6.707 | 7.319 |
48
+ | 360-480 minutes | 6.728 | 6.638 | 6.904 | 7.285 |
49
+ | 480-720 minutes | 6.634 | 6.747 | 6.872 | 7.143 |
50
+ | 720-1440 minutes | 6.940 | 7.207 | 7.176 | 7.380 |
51
+ | 1440-2880 minutes | 7.446 | 7.507 | 7.735 | 7.904 |
52
+
53
+
54
+ ![](MAEvstimesteps.png "MAEvstimesteps")
experiments/india/004_n_training_samples/log-plot.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Small script to make a MAE vs number of training batches plot.

Plots the overall validation MAE (%) from the N-samples experiments
against the number of training batches used, on a log-scaled x axis.
"""

# NOTE: original code did `import pandas as df`, which aliased the pandas
# module to `df` and then shadowed it with the DataFrame variable of the
# same name — use the conventional `pd` alias instead.
import pandas as pd
import plotly.graph_objects as go

# (n_samples, MAE %) pairs taken from the N-samples experiment results.
data = [[100, 7.779], [300, 7.441], [1000, 7.181], [3000, 7.180], [6711, 7.151]]
df = pd.DataFrame(data, columns=["n_samples", "MAE [%]"])

fig = go.Figure()
fig.add_trace(go.Scatter(x=df["n_samples"], y=df["MAE [%]"], mode="lines+markers"))
fig.update_layout(title="MAE % for N samples", xaxis_title="N Samples", yaxis_title="MAE %")
# Log-scale x axis so the sample counts (100 to 6711) are spread out evenly.
fig.update_xaxes(type="log")
fig.show(renderer="browser")
experiments/india/004_n_training_samples/mae_samples.png ADDED
experiments/india/004_n_training_samples/mae_step.png ADDED

Git LFS Details

  • SHA256: 3a3180a382e4b2c1534524f92a633d488912475a1e8a4effb0b28caf44368834
  • Pointer size: 131 Bytes
  • Size of remote file: 325 kB
experiments/india/004_n_training_samples/readme.md ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # N samples experiments
2
+
3
+ Kicked off an experiment that uses N samples
4
+ This is done by adding `limit_train_batches` to the `trainer/default.yaml`.
5
+
6
+ I checked that when limiting the batches, the same batches are shown to model for each epoch.
7
+
8
+ ## Experiments
9
+
10
+ Original is 6711 batches
11
+
12
+ - 100: 3p6scx2r
13
+ - 300: am46tno1
14
+ - 1000: u04xlb6p
15
+ - 3000: p11lhreo
16
+
17
+ ## Results
18
+
19
+ Overall
20
+
21
+ | Experiment | MAE % |
22
+ |------------|-------|
23
+ | 100 | 7.779 |
24
+ | 300 | 7.441 |
25
+ | 1000 | 7.181 |
26
+ | 3000 | 7.180 |
27
+ | 6711 | 7.151 |
28
+
29
+ Results by timestamps
30
+
31
+
32
+ | Timestep | 100 MAE % | 300 MAE % | 1000 MAE % | 3000 MAE % | 6711 MAE % |
33
+ | --- | --- | --- | --- | --- | --- |
34
+ | 0-0 minutes | 7.985 | 7.453 | 7.155 | 5.553 | 5.920 |
35
+ | 15-15 minutes | 7.953 | 7.055 | 6.923 | 5.453 | 5.809 |
36
+ | 30-45 minutes | 8.043 | 7.172 | 6.907 | 5.764 | 5.742 |
37
+ | 45-60 minutes | 7.850 | 7.070 | 6.790 | 5.815 | 5.698 |
38
+ | 60-120 minutes | 7.698 | 6.809 | 6.597 | 5.890 | 5.816 |
39
+ | 120-240 minutes | 7.355 | 6.629 | 6.495 | 6.221 | 6.080 |
40
+ | 240-360 minutes | 7.230 | 6.729 | 6.559 | 6.541 | 6.375 |
41
+ | 360-480 minutes | 7.415 | 6.997 | 6.770 | 6.855 | 6.638 |
42
+ | 480-720 minutes | 7.258 | 7.037 | 6.668 | 6.876 | 6.747 |
43
+ | 720-1440 minutes | 7.659 | 7.362 | 7.038 | 7.142 | 7.207 |
44
+ | 1440-2880 minutes | 8.027 | 7.745 | 7.518 | 7.535 | 7.507 |
45
+
46
+ ![](mae_step.png "mae_steps")
47
+
48
+ ![](mae_samples.png "mae_samples")
experiments/india/005_extra_nwp_variables/mae_steps.png ADDED

Git LFS Details

  • SHA256: 0ef7f7af4dafe38aac5a5df6cc74acc606cb4f0a1a9fc78972b09d68dd7574ad
  • Pointer size: 131 Bytes
  • Size of remote file: 215 kB
experiments/india/005_extra_nwp_variables/mae_steps_grouped.png ADDED

Git LFS Details

  • SHA256: 547d3aafbb1658602fe03ea1677589de4e208467756e9ce9cd1d8727f364dffa
  • Pointer size: 131 Bytes
  • Size of remote file: 133 kB
experiments/india/005_extra_nwp_variables/readmd.md ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Adding extra nwp variables
2
+
3
+ I wanted to run Windnet but testing some new nwp variables from ecmwf
4
+
5
+ General conclusion, although more experiments could be done.
6
+ The current nwp variables are about right.
7
+ If you add lots it makes it worse.
8
+ If you take some away, it makes it worse.
9
+
10
+ ## Bugs
11
+
12
+ Ran into a problem where I found that some examples have
13
+ `d.__getitem__('nwp-ecmwf__init_time_utc').values` had size 50, where it should be just one value. I removed these examples. This might be worth investigating further.
14
+
15
+ ## Experiments
16
+
17
+ The number of samples were 8000 when training.
18
+
19
+ ### 15 variables
20
+ Run windnet with `'hcc', 'lcc', 'mcc', 'prate', 'sde', 'sr', 't2m', 'tcc', 'u10',
21
+ 'v10', 'u100', 'v100', 'u200', 'v200', 'dlwrf', 'dswrf'`.
22
+
23
+ The experiment on wandb is [here](https://wandb.ai/openclimatefix/india/runs/k91rdffo)
24
+
25
+ ### 7 variables
26
+ Run windnet with the original 7 variables.
27
+ `t2m, u10, u100, u200, v10, v100, v200 `
28
+
29
+ The experiment on wandb is [here](https://wandb.ai/openclimatefix/india/runs/miszfep5)
30
+
31
+ ### 3 variables
32
+ Run windnet with only `t, u10, v100`
33
+
34
+ The experiment on wandb is [here](https://wandb.ai/openclimatefix/india/runs/22v3a39g)
35
+
36
+ ## Results
37
+
38
+ | Timestep | 15 MAE % | 7 MAE % | 3 MAE % |
39
+ | --- | --- | --- | --- |
40
+ | 0-0 minutes | 7.450 | 6.623 | 7.529 |
41
+ | 15-15 minutes | 7.348 | 6.441 | 7.408 |
42
+ | 30-45 minutes | 7.242 | 6.544 | 7.294 |
43
+ | 45-60 minutes | 7.134 | 6.567 | 7.185 |
44
+ | 60-120 minutes | 7.058 | 6.295 | 7.009 |
45
+ | 120-240 minutes | 6.965 | 6.290 | 6.800 |
46
+ | 240-360 minutes | 6.807 | 6.374 | 6.580 |
47
+ | 360-480 minutes | 6.749 | 6.482 | 6.548 |
48
+ | 480-720 minutes | 6.892 | 6.686 | 6.685 |
49
+ | 720-1440 minutes | 7.020 | 6.756 | 6.780 |
50
+ | 1440-2880 minutes | 7.445 | 7.095 | 7.214 |
51
+
52
+ ![](mae_steps_grouped.png "mae_steps")
53
+
54
+ The raw data is here
55
+ ![](mae_steps.png "mae_steps")
experiments/india/006_da_only/bad.png ADDED

Git LFS Details

  • SHA256: 37cbbf51e7fa7dceb8b2074419267b4bde8186ddcd40b4a49c085735fdf72e43
  • Pointer size: 131 Bytes
  • Size of remote file: 358 kB
experiments/india/006_da_only/da_only.md ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ## DA forecasts only
2
+
3
+ The idea was to create a forecast for DA (day-ahead) only for Windnet.
4
+ We hope this would bring down the DA MAE values.
5
+
6
+ We do this by not forecasting the first X hours.
7
+
8
+ Unfortunately, it does not look like ignoring the first X hours makes the DA forecast better.
9
+
10
+ ## Experiments
11
+
12
+ 1. Baseline - [here](https://wandb.ai/openclimatefix/india/runs/miszfep5)
13
+ 2. Ignore first 6 hours - [here](https://wandb.ai/openclimatefix/india/runs/uosk0qug)
14
+ 3. Ignore first 12 hours - [here](https://wandb.ai/openclimatefix/india/runs/s9cnn4ei)
15
+
16
+ ## Results
17
+
18
+ | Timestep | all MAE % | 6 MAE % | 12 MAE % |
19
+ | --- | --- |---------|---------|
20
+ | 0-0 minutes | nan | nan | nan |
21
+ | 15-15 minutes | nan | nan | nan |
22
+ | 30-45 minutes | 0.065 | nan | nan |
23
+ | 45-60 minutes | 0.066 | nan | nan |
24
+ | 60-120 minutes | 0.063 | nan | nan |
25
+ | 120-240 minutes | 0.063 | nan | nan |
26
+ | 240-360 minutes | 0.064 | nan | nan |
27
+ | 360-480 minutes | 0.065 | 0.068 | nan |
28
+ | 480-720 minutes | 0.067 | 0.065 | nan |
29
+ | 720-1440 minutes | 0.068 | 0.065 | 0.065 |
30
+ | 1440-2880 minutes | 0.071 | 0.071 | 0.071 |
31
+
32
+ ![](mae_steps.png "mae_steps")
33
+
34
+ Here are two examples from the 6-hour ignore model: one that it forecast well, and one that it didn't.
35
+
36
+ ![](bad.png "bad")
37
+ ![](good.png "good")
experiments/india/006_da_only/good.png ADDED

Git LFS Details

  • SHA256: 5f4b6a11ac1560dbea1214ce381602b9eab7334a74110052dda072f0f53c3de8
  • Pointer size: 131 Bytes
  • Size of remote file: 424 kB
experiments/india/006_da_only/mae_steps.png ADDED

Git LFS Details

  • SHA256: 5ca49fbc24530c3d75d0ec5cd2ba6345082c1747a600143afc40faf7bade0cd6
  • Pointer size: 131 Bytes
  • Size of remote file: 122 kB
experiments/india/007_different_seeds/mae_all_steps.png ADDED

Git LFS Details

  • SHA256: b06eaa2f75d645185bea5b874d6020bae3bccd7de25ec519cf348cde511f27c6
  • Pointer size: 131 Bytes
  • Size of remote file: 203 kB
experiments/india/007_different_seeds/mae_steps.png ADDED

Git LFS Details

  • SHA256: 3adfaa5394e9f45c684812e47e385c25d1796a6c772d04f4e7a3cbcbeffafda3
  • Pointer size: 131 Bytes
  • Size of remote file: 130 kB
experiments/india/007_different_seeds/readme.md ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Training models with different seeds
2
+
3
+ Want to see the effect of training a model with different seeds.
4
+
5
+ We can see that the results for different seeds can vary by 0.5%,
6
+ and some models being better at different time horizons than others
7
+
8
+ ## Experiments
9
+ - seed 1 - [miszfep5](https://wandb.ai/openclimatefix/india/runs/miszfep5)
10
+ - seed 2 - [cxshv2q4](https://wandb.ai/openclimatefix/india/runs/cxshv2q4)
11
+ - seed 3 - [m46wdrr7](https://wandb.ai/openclimatefix/india/runs/m46wdrr7)
12
+
13
+ These were trained with 1000 batches, and 300 batches for validation
14
+
15
+ ## Results
16
+
17
+ | Timestep | s1 MAE % | s2 MAE % | s3 MAE % |
18
+ | --- | --- | --- | --- |
19
+ | 0-0 minutes | 0.066 | 0.061 | 0.066 |
20
+ | 15-15 minutes | 0.064 | 0.058 | 0.064 |
21
+ | 30-45 minutes | 0.065 | 0.060 | 0.063 |
22
+ | 45-60 minutes | 0.066 | 0.060 | 0.063 |
23
+ | 60-120 minutes | 0.063 | 0.060 | 0.063 |
24
+ | 120-240 minutes | 0.063 | 0.063 | 0.065 |
25
+ | 240-360 minutes | 0.064 | 0.066 | 0.065 |
26
+ | 360-480 minutes | 0.065 | 0.066 | 0.066 |
27
+ | 480-720 minutes | 0.067 | 0.066 | 0.065 |
28
+ | 720-1440 minutes | 0.068 | 0.068 | 0.066 |
29
+ | 1440-2880 minutes | 0.071 | 0.072 | 0.071 |
30
+
31
+ ![](mae_steps.png "mae_steps")
32
+
33
+ ![](mae_all_steps.png "mae_steps")
experiments/india/008_coarse4/mae_step.png ADDED

Git LFS Details

  • SHA256: 52e85df6c2ed7865e0f6f412ae47e7e5f0a1b12550b72702ebe7e166dec53636
  • Pointer size: 131 Bytes
  • Size of remote file: 179 kB