peterdudfield committed on Jun 11, 2025

Commit

7bffb2f

1 Parent(s): b74423e

Delete pvnet

Browse files

Files changed (37) hide show

pvnet/__init__.py +0 -2
pvnet/callbacks.py +0 -129
pvnet/data/__init__.py +0 -3
pvnet/data/base_datamodule.py +0 -118
pvnet/data/site_datamodule.py +0 -53
pvnet/data/uk_regional_datamodule.py +0 -54
pvnet/load_model.py +0 -71
pvnet/models/__init__.py +0 -1
pvnet/models/base_model.py +0 -973
pvnet/models/baseline/__init__.py +0 -1
pvnet/models/baseline/last_value.py +0 -42
pvnet/models/baseline/readme.md +0 -5
pvnet/models/baseline/single_value.py +0 -36
pvnet/models/ensemble.py +0 -74
pvnet/models/model_cards/pv_india_model_card_template.md +0 -56
pvnet/models/model_cards/pv_uk_regional_model_card_template.md +0 -59
pvnet/models/model_cards/wind_india_model_card_template.md +0 -56
pvnet/models/multimodal/__init__.py +0 -1
pvnet/models/multimodal/basic_blocks.py +0 -104
pvnet/models/multimodal/encoders/__init__.py +0 -1
pvnet/models/multimodal/encoders/basic_blocks.py +0 -217
pvnet/models/multimodal/encoders/encoders2d.py +0 -413
pvnet/models/multimodal/encoders/encoders3d.py +0 -402
pvnet/models/multimodal/encoders/encodersRNN.py +0 -141
pvnet/models/multimodal/linear_networks/__init__.py +0 -1
pvnet/models/multimodal/linear_networks/basic_blocks.py +0 -121
pvnet/models/multimodal/linear_networks/networks.py +0 -332
pvnet/models/multimodal/multimodal.py +0 -417
pvnet/models/multimodal/readme.md +0 -11
pvnet/models/multimodal/site_encoders/__init__.py +0 -1
pvnet/models/multimodal/site_encoders/basic_blocks.py +0 -35
pvnet/models/multimodal/site_encoders/encoders.py +0 -284
pvnet/models/multimodal/unimodal_teacher.py +0 -447
pvnet/models/utils.py +0 -123
pvnet/optimizers.py +0 -200
pvnet/training.py +0 -183
pvnet/utils.py +0 -321

pvnet/__init__.py DELETED Viewed

	@@ -1,2 +0,0 @@
1	- """PVNet"""
2	- __version__ = "4.1.18"

pvnet/callbacks.py DELETED Viewed

@@ -1,129 +0,0 @@
-"""Custom callbacks
-"""
-from lightning.pytorch import Trainer
-from lightning.pytorch.callbacks import BaseFinetuning, EarlyStopping, LearningRateFinder
-from lightning.pytorch.trainer.states import TrainerFn
class PhaseEarlyStopping(EarlyStopping):
    """Monitor a validation metric and stop training when it stops improving.

    Only functions in a specific phase of training. Subclasses set `training_phase`;
    `switch_phase` then activates the callback when that phase is entered and
    deactivates it otherwise. While inactive, the early-stopping check is skipped.
    """

    # Name of the phase in which this callback is live. Subclasses override this.
    training_phase = None

    # Class-level default so `_should_skip_check` can read the flag before the first
    # `switch_phase` call. Until then the callback checks on every validation run.
    active = True

    def switch_phase(self, phase: str):
        """Activate the callback if `phase` equals `training_phase`, else deactivate it."""
        if phase == self.training_phase:
            self.activate()
        else:
            self.deactivate()

    def deactivate(self):
        """Deactivate callback"""
        self.active = False

    def activate(self):
        """Activate callback"""
        self.active = True

    def _should_skip_check(self, trainer: Trainer) -> bool:
        """Return True when the early-stopping check should not run.

        The check is skipped when the trainer is not fitting, when it is sanity
        checking, or when this callback is inactive for the current phase.
        """
        return (
            (trainer.state.fn != TrainerFn.FITTING) or (trainer.sanity_checking) or not self.active
        )


class PretrainEarlyStopping(PhaseEarlyStopping):
    """Monitor a validation metric and stop training when it stops improving.

    Only functions in the 'pretrain' phase of training.
    """

    # Inherits from PhaseEarlyStopping (not plain EarlyStopping) so that
    # `training_phase` is actually honoured via `switch_phase`.
    training_phase = "pretrain"


class MainEarlyStopping(PhaseEarlyStopping):
    """Monitor a validation metric and stop training when it stops improving.

    Only functions in the 'main' phase of training.
    """

    # Inherits from PhaseEarlyStopping (not plain EarlyStopping) so that
    # `training_phase` is actually honoured via `switch_phase`.
    training_phase = "main"
class PretrainFreeze(BaseFinetuning):
    """Freeze the satellite and NWP encoders during pretraining"""

    # Phase during which the encoders are kept frozen.
    training_phase = "pretrain"

    def __init__(self):
        """Freeze the satellite and NWP encoders during pretraining"""
        super().__init__()
        # Initialise the flag so `finetune_function` can read it even when
        # `switch_phase` has not been called yet. Active means "keep frozen".
        self.active = True

    @staticmethod
    def _encoder_modules(pl_module) -> list:
        """Return the encoders present on `pl_module` (satellite first, then NWP)."""
        modules = []
        if pl_module.include_sat:
            modules.append(pl_module.sat_encoder)
        if pl_module.include_nwp:
            modules.append(pl_module.nwp_encoder)
        return modules

    def freeze_before_training(self, pl_module):
        """Freeze satellite and NWP encoders before training start"""
        self.freeze(self._encoder_modules(pl_module))

    def finetune_function(self, pl_module, current_epoch, optimizer):
        """Unfreeze satellite and NWP encoders once the callback is no longer active.

        Args:
            pl_module: The module whose encoders are unfrozen.
            current_epoch: Unused; part of the `BaseFinetuning` hook signature.
            optimizer: Optimizer which receives the unfrozen parameters as a new group.
        """
        if self.active:
            return
        self.unfreeze_and_add_param_group(
            modules=self._encoder_modules(pl_module),
            optimizer=optimizer,
            train_bn=True,
        )

    def switch_phase(self, phase: str):
        """Activate the callback if `phase` equals `training_phase`, else deactivate it."""
        if phase == self.training_phase:
            self.activate()
        else:
            self.deactivate()

    def deactivate(self):
        """Deactivate callback"""
        self.active = False

    def activate(self):
        """Activate callback"""
        self.active = True
class PhasedLearningRateFinder(LearningRateFinder):
    """Learning rate finder which runs again at the start of each training phase."""

    # True when the finder should run at the next training-epoch start.
    active = True

    def on_fit_start(self, *args, **kwargs):
        """Do nothing at fit start; the search is triggered from `on_train_epoch_start`."""
        return None

    def on_train_epoch_start(self, trainer, pl_module):
        """Run the learning rate search once, then switch the finder off."""
        if not self.active:
            return
        self.lr_find(trainer, pl_module)
        self.deactivate()

    def activate(self):
        """Arm the finder so it runs at the next epoch start."""
        self.active = True

    def deactivate(self):
        """Disarm the finder."""
        self.active = False

    def switch_phase(self, phase: str):
        """Re-arm the finder whenever the phase changes, regardless of which phase it is."""
        self.activate()

pvnet/data/__init__.py DELETED Viewed

@@ -1,3 +0,0 @@
-"""Data parts"""
-from .site_datamodule import SiteDataModule
-from .uk_regional_datamodule import DataModule

pvnet/data/base_datamodule.py DELETED Viewed

@@ -1,118 +0,0 @@
-""" Data module for pytorch lightning """
-from glob import glob
-from lightning.pytorch import LightningDataModule
-from ocf_data_sampler.numpy_sample.collate import stack_np_samples_into_batch
-from ocf_data_sampler.torch_datasets.sample.base import (
-    NumpyBatch,
-    SampleBase,
-    TensorBatch,
-    batch_to_tensor,
-)
-from torch.utils.data import DataLoader, Dataset
def collate_fn(samples: list[NumpyBatch]) -> TensorBatch:
    """Stack a list of numpy samples into one batch and convert it to a tensor batch."""
    stacked = stack_np_samples_into_batch(samples)
    return batch_to_tensor(stacked)
class PremadeSamplesDataset(Dataset):
    """Dataset to load pre-saved samples from a directory.

    Args:
        sample_dir: Path to the directory of pre-saved samples.
        sample_class: Sample class (not an instance) providing `load(path)` and, on the
            loaded object, `to_numpy()`.
    """

    def __init__(self, sample_dir: str, sample_class: "type[SampleBase]"):
        """Initialise PremadeSamplesDataset"""
        # `glob` returns entries in arbitrary, filesystem-dependent order. Sorting makes
        # the index -> sample mapping reproducible across machines and runs.
        self.sample_paths = sorted(glob(f"{sample_dir}/*"))
        self.sample_class = sample_class

    def __len__(self) -> int:
        """Return the number of sample files found in `sample_dir`."""
        return len(self.sample_paths)

    def __getitem__(self, idx):
        """Load the sample at position `idx` and return its numpy representation."""
        sample = self.sample_class.load(self.sample_paths[idx])
        return sample.to_numpy()
class BaseDataModule(LightningDataModule):
    """Base Datamodule for training pvnet and using pvnet pipeline in ocf-data-sampler."""

    def __init__(
        self,
        configuration: str | None = None,
        sample_dir: str | None = None,
        batch_size: int = 16,
        num_workers: int = 0,
        prefetch_factor: int | None = None,
        train_period: list[str | None] | None = None,
        val_period: list[str | None] | None = None,
    ):
        """Base Datamodule for training pvnet architecture.

        Can also be used with pre-made batches if `sample_dir` is set.

        Args:
            configuration: Path to ocf-data-sampler configuration file.
            sample_dir: Path to the directory of pre-saved samples. Cannot be used together with
                `configuration` or '[train/val]_period'.
            batch_size: Batch size.
            num_workers: Number of workers to use in multiprocess batch loading.
            prefetch_factor: Number of data will be prefetched at the end of each worker process.
            train_period: Date range filter for train dataloader. `None` (the default) is
                equivalent to `[None, None]`.
            val_period: Date range filter for val dataloader. `None` (the default) is
                equivalent to `[None, None]`.

        Raises:
            ValueError: If both or neither of `sample_dir` and `configuration` are set, or
                if a period filter is given together with `sample_dir`.
        """
        super().__init__()

        # Build fresh lists per instance rather than sharing one mutable default object
        # between every datamodule that is ever constructed.
        if train_period is None:
            train_period = [None, None]
        if val_period is None:
            val_period = [None, None]

        if (sample_dir is None) == (configuration is None):
            raise ValueError("Exactly one of `sample_dir` or `configuration` must be set.")

        if sample_dir is not None:
            if any(period != [None, None] for period in (train_period, val_period)):
                raise ValueError("Cannot set `(train/val)_period` with presaved samples")

        self.configuration = configuration
        self.sample_dir = sample_dir
        self.train_period = train_period
        self.val_period = val_period

        # Keyword arguments shared by the train and val dataloaders.
        self._common_dataloader_kwargs = dict(
            batch_size=batch_size,
            sampler=None,
            batch_sampler=None,
            num_workers=num_workers,
            collate_fn=collate_fn,
            pin_memory=False,
            drop_last=False,
            timeout=0,
            worker_init_fn=None,
            prefetch_factor=prefetch_factor,
            persistent_workers=False,
        )

    def _get_streamed_samples_dataset(self, start_time, end_time) -> Dataset:
        """Return a dataset built from `configuration`; subclasses must implement."""
        raise NotImplementedError

    def _get_premade_samples_dataset(self, subdir) -> Dataset:
        """Return a dataset reading `sample_dir/subdir`; subclasses must implement."""
        raise NotImplementedError

    def train_dataloader(self) -> DataLoader:
        """Construct train dataloader"""
        if self.sample_dir is not None:
            dataset = self._get_premade_samples_dataset("train")
        else:
            dataset = self._get_streamed_samples_dataset(*self.train_period)
        return DataLoader(dataset, shuffle=True, **self._common_dataloader_kwargs)

    def val_dataloader(self) -> DataLoader:
        """Construct val dataloader"""
        if self.sample_dir is not None:
            dataset = self._get_premade_samples_dataset("val")
        else:
            dataset = self._get_streamed_samples_dataset(*self.val_period)
        return DataLoader(dataset, shuffle=False, **self._common_dataloader_kwargs)

pvnet/data/site_datamodule.py DELETED Viewed

@@ -1,53 +0,0 @@
-""" Data module for pytorch lightning """
-from ocf_data_sampler.torch_datasets.datasets.site import SitesDataset
-from ocf_data_sampler.torch_datasets.sample.site import SiteSample
-from torch.utils.data import Dataset
-from pvnet.data.base_datamodule import BaseDataModule, PremadeSamplesDataset
class SiteDataModule(BaseDataModule):
    """Datamodule for training pvnet and using pvnet pipeline in `ocf-data-sampler`."""

    def __init__(
        self,
        configuration: str | None = None,
        sample_dir: str | None = None,
        batch_size: int = 16,
        num_workers: int = 0,
        prefetch_factor: int | None = None,
        train_period: list[str | None] | None = None,
        val_period: list[str | None] | None = None,
    ):
        """Datamodule for training pvnet architecture.

        Can also be used with pre-made batches if `sample_dir` is set.

        Args:
            configuration: Path to configuration file.
            sample_dir: Path to the directory of pre-saved samples. Cannot be used together with
                `configuration` or '[train/val]_period'.
            batch_size: Batch size.
            num_workers: Number of workers to use in multiprocess batch loading.
            prefetch_factor: Number of data will be prefetched at the end of each worker process.
            train_period: Date range filter for train dataloader. `None` (the default) is
                equivalent to `[None, None]`.
            val_period: Date range filter for val dataloader. `None` (the default) is
                equivalent to `[None, None]`.
        """
        # Resolve the sentinel here so the base class always receives a concrete,
        # per-instance list instead of a shared mutable default.
        super().__init__(
            configuration=configuration,
            sample_dir=sample_dir,
            batch_size=batch_size,
            num_workers=num_workers,
            prefetch_factor=prefetch_factor,
            train_period=[None, None] if train_period is None else train_period,
            val_period=[None, None] if val_period is None else val_period,
        )

    def _get_streamed_samples_dataset(self, start_time, end_time) -> Dataset:
        """Build a `SitesDataset` from the configuration, limited to the given time range."""
        return SitesDataset(self.configuration, start_time=start_time, end_time=end_time)

    def _get_premade_samples_dataset(self, subdir) -> Dataset:
        """Build a dataset of pre-saved `SiteSample`s from `sample_dir/subdir`."""
        split_dir = f"{self.sample_dir}/{subdir}"
        return PremadeSamplesDataset(split_dir, SiteSample)

pvnet/data/uk_regional_datamodule.py DELETED Viewed

@@ -1,54 +0,0 @@
-""" Data module for pytorch lightning """
-from ocf_data_sampler.torch_datasets.datasets.pvnet_uk import PVNetUKRegionalDataset
-from ocf_data_sampler.torch_datasets.sample.uk_regional import UKRegionalSample
-from torch.utils.data import Dataset
-from pvnet.data.base_datamodule import BaseDataModule, PremadeSamplesDataset
class DataModule(BaseDataModule):
    """Datamodule for training pvnet and using pvnet pipeline in `ocf-data-sampler`."""

    def __init__(
        self,
        configuration: str | None = None,
        sample_dir: str | None = None,
        batch_size: int = 16,
        num_workers: int = 0,
        prefetch_factor: int | None = None,
        train_period: list[str | None] | None = None,
        val_period: list[str | None] | None = None,
    ):
        """Datamodule for training pvnet architecture.

        Can also be used with pre-made batches if `sample_dir` is set.

        Args:
            configuration: Path to configuration file.
            sample_dir: Path to the directory of pre-saved samples. Cannot be used together with
                `configuration` or '[train/val]_period'.
            batch_size: Batch size.
            num_workers: Number of workers to use in multiprocess batch loading.
            prefetch_factor: Number of data will be prefetched at the end of each worker process.
            train_period: Date range filter for train dataloader. `None` (the default) is
                equivalent to `[None, None]`.
            val_period: Date range filter for val dataloader. `None` (the default) is
                equivalent to `[None, None]`.
        """
        # Resolve the sentinel here so the base class always receives a concrete,
        # per-instance list instead of a shared mutable default.
        super().__init__(
            configuration=configuration,
            sample_dir=sample_dir,
            batch_size=batch_size,
            num_workers=num_workers,
            prefetch_factor=prefetch_factor,
            train_period=[None, None] if train_period is None else train_period,
            val_period=[None, None] if val_period is None else val_period,
        )

    def _get_streamed_samples_dataset(self, start_time, end_time) -> Dataset:
        """Build a `PVNetUKRegionalDataset` from the configuration for the given time range."""
        return PVNetUKRegionalDataset(self.configuration, start_time=start_time, end_time=end_time)

    def _get_premade_samples_dataset(self, subdir) -> Dataset:
        """Build a dataset of pre-saved `UKRegionalSample`s from `sample_dir/subdir`."""
        split_dir = f"{self.sample_dir}/{subdir}"
        # Returns a dict of np arrays
        return PremadeSamplesDataset(split_dir, UKRegionalSample)

pvnet/load_model.py DELETED Viewed

@@ -1,71 +0,0 @@
-""" Load a model from its checkpoint directory """
-import glob
-import os
-import hydra
-import torch
-from pyaml_env import parse_config
-from pvnet.models.ensemble import Ensemble
-from pvnet.models.multimodal.unimodal_teacher import Model as UMTModel
def get_model_from_checkpoints(
    checkpoint_dir_paths: list[str],
    val_best: bool = True,
):
    """Load a model, or an ensemble of models, from checkpoint directories.

    Each directory must contain `model_config.yaml` plus either exactly one
    `epoch*.ckpt` file (when `val_best` is True) or a `last.ckpt` file (otherwise).

    Args:
        checkpoint_dir_paths: Checkpoint directories. More than one yields an `Ensemble`.
        val_best: Load the single saved `epoch*.ckpt` if True, else load `last.ckpt`.

    Returns:
        A tuple `(model, model_config, data_config)`. `data_config` is the path to the
        first directory's `data_config.yaml`, or None if that file does not exist.

    Raises:
        ValueError: If `checkpoint_dir_paths` is empty, or if `val_best` is True and a
            directory does not contain exactly one `epoch*.ckpt` file.
    """
    # Fail early with a clear message instead of an IndexError after the loop.
    if not checkpoint_dir_paths:
        raise ValueError("`checkpoint_dir_paths` must contain at least one directory.")

    is_ensemble = len(checkpoint_dir_paths) > 1

    model_configs = []
    models = []
    data_configs = []

    for path in checkpoint_dir_paths:
        # Load the model
        model_config = parse_config(f"{path}/model_config.yaml")
        model = hydra.utils.instantiate(model_config)

        if val_best:
            # Only one epoch (best) saved per model
            files = glob.glob(f"{path}/epoch*.ckpt")
            if len(files) != 1:
                raise ValueError(
                    f"Found {len(files)} checkpoints @ {path}/epoch*.ckpt. Expected one."
                )
            checkpoint_file = files[0]
        else:
            checkpoint_file = f"{path}/last.ckpt"

        # TODO: Loading with weights_only=False is not recommended
        # NOTE(review): this unpickles arbitrary objects; only use on trusted checkpoints.
        checkpoint = torch.load(checkpoint_file, map_location="cpu", weights_only=False)

        model.load_state_dict(state_dict=checkpoint["state_dict"])

        if isinstance(model, UMTModel):
            model, model_config = model.convert_to_multimodal_model(model_config)

        # Check for data config
        data_config = f"{path}/data_config.yaml"
        data_configs.append(data_config if os.path.isfile(data_config) else None)

        model_configs.append(model_config)
        models.append(model)

    if is_ensemble:
        model_config = {
            "_target_": "pvnet.models.ensemble.Ensemble",
            "model_list": model_configs,
        }
        model = Ensemble(model_list=models)
    else:
        model_config = model_configs[0]
        model = models[0]

    # The first directory's data config is used for single models and ensembles alike.
    data_config = data_configs[0]

    return model, model_config, data_config

pvnet/models/__init__.py DELETED Viewed

	@@ -1 +0,0 @@
1	- """Models for PVNet"""

pvnet/models/base_model.py DELETED Viewed

@@ -1,973 +0,0 @@
-"""Base model for all PVNet submodels"""
-import copy
-import logging
-import os
-import tempfile
-import time
-from pathlib import Path
-from typing import Dict, Optional, Union
-import hydra
-import lightning.pytorch as pl
-import matplotlib.pyplot as plt
-import pandas as pd
-import pkg_resources
-import torch
-import torch.nn.functional as F
-import wandb
-import yaml
-from huggingface_hub import ModelCard, ModelCardData, PyTorchModelHubMixin
-from huggingface_hub.constants import PYTORCH_WEIGHTS_NAME
-from huggingface_hub.file_download import hf_hub_download
-from huggingface_hub.hf_api import HfApi
-from ocf_data_sampler.torch_datasets.sample.base import copy_batch_to_device
-from torchvision.transforms.functional import center_crop
-from pvnet.models.utils import (
-    BatchAccumulator,
-    MetricAccumulator,
-    PredAccumulator,
-)
-from pvnet.optimizers import AbstractOptimizer
-from pvnet.utils import plot_batch_forecasts
# File names used for the data and model configs stored next to the model weights.
DATA_CONFIG_NAME = "data_config.yaml"
MODEL_CONFIG_NAME = "model_config.yaml"

logger = logging.getLogger(__name__)

# Profiler activities: always CPU, plus CUDA when a CUDA device is available.
activities = [torch.profiler.ProfilerActivity.CPU] + (
    [torch.profiler.ProfilerActivity.CUDA] if torch.cuda.is_available() else []
)
def make_clean_data_config(input_path, output_path, placeholder="PLACEHOLDER"):
    """Resave the data config and replace the filepaths with a placeholder.

    Args:
        input_path: Path to input configuration file
        output_path: Location to save the output configuration file
        placeholder: String placeholder for data sources
    """
    with open(input_path) as cfg:
        # NOTE(review): FullLoader can construct non-trivial Python objects; only use
        # this on trusted configuration files.
        config = yaml.load(cfg, Loader=yaml.FullLoader)

    config["general"]["description"] = "Config for training the saved PVNet model"
    config["general"]["name"] = "PVNet current"

    input_data = config["input_data"]

    for source in ["gsp", "satellite", "hrvsatellite"]:
        if source in input_data:
            # If not empty - i.e. if used
            if input_data[source]["zarr_path"] != "":
                input_data[source]["zarr_path"] = f"{placeholder}.zarr"

    if "nwp" in input_data:
        for nwp_config in input_data["nwp"].values():
            if nwp_config["zarr_path"] != "":
                nwp_config["zarr_path"] = f"{placeholder}.zarr"

    if "pv" in input_data:
        for d in input_data["pv"]["pv_files_groups"]:
            d["pv_filename"] = f"{placeholder}.netcdf"
            d["pv_metadata_filename"] = f"{placeholder}.csv"

    if "sensor" in input_data:
        # Index with the literal "sensor" key. The previous code reused the stale loop
        # variable `source`, which pointed at the last satellite/NWP source instead.
        sensor_config = input_data["sensor"]
        # If not empty - i.e. if used
        if sensor_config["sensor_filename"] != "":
            sensor_config["sensor_filename"] = f"{placeholder}.nc"

    with open(output_path, "w") as outfile:
        yaml.dump(config, outfile, default_flow_style=False)
def minimize_data_config(input_path, output_path, model):
    """Remove the parts of a data config that the given model does not use.

    Unused NWP, satellite and PV sections are deleted. For the sources that remain, the
    image size and `interval_end_minutes` are overwritten with values from the model.

    Args:
        input_path: Path to input configuration file
        output_path: Location to save the output configuration file
        model: The PVNet model object
    """

    def _fit_to_encoder(source_config, encoder):
        # Overwrite the image size and end of the time window with the encoder's values.
        pixel_size = encoder.image_size_pixels
        source_config["image_size_pixels_height"] = pixel_size
        source_config["image_size_pixels_width"] = pixel_size
        source_config["interval_end_minutes"] = (
            source_config["interval_start_minutes"]
            + (encoder.sequence_length - 1) * source_config["time_resolution_minutes"]
        )

    with open(input_path) as cfg:
        config = yaml.load(cfg, Loader=yaml.FullLoader)

    inputs = config["input_data"]

    if "nwp" in inputs:
        if model.include_nwp:
            # Iterate over a copy of the keys because entries may be deleted.
            for name in list(inputs["nwp"].keys()):
                if name in model.nwp_encoders_dict:
                    _fit_to_encoder(inputs["nwp"][name], model.nwp_encoders_dict[name])
                else:
                    # If not used, delete this source from the config
                    del inputs["nwp"][name]
        else:
            del inputs["nwp"]

    if "satellite" in inputs:
        if model.include_sat:
            _fit_to_encoder(inputs["satellite"], model.sat_encoder)
        else:
            del inputs["satellite"]

    if "pv" in inputs and not model.include_pv:
        del inputs["pv"]

    # Both of these sections end at the model's forecast horizon.
    for key in ("gsp", "solar_position"):
        if key in inputs:
            inputs[key]["interval_end_minutes"] = model.forecast_minutes

    with open(output_path, "w") as outfile:
        yaml.dump(config, outfile, default_flow_style=False)
def download_hf_hub_with_retries(
    repo_id,
    filename,
    revision,
    cache_dir,
    force_download,
    proxies,
    resume_download,
    token,
    local_files_only,
    max_retries=5,
    wait_time=10,
):
    """
    Tries to download a file from HuggingFace up to max_retries times.

    Args:
        repo_id (str): HuggingFace repo ID
        filename (str): Name of the file to download
        revision (str): Specific model revision
        cache_dir (str): Cache directory
        force_download (bool): Whether to force a new download
        proxies (dict): Proxy settings
        resume_download (bool): Resume interrupted downloads
        token (str): HuggingFace auth token
        local_files_only (bool): Use local files only
        max_retries (int): Maximum number of attempts; must be at least 1
        wait_time (int): Wait time (in seconds) before retrying

    Returns:
        str: The local file path of the downloaded file

    Raises:
        ValueError: If `max_retries` is less than 1.
        Exception: If every attempt fails; the last underlying error is chained as the
            cause.
    """
    # With fewer than one attempt the loop below would never run and the function
    # would silently return None instead of a file path.
    if max_retries < 1:
        raise ValueError(f"`max_retries` must be at least 1, got {max_retries}.")

    for attempt in range(1, max_retries + 1):
        try:
            return hf_hub_download(
                repo_id=repo_id,
                filename=filename,
                revision=revision,
                cache_dir=cache_dir,
                force_download=force_download,
                proxies=proxies,
                resume_download=resume_download,
                token=token,
                local_files_only=local_files_only,
            )
        except Exception as e:
            # Deliberately broad: any failure is treated as retryable.
            if attempt == max_retries:
                raise Exception(
                    f"Failed to download {filename} from {repo_id} after {max_retries} attempts."
                ) from e
            logging.warning(
                "Attempt %s/%s failed to download %s from %s. Retrying in %s seconds...",
                attempt,
                max_retries,
                filename,
                repo_id,
                wait_time,
            )
            time.sleep(wait_time)
-class PVNetModelHubMixin(PyTorchModelHubMixin):
-    """
-    Implementation of [`PyTorchModelHubMixin`] to provide model Hub upload/download capabilities.
-    """
-    @classmethod
-    def from_pretrained(
-        cls,
-        *,
-        model_id: str,
-        revision: str,
-        cache_dir: Optional[Union[str, Path]] = None,
-        force_download: bool = False,
-        proxies: Optional[Dict] = None,
-        resume_download: Optional[bool] = None,
-        local_files_only: bool = False,
-        token: Union[str, bool, None] = None,
-        map_location: str = "cpu",
-        strict: bool = False,
-    ):
-        """Load Pytorch pretrained weights and return the loaded model."""
-        if os.path.isdir(model_id):
-            print("Loading weights from local directory")
-            model_file = os.path.join(model_id, PYTORCH_WEIGHTS_NAME)
-            config_file = os.path.join(model_id, MODEL_CONFIG_NAME)
-        else:
-            # load model file
-            model_file = download_hf_hub_with_retries(
-                repo_id=model_id,
-                filename=PYTORCH_WEIGHTS_NAME,
-                revision=revision,
-                cache_dir=cache_dir,
-                force_download=force_download,
-                proxies=proxies,
-                resume_download=resume_download,
-                token=token,
-                local_files_only=local_files_only,
-                max_retries=5,
-                wait_time=10,
-            )
-            # load config file
-            config_file = download_hf_hub_with_retries(
-                repo_id=model_id,
-                filename=MODEL_CONFIG_NAME,
-                revision=revision,
-                cache_dir=cache_dir,
-                force_download=force_download,
-                proxies=proxies,
-                resume_download=resume_download,
-                token=token,
-                local_files_only=local_files_only,
-                max_retries=5,
-                wait_time=10,
-            )
-        with open(config_file, "r") as f:
-            config = yaml.safe_load(f)
-        model = hydra.utils.instantiate(config)
-        state_dict = torch.load(model_file, map_location=torch.device(map_location))
-        model.load_state_dict(state_dict, strict=strict)  # type: ignore
-        model.eval()  # type: ignore
-        return model
-    @classmethod
-    def get_data_config(
-        cls,
-        model_id: str,
-        revision: str,
-        cache_dir: Optional[Union[str, Path]] = None,
-        force_download: bool = False,
-        proxies: Optional[Dict] = None,
-        resume_download: bool = False,
-        local_files_only: bool = False,
-        token: Optional[Union[str, bool]] = None,
-    ):
-        """Load data config file."""
-        if os.path.isdir(model_id):
-            print("Loading data config from local directory")
-            data_config_file = os.path.join(model_id, DATA_CONFIG_NAME)
-        else:
-            data_config_file = download_hf_hub_with_retries(
-                repo_id=model_id,
-                filename=DATA_CONFIG_NAME,
-                revision=revision,
-                cache_dir=cache_dir,
-                force_download=force_download,
-                proxies=proxies,
-                resume_download=resume_download,
-                token=token,
-                local_files_only=local_files_only,
-                max_retries=5,
-                wait_time=10,
-            )
-        return data_config_file
-    def _save_pretrained(self, save_directory: Path) -> None:
-        """Save weights from a Pytorch model to a local directory."""
-        model_to_save = self.module if hasattr(self, "module") else self  # type: ignore
-        torch.save(model_to_save.state_dict(), save_directory / PYTORCH_WEIGHTS_NAME)
-    def save_pretrained(
-        self,
-        save_directory: Union[str, Path],
-        config: dict,
-        data_config: Optional[Union[str, Path]],
-        repo_id: Optional[str] = None,
-        push_to_hub: bool = False,
-        wandb_repo: Optional[str] = None,
-        wandb_ids: Optional[Union[list[str], str]] = None,
-        card_template_path: Optional[Path] = None,
-        **kwargs,
-    ) -> Optional[str]:
-        """
-        Save weights in local directory.
-        Args:
-            save_directory (`str` or `Path`):
-                Path to directory in which the model weights and configuration will be saved.
-            config (`dict`):
-                Model configuration specified as a key/value dictionary.
-            data_config (`str` or `Path`):
-                The path to the data config.
-            repo_id (`str`, *optional*):
-                ID of your repository on the Hub. Used only if `push_to_hub=True`. Will default to
-                the folder name if not provided.
-            push_to_hub (`bool`, *optional*, defaults to `False`):
-                Whether or not to push your model to the HuggingFace Hub after saving it.
-            wandb_repo: Identifier of the repo on wandb.
-            wandb_ids: Identifier(s) of the model on wandb.
-            card_template_path: Path to the HuggingFace model card template. Defaults to card in
-                PVNet library if set to None.
-            kwargs:
-                Additional key word arguments passed along to the
-                [`~ModelHubMixin._from_pretrained`] method.
-        """
-        save_directory = Path(save_directory)
-        save_directory.mkdir(parents=True, exist_ok=True)
-        # saving model weights/files
-        self._save_pretrained(save_directory)
-        # saving model and data config
-        if isinstance(config, dict):
-            with open(save_directory / MODEL_CONFIG_NAME, "w") as f:
-                yaml.dump(config, f, sort_keys=False, default_flow_style=False)
-        # Save cleaned configuration file
-        if data_config is not None:
-            new_data_config_path = save_directory / DATA_CONFIG_NAME
-            # Replace the input filenames with place holders
-            make_clean_data_config(data_config, new_data_config_path)
-            # Taylor the data config to the model being saved
-            minimize_data_config(new_data_config_path, new_data_config_path, self)
-        card = self.create_hugging_face_model_card(
-            repo_id, wandb_repo, wandb_ids, card_template_path
-        )
-        (save_directory / "README.md").write_text(str(card))
-        if push_to_hub:
-            api = HfApi()
-            api.upload_folder(
-                repo_id=repo_id,
-                repo_type="model",
-                folder_path=save_directory,
-            )
-            # Print the most recent commit hash
-            c = api.list_repo_commits(repo_id=repo_id, repo_type="model")[0]
-            message = (
-                f"The latest commit is now: \n"
-                f"    date: {c.created_at} \n"
-                f"    commit hash: {c.commit_id}\n"
-                f"    by: {c.authors}\n"
-                f"    title: {c.title}\n"
-            )
-            print(message)
-        return None
-    @staticmethod
-    def create_hugging_face_model_card(
-        repo_id: Optional[str] = None,
-        wandb_repo: Optional[str] = None,
-        wandb_ids: Optional[Union[list[str], str]] = None,
-        card_template_path: Optional[Path] = None,
-    ) -> ModelCard:
-        """Create the Hugging Face model card for a model.
-        The bundled template is selected from the model name taken from `repo_id`, unless an
-        explicit `card_template_path` is given.
-        Args:
-            repo_id (`str`, *optional*):
-                ID of the repository on the Hub, e.g. "org/model_name". If None, or if the model
-                name is not "windnet_india" or "pvnet_india", the UK regional template is used.
-            wandb_repo: Identifier of the repo on wandb.
-            wandb_ids: Identifier(s) of the model on wandb. A single string is treated as a list
-                of one, and None as no runs (the card then contains no wandb links).
-            card_template_path: Path to the HuggingFace model card template. Defaults to a card
-                in the `model_cards` directory next to this file if set to None.
-        Returns:
-            card: ModelCard - Hugging Face model card object
-        """
-        # Get appropriate model card. `repo_id` is optional and may come without an "org/"
-        # prefix, so fall back gracefully instead of raising on None or a missing "/".
-        model_name = None
-        if repo_id:
-            repo_parts = repo_id.split("/")
-            model_name = repo_parts[1] if len(repo_parts) > 1 else repo_parts[0]
-        known_templates = {
-            "windnet_india": "wind_india_model_card_template.md",
-            "pvnet_india": "pv_india_model_card_template.md",
-        }
-        model_card = known_templates.get(model_name, "pv_uk_regional_model_card_template.md")
-        # Creating and saving model card.
-        card_data = ModelCardData(language="en", license="mit", library_name="pytorch")
-        if card_template_path is None:
-            card_template_path = (
-                f"{os.path.dirname(os.path.abspath(__file__))}/model_cards/{model_card}"
-            )
-        # Normalise wandb_ids to a list: None means no runs, a bare string means one run
-        if wandb_ids is None:
-            wandb_ids = []
-        elif isinstance(wandb_ids, str):
-            wandb_ids = [wandb_ids]
-        run_urls = [f"https://wandb.ai/{wandb_repo}/runs/{wandb_id}" for wandb_id in wandb_ids]
-        wandb_links = "".join(f" - [{link}]({link})\n" for link in run_urls)
-        # Find package versions for OCF packages
-        packages_to_display = ["pvnet", "ocf-data-sampler"]
-        packages_and_versions = {
-            package_name: pkg_resources.get_distribution(package_name).version
-            for package_name in packages_to_display
-        }
-        package_versions_markdown = "".join(
-            f" - {package}=={version}\n" for package, version in packages_and_versions.items()
-        )
-        return ModelCard.from_template(
-            card_data,
-            template_path=card_template_path,
-            wandb_links=wandb_links,
-            package_versions=package_versions_markdown
-        )
-class BaseModel(pl.LightningModule, PVNetModelHubMixin):
-    """Abstract base class for PVNet submodels"""
-    def __init__(
-        self,
-        history_minutes: int,
-        forecast_minutes: int,
-        optimizer: AbstractOptimizer,
-        output_quantiles: Optional[list[float]] = None,
-        target_key: str = "gsp",
-        interval_minutes: int = 30,
-        timestep_intervals_to_plot: Optional[list[int]] = None,
-        forecast_minutes_ignore: Optional[int] = 0,
-        save_validation_results_csv: Optional[bool] = False,
-    ):
-        """Abstract base class for PVNet submodels.
-        Args:
-            history_minutes (int): Length of the GSP history period in minutes
-            forecast_minutes (int): Length of the GSP forecast period in minutes
-            optimizer (AbstractOptimizer): Optimizer
-            output_quantiles: A list of float (0.0, 1.0) quantiles to predict values for. If set to
-                None the output is a single value.
-            target_key: The key of the target variable in the batch
-            interval_minutes: The interval in minutes between each timestep in the data
-            timestep_intervals_to_plot: Intervals, in timesteps, to plot during training. Each
-                entry must be a list or tuple of length 2.
-            forecast_minutes_ignore: Number of forecast minutes to ignore when calculating losses.
-                For example if set to 60, the model doesn't predict the first 60 minutes. None is
-                treated as 0.
-            save_validation_results_csv: whether to save full csv outputs from validation results.
-        Raises:
-            ValueError: If an entry of `timestep_intervals_to_plot` is not a list or tuple of
-                length 2.
-        """
-        super().__init__()
-        self._optimizer = optimizer
-        self._target_key = target_key
-        if timestep_intervals_to_plot is not None:
-            for interval in timestep_intervals_to_plot:
-                # Raise explicitly rather than assert, so the check survives `python -O`
-                if not isinstance(interval, (list, tuple)) or len(interval) != 2:
-                    raise ValueError(
-                        f"timestep_intervals_to_plot must be a list of tuples or lists of length 2, "
-                        f"but got {timestep_intervals_to_plot=}"
-                    )
-        self.time_step_intervals_to_plot = timestep_intervals_to_plot
-        # Model must have lr to allow tuning
-        # This setting is only used when lr is tuned with callback
-        self.lr = None
-        # The signature allows None here; treat it as "ignore nothing" so the integer
-        # arithmetic below does not fail
-        if forecast_minutes_ignore is None:
-            forecast_minutes_ignore = 0
-        self.history_minutes = history_minutes
-        self.forecast_minutes = forecast_minutes
-        self.output_quantiles = output_quantiles
-        self.interval_minutes = interval_minutes
-        self.forecast_minutes_ignore = forecast_minutes_ignore
-        # Number of timesteps at the data's interval (interval_minutes) in each period
-        self.history_len = history_minutes // interval_minutes
-        self.forecast_len = (forecast_minutes - forecast_minutes_ignore) // interval_minutes
-        self.forecast_len_ignore = forecast_minutes_ignore // interval_minutes
-        self._accumulated_metrics = MetricAccumulator()
-        self._accumulated_batches = BatchAccumulator(key_to_keep=self._target_key)
-        self._accumulated_y_hat = PredAccumulator()
-        self._horizon_maes = MetricAccumulator()
-        # Store whether the model should use quantile regression or simply predict the mean
-        self.use_quantile_regression = self.output_quantiles is not None
-        # Store the number of output features that the model should predict for
-        if self.use_quantile_regression:
-            self.num_output_features = self.forecast_len * len(self.output_quantiles)
-        else:
-            self.num_output_features = self.forecast_len
-        # save all validation results to array, so we can save these to weights n biases
-        self.validation_epoch_results = []
-        self.save_validation_results_csv = save_validation_results_csv
-    def _adapt_batch(self, batch):
-        """Return a copy of the batch sliced to the shapes this model expects.
-        The input batch is deep-copied first, so the caller's batch is left untouched.
-        The slicing relies on these assumptions about the incoming batch:
-        - Only the future end of the time axis is trimmed; nothing is ever removed from the
-          left hand side of the time axis
-        - The satellite and NWP data are only ever spatially cropped to a smaller size
-        """
-        # Deep copy every entry so none of the slicing below can alter the original batch
-        adapted = {name: copy.deepcopy(item) for name, item in batch.items()}
-        # The GSP and solar coordinate series are both cut to this many timesteps
-        n_series_steps = self.forecast_len + self.history_len + 1
-        if "gsp" in adapted:
-            # Trim the end of the GSP data and its timestamps
-            for gsp_key in ("gsp", "gsp_time_utc"):
-                adapted[gsp_key] = adapted[gsp_key][:, :n_series_steps]
-        if self.include_sat:
-            # Trim the end of the satellite sequence, then crop spatially
-            # Shape: batch_size, seq_length, channel, height, width
-            sat_frames = adapted["satellite_actual"][:, : self.sat_sequence_len]
-            adapted["satellite_actual"] = center_crop(
-                sat_frames,
-                output_size=self.sat_encoder.image_size_pixels,
-            )
-        if self.include_nwp:
-            # Crop each NWP source spatially, then trim the end of its sequence
-            for nwp_source in self.nwp_encoders_dict:
-                encoder = self.nwp_encoders_dict[nwp_source]
-                # shape: batch_size, seq_len, n_chans, height, width
-                cropped = center_crop(
-                    adapted["nwp"][nwp_source]["nwp"],
-                    output_size=encoder.image_size_pixels,
-                )
-                adapted["nwp"][nwp_source]["nwp"] = cropped[:, : encoder.sequence_length]
-        if self.include_sun:
-            # Trim the end of whichever solar coordinates are present
-            for coord in ("solar_azimuth", "solar_elevation"):
-                if coord in adapted:
-                    adapted[coord] = adapted[coord][:, :n_series_steps]
-        return adapted
-    def transfer_batch_to_device(self, batch, device, dataloader_idx):
-        """Method to move custom batches to a given device
-        Delegates to `copy_batch_to_device(batch, device)` and returns its result.
-        Args:
-            batch: The batch to move
-            device: The device to move the batch to
-            dataloader_idx: Not used by this implementation
-        """
-        return copy_batch_to_device(batch, device)
-    def _quantiles_to_prediction(self, y_quantiles):
-        """
-        Reduce a quantile prediction to a point prediction by selecting the median.
-        Note:
-            Implementation copied from:
-                https://pytorch-forecasting.readthedocs.io/en/stable/_modules/pytorch_forecasting
-                /metrics/quantile.html#QuantileLoss.loss
-        Args:
-            y_quantiles: Quantile prediction of network, with the quantiles along the last axis
-                in the same order as `self.output_quantiles`
-        Returns:
-            torch.Tensor: Point prediction (the slice of the 0.5 quantile)
-        Raises:
-            ValueError: If 0.5 is not one of `self.output_quantiles`
-        """
-        # y_quantiles Shape: batch_size, seq_length, num_quantiles
-        median_position = self.output_quantiles.index(0.5)
-        return y_quantiles[..., median_position]
-    def _calculate_quantile_loss(self, y_quantiles, y):
-        """Calculate quantile loss.
-        For each quantile q the pinball loss max((q - 1) * e, q * e) of the error e = y - y_q is
-        computed; the result is twice the mean over all quantiles and all elements.
-        Note:
-            Implementation copied from:
-                https://pytorch-forecasting.readthedocs.io/en/stable/_modules/pytorch_forecasting
-                /metrics/quantile.html#QuantileLoss.loss
-        Args:
-            y_quantiles: Quantile prediction of network, with the quantiles along the last axis
-                in the same order as `self.output_quantiles`
-            y: Target values, with the shape of `y_quantiles` minus its last axis
-        Returns:
-            Quantile loss as a scalar tensor
-        """
-        per_quantile_losses = []
-        for i, q in enumerate(self.output_quantiles):
-            errors = y - y_quantiles[..., i]
-            per_quantile_losses.append(torch.max((q - 1) * errors, q * errors))
-        # Stack on a new last axis rather than a hard-coded dim so targets of any rank work
-        losses = 2 * torch.stack(per_quantile_losses, dim=-1)
-        return losses.mean()
-    def _calculate_common_losses(self, y, y_hat):
-        """Compute the losses shared by the training and validation steps.
-        Returns a dict with "MSE" and "MAE", preceded by "quantile_loss" when quantile
-        regression is used. In that case MSE and MAE are computed on the median prediction.
-        """
-        losses = {}
-        point_prediction = y_hat
-        if self.use_quantile_regression:
-            losses["quantile_loss"] = self._calculate_quantile_loss(y_hat, y)
-            point_prediction = self._quantiles_to_prediction(y_hat)
-        # TODO: Compute correlation coef using np.corrcoef(tensor with
-        # shape (2, num_timesteps))[0, 1] on each example, and taking
-        # the mean across the batch?
-        losses["MSE"] = F.mse_loss(point_prediction, y)
-        losses["MAE"] = F.l1_loss(point_prediction, y)
-        return losses
-    def _step_mae_and_mse(self, y, y_hat, dict_key_root):
-        """Compute the MSE and MAE separately for every forecast step.
-        The errors are averaged over the first axis. The returned dict holds one
-        "MSE_{dict_key_root}/step_XXX" entry per step followed by one
-        "MAE_{dict_key_root}/step_XXX" entry per step.
-        """
-        step_errors = y_hat - y
-        per_metric = {
-            "MSE": torch.mean(step_errors**2, dim=0),
-            "MAE": torch.mean(torch.abs(step_errors), dim=0),
-        }
-        losses = {}
-        for metric_name, values_each_step in per_metric.items():
-            for step, value in enumerate(values_each_step):
-                losses[f"{metric_name}_{dict_key_root}/step_{step:03}"] = value
-        return losses
-    def _calculate_val_losses(self, y, y_hat):
-        """Compute the extra losses which are only logged during validation.
-        These are the fraction of targets at or below each predicted quantile (quantile
-        regression only), the per-step MAE/MSE of the prediction, and the overall and per-step
-        MAE/MSE of a persistence baseline.
-        NOTE(review): the persistence baseline repeats `y[:, -1]`, i.e. the last element of `y`
-        itself. The callers in this class pass the forecast window as `y`, so this may not be
-        a true "last observed value" baseline - confirm.
-        """
-        losses = {}
-        if self.use_quantile_regression:
-            # Mask out small values, which are dominated by night
-            not_night = y >= 0.01
-            # Add fraction below each quantile for calibration
-            for i, quantile in enumerate(self.output_quantiles):
-                at_or_below = y <= y_hat[..., i]
-                losses[f"fraction_below_{quantile}_quantile"] = at_or_below[not_night].float().mean()
-            # Take median value for remaining metric calculations
-            y_hat = self._quantiles_to_prediction(y_hat)
-        # Log the loss at each time horizon
-        horizon_losses = self._step_mae_and_mse(y, y_hat, dict_key_root="horizon")
-        losses.update(horizon_losses)
-        # Log the persistence losses
-        last_value = y[:, -1].unsqueeze(1)
-        y_persist = last_value.expand(-1, self.forecast_len)
-        losses["MAE_persistence/val"] = F.l1_loss(y_persist, y)
-        losses["MSE_persistence/val"] = F.mse_loss(y_persist, y)
-        # Log persistence loss at each time horizon
-        persistence_losses = self._step_mae_and_mse(y, y_persist, dict_key_root="persistence")
-        losses.update(persistence_losses)
-        return losses
-    def _training_accumulate_log(self, batch, batch_idx, losses, y_hat):
-        """Accumulate training batches and log once an accumulation cycle is complete.
-        This is used when accumulating grad batches. It should make the variability in logged
-        training step metrics independent of whether we accumulate N batches of size B or just
-        use a larger batch size of N*B with no accumulation.
-        """
-        # Store detached CPU copies of the losses and predictions alongside the batch
-        self._accumulated_metrics.append({k: v.detach().cpu() for k, v in losses.items()})
-        self._accumulated_batches.append(batch)
-        self._accumulated_y_hat.append(y_hat.detach().cpu())
-        # Nothing is logged until the trainer stops accumulating
-        if self.trainer.fit_loop._should_accumulate():
-            return
-        losses = self._accumulated_metrics.flush()
-        batch = self._accumulated_batches.flush()
-        y_hat = self._accumulated_y_hat.flush()
-        self.log_dict(losses, on_step=True, on_epoch=True)
-        # Number of accumulated grad batches
-        grad_batch_num = (batch_idx + 1) / self.trainer.accumulate_grad_batches
-        # We only create the figure every 8 log steps
-        # This was reduced as it was creating figures too often
-        plot_period = 8 * self.trainer.log_every_n_steps
-        if grad_batch_num % plot_period != 0:
-            return
-        fig = plot_batch_forecasts(
-            batch,
-            y_hat,
-            batch_idx,
-            quantiles=self.output_quantiles,
-            key_to_plot=self._target_key,
-        )
-        fig.savefig("latest_logged_train_batch.png")
-        plt.close(fig)
-    def training_step(self, batch, batch_idx):
-        """Run one training step and return the loss to optimise.
-        The optimisation target is the quantile loss when quantile regression is used and the
-        MAE otherwise.
-        """
-        y_hat = self(batch)
-        # Batch is adapted in the model forward method, but needs to be adapted here too
-        batch = self._adapt_batch(batch)
-        y = batch[self._target_key][:, -self.forecast_len :]
-        common_losses = self._calculate_common_losses(y, y_hat)
-        losses = {f"{name}/train": value for name, value in common_losses.items()}
-        self._training_accumulate_log(batch, batch_idx, losses, y_hat)
-        target_name = "quantile_loss/train" if self.use_quantile_regression else "MAE/train"
-        return losses[target_name]
-    def _log_forecast_plot(self, batch, y_hat, accum_batch_num, timesteps_to_plot, plot_suffix):
-        """Plot the forecasts for a batch and log the figure to wandb.
-        A failure while logging is printed instead of raised. The figure is closed afterwards.
-        NOTE(review): `timesteps_to_plot` is accepted but not used anywhere in this method, so
-        every call plots the same data regardless of the interval passed - confirm whether it
-        should be forwarded to the plotting function.
-        """
-        plot_name = f"val_forecast_samples/batch_idx_{accum_batch_num}_{plot_suffix}"
-        fig = plot_batch_forecasts(
-            batch,
-            y_hat,
-            quantiles=self.output_quantiles,
-            key_to_plot=self._target_key,
-        )
-        try:
-            image = wandb.Image(fig)
-            self.logger.experiment.log({plot_name: image})
-        except Exception as e:
-            print(f"Failed to log {plot_name} to wandb")
-            print(e)
-        plt.close(fig)
-    def _log_validation_results(self, batch, y_hat, accum_batch_num):
-        """Append validation results to self.validation_epoch_results
-        One DataFrame is appended per example in the batch. Each has one row per forecast step
-        with the columns "y", "time_utc", the prediction ("y_hat", or one "y_quantile_{q}"
-        column per quantile), "id", "batch_idx" and "example_idx".
-        Args:
-            batch: The adapted batch; must contain the target key plus its "_time_utc" and "_id"
-                entries
-            y_hat: The model prediction for the batch
-            accum_batch_num: Index of the accumulated batch, stored in the "batch_idx" column
-        """
-        # get truth values, shape (b, forecast_len)
-        y = batch[self._target_key][:, -self.forecast_len :].detach().cpu().numpy()
-        batch_size = y.shape[0]
-        # get prediction values, shape (b, forecast_len, quantiles?)
-        y_hat = y_hat.detach().cpu().numpy()
-        # get time_utc, shape (b, forecast_len)
-        time_utc_key = f"{self._target_key}_time_utc"
-        time_utc = batch[time_utc_key][:, -self.forecast_len :].detach().cpu().numpy()
-        # get target id and change from (b,1) to (b,)
-        id_key = f"{self._target_key}_id"
-        target_id = batch[id_key].detach().cpu().numpy().squeeze()
-        # For a batch of one, squeeze() collapses the array to 0-d which cannot be indexed
-        # below, so restore the batch axis
-        if target_id.ndim == 0:
-            target_id = target_id.reshape(1)
-        for example_idx in range(batch_size):
-            y_hat_example = y_hat[example_idx]
-            results_dict = {
-                "y": y[example_idx],
-                "time_utc": time_utc[example_idx],
-            }
-            if self.use_quantile_regression:
-                # A distinct index name avoids shadowing the example index
-                for quantile_idx, q in enumerate(self.output_quantiles):
-                    results_dict[f"y_quantile_{q}"] = y_hat_example[:, quantile_idx]
-            else:
-                results_dict["y_hat"] = y_hat_example
-            results_df = pd.DataFrame(results_dict)
-            results_df["id"] = target_id[example_idx]
-            results_df["batch_idx"] = accum_batch_num
-            results_df["example_idx"] = example_idx
-            self.validation_epoch_results.append(results_df)
-    def validation_step(self, batch: dict, batch_idx):
-        """Run one validation step, log its losses and return them.
-        Validation results are stored and, when a wandb logger is used, forecast plots for the
-        first two accumulated batches are logged. Both happen only on the last batch of each
-        accumulation cycle.
-        """
-        accumulate_every = self.trainer.accumulate_grad_batches
-        accum_batch_num = batch_idx // accumulate_every
-        is_end_of_accumulation = (batch_idx + 1) % accumulate_every == 0
-        y_hat = self(batch)
-        # Batch is adapted in the model forward method, but needs to be adapted here too
-        batch = self._adapt_batch(batch)
-        y = batch[self._target_key][:, -self.forecast_len :]
-        if is_end_of_accumulation:
-            self._log_validation_results(batch, y_hat, accum_batch_num)
-        losses = self._calculate_common_losses(y, y_hat)
-        losses.update(self._calculate_val_losses(y, y_hat))
-        # Store these to make horizon accuracy plot
-        step_maes = {}
-        for step in range(self.forecast_len):
-            step_maes[step] = losses[f"MAE_horizon/step_{step:03}"].cpu().numpy()
-        self._horizon_maes.append(step_maes)
-        logged_losses = {f"{name}/val": value for name, value in losses.items()}
-        self.log_dict(logged_losses, on_step=False, on_epoch=True)
-        # Make plots only if using wandb logger
-        if not (isinstance(self.logger, pl.loggers.WandbLogger) and accum_batch_num in [0, 1]):
-            return logged_losses
-        # Store these temporarily under self
-        if not hasattr(self, "_val_y_hats"):
-            self._val_y_hats = PredAccumulator()
-            self._val_batches = BatchAccumulator(key_to_keep=self._target_key)
-        self._val_y_hats.append(y_hat)
-        self._val_batches.append(batch)
-        # Plot once the batch has accumulated
-        if is_end_of_accumulation:
-            y_hat = self._val_y_hats.flush()
-            batch = self._val_batches.flush()
-            self._log_forecast_plot(
-                batch,
-                y_hat,
-                accum_batch_num,
-                timesteps_to_plot=None,
-                plot_suffix="all",
-            )
-            intervals = self.time_step_intervals_to_plot
-            if intervals is not None:
-                for interval in intervals:
-                    self._log_forecast_plot(
-                        batch,
-                        y_hat,
-                        accum_batch_num,
-                        timesteps_to_plot=interval,
-                        plot_suffix=f"timestep_{interval}",
-                    )
-            # Drop the temporary accumulators so the next cycle starts fresh
-            del self._val_y_hats
-            del self._val_batches
-        return logged_losses
-    def on_validation_epoch_end(self):
-        """Log the validation results gathered over the epoch.
-        Concatenates the stored validation results, logs percentiles of the forecast error to
-        wandb (wandb logger only), optionally saves the full results as a csv artifact, and logs
-        the MAE-per-horizon curve (wandb logger only). Failures while logging are printed
-        rather than raised. The stored results are cleared afterwards.
-        """
-        try:
-            # join together validation results, and save to wandb
-            validation_results_df = pd.concat(self.validation_epoch_results)
-            # The prediction column depends on the model type: quantile models store the median
-            # under "y_quantile_0.5" while point-prediction models store "y_hat". Using the
-            # quantile column unconditionally made this whole block fail for point models.
-            prediction_column = "y_quantile_0.5" if self.use_quantile_regression else "y_hat"
-            validation_results_df["error"] = (
-                validation_results_df["y"] - validation_results_df[prediction_column]
-            )
-            if isinstance(self.logger, pl.loggers.WandbLogger):
-                error = validation_results_df["error"]
-                absolute_error = abs(error)
-                # log error distribution metrics
-                wandb.log(
-                    {
-                        "2nd_percentile_median_forecast_error": error.quantile(0.02),
-                        "5th_percentile_median_forecast_error": error.quantile(0.05),
-                        "95th_percentile_median_forecast_error": error.quantile(0.95),
-                        "98th_percentile_median_forecast_error": error.quantile(0.98),
-                        "95th_percentile_median_forecast_absolute_error": (
-                            absolute_error.quantile(0.95)
-                        ),
-                        "98th_percentile_median_forecast_absolute_error": (
-                            absolute_error.quantile(0.98)
-                        ),
-                    }
-                )
-            # saving validation result csvs
-            if self.save_validation_results_csv:
-                with tempfile.TemporaryDirectory() as tempdir:
-                    filename = os.path.join(tempdir, f"validation_results_{self.current_epoch}.csv")
-                    validation_results_df.to_csv(filename, index=False)
-                    # make and log wandb artifact
-                    validation_artifact = wandb.Artifact(
-                        f"validation_results_epoch_{self.current_epoch}", type="dataset"
-                    )
-                    validation_artifact.add_file(filename)
-                    wandb.log_artifact(validation_artifact)
-        except Exception as e:
-            # Best effort: a logging failure must not stop training
-            print("Failed to log validation results to wandb")
-            print(e)
-        self.validation_epoch_results = []
-        horizon_maes_dict = self._horizon_maes.flush()
-        # Create the horizon accuracy curve
-        if isinstance(self.logger, pl.loggers.WandbLogger):
-            per_step_losses = [[i, horizon_maes_dict[i]] for i in range(self.forecast_len)]
-            try:
-                table = wandb.Table(data=per_step_losses, columns=["horizon_step", "MAE"])
-                wandb.log(
-                    {
-                        "horizon_loss_curve": wandb.plot.line(
-                            table, "horizon_step", "MAE", title="Horizon loss curve"
-                        )
-                    },
-                )
-            except Exception as e:
-                print("Failed to log horizon_loss_curve to wandb")
-                print(e)
-    def configure_optimizers(self):
-        """Build the optimizer, first applying the tuned learning rate if one was set.
-        `self.lr` is None unless it has been set, e.g. by the learning rate finder callback.
-        """
-        tuned_lr = self.lr
-        if tuned_lr is not None:
-            # Use learning rate found by learning rate finder callback
-            self._optimizer.lr = tuned_lr
-        return self._optimizer(self)

pvnet/models/baseline/__init__.py DELETED Viewed

	@@ -1 +0,0 @@
1	- """Baselines"""

pvnet/models/baseline/last_value.py DELETED Viewed

@@ -1,42 +0,0 @@
-"""Persistence model"""
-import pvnet
-from pvnet.models.base_model import BaseModel
-from pvnet.optimizers import AbstractOptimizer
-class Model(BaseModel):
-    """Persistence baseline which repeats the last historical gsp yield over the forecast."""
-    name = "last_value"
-    def __init__(
-        self,
-        forecast_minutes: int = 12,
-        history_minutes: int = 6,
-        optimizer: AbstractOptimizer = pvnet.optimizers.Adam(),
-    ):
-        """Persistence baseline which repeats the last historical gsp yield over the forecast.
-        NOTE(review): the default optimizer is instantiated once when the class is defined, so
-        it is shared by every model created with the default - confirm this is intended.
-        NOTE(review): the default durations are shorter than BaseModel's default
-        interval_minutes of 30, which gives a forecast length of zero steps - confirm.
-        Args:
-            history_minutes (int): Length of the GSP history period in minutes
-            forecast_minutes (int): Length of the GSP forecast period in minutes
-            optimizer (AbstractOptimizer): Optimizer
-        """
-        super().__init__(
-            history_minutes=history_minutes,
-            forecast_minutes=forecast_minutes,
-            optimizer=optimizer,
-        )
-        self.save_hyperparameters()
-    def forward(self, x: dict):
-        """Run model forward on dict batch of data"""
-        # Pick the value one step before the final `forecast_len` steps of the gsp series,
-        # i.e. the last value which is not part of the forecast
-        # assumes x["gsp"] is (batch_size, seq_length) - TODO confirm
-        last_observed = x["gsp"][:, -(self.forecast_len + 1)]
-        # Copy that value forward to every forecast step
-        return last_observed.unsqueeze(1).repeat(1, self.forecast_len)

pvnet/models/baseline/readme.md DELETED Viewed

@@ -1,5 +0,0 @@
-# Baseline Models
- - `last_value` - Forecast the sample's last historical PV yield for every forecast step
- - `single_value` - Learns a single value estimate and predicts this value for every input and every
-     forecast step.

pvnet/models/baseline/single_value.py DELETED Viewed

@@ -1,36 +0,0 @@
-"""Average value model"""
-import torch
-from torch import nn
-import pvnet
-from pvnet.models.base_model import BaseModel
-from pvnet.optimizers import AbstractOptimizer
-class Model(BaseModel):
-    """Baseline which predicts one learned value for every input and forecast step."""
-    name = "single_value"
-    def __init__(
-        self,
-        forecast_minutes: int = 120,
-        history_minutes: int = 60,
-        optimizer: AbstractOptimizer = pvnet.optimizers.Adam(),
-    ):
-        """Baseline which predicts one learned value for every input and forecast step.
-        NOTE(review): the default optimizer is instantiated once when the class is defined, so
-        it is shared by every model created with the default - confirm this is intended.
-        Args:
-            history_minutes (int): Length of the GSP history period in minutes
-            forecast_minutes (int): Length of the GSP forecast period in minutes
-            optimizer (AbstractOptimizer): Optimizer
-        """
-        super().__init__(
-            history_minutes=history_minutes,
-            forecast_minutes=forecast_minutes,
-            optimizer=optimizer,
-        )
-        # The single trainable parameter, initialised to zero
-        self._value = nn.Parameter(torch.zeros(1), requires_grad=True)
-        self.save_hyperparameters()
-    def forward(self, x: dict):
-        """Run model forward on dict batch of data"""
-        # The first `forecast_len` steps of the gsp data are only used as a template for the
-        # shape, dtype and device of the output
-        template = x["gsp"][:, : self.forecast_len]
-        # Returns a single value at all steps
-        return torch.zeros_like(template) + self._value

pvnet/models/ensemble.py DELETED Viewed

@@ -1,74 +0,0 @@
-"""Model which uses mutliple prediction heads"""
-from typing import Optional
-import torch
-from torch import nn
-from pvnet.models.base_model import BaseModel
-class Ensemble(BaseModel):
-    """Weighted ensemble of PVNet models"""
-    def __init__(
-        self,
-        model_list: list[BaseModel],
-        weights: Optional[list[float]] = None,
-    ):
-        """Weighted ensemble of PVNet models
-        The models must agree on their output quantiles, history and forecast lengths, target
-        key and interval; an AssertionError is raised otherwise.
-        Args:
-            model_list: A list of PVNet models to ensemble
-            weights: A list of weighting to apply to each model. If None, the models are weighted
-                equally. The weights are normalised to sum to one.
-        """
-        # Surface check all the models are compatible: gather the same properties from each
-        # model, one tuple per model
-        model_properties = [
-            (
-                model.output_quantiles,
-                model.history_minutes,
-                model.forecast_minutes,
-                model._target_key,
-                model.interval_minutes,
-            )
-            for model in model_list
-        ]
-        # Transpose to one group per property and check each group holds a single value
-        for property_values in zip(*model_properties):
-            param_list = list(property_values)
-            assert all([p == param_list[0] for p in param_list]), param_list
-        (
-            output_quantiles,
-            history_minutes,
-            forecast_minutes,
-            target_key,
-            interval_minutes,
-        ) = model_properties[0]
-        super().__init__(
-            history_minutes=history_minutes,
-            forecast_minutes=forecast_minutes,
-            optimizer=None,
-            output_quantiles=output_quantiles,
-            target_key=target_key,
-            interval_minutes=interval_minutes,
-        )
-        self.model_list = nn.ModuleList(model_list)
-        n_models = len(model_list)
-        if weights is not None:
-            assert len(weights) == n_models
-            weights = torch.Tensor(weights) / sum(weights)
-        else:
-            weights = torch.ones(n_models) / n_models
-        # Stored as a parameter which is never trained
-        self.weights = nn.Parameter(weights, requires_grad=False)
-    def forward(self, batch):
-        """Run the model forward"""
-        # Accumulate the weighted sum of the predictions of all the models
-        weighted_sum = 0
-        for weight, model in zip(self.weights, self.model_list):
-            weighted_sum = model(batch) * weight + weighted_sum
-        return weighted_sum

pvnet/models/model_cards/pv_india_model_card_template.md DELETED Viewed

@@ -1,56 +0,0 @@
----
-{{ card_data }}
----
-# PVNet India
-## Model Description
-<!-- Provide a longer summary of what this model is/does. -->
-This model class uses numerical weather predictions from providers such as ECMWF to forecast the PV power in North West India over the next 48 hours. More information can be found in the model repo [1] and experimental notes [here](https://github.com/openclimatefix/PVNet/tree/main/experiments/india).
-- **Developed by:** openclimatefix
-- **Model type:** Fusion model
-- **Language(s) (NLP):** en
-- **License:** mit
-# Training Details
-## Data
-<!-- This should link to a Data Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
-The model is trained on data from 2019-2022 and validated on data from 2022-2023. See experimental notes [here](https://github.com/openclimatefix/PVNet/tree/main/experiments/india)
-### Preprocessing
-Data is prepared with the `ocf_data_sampler/torch_datasets/datasets/site` Dataset [2].
-## Results
-The training logs for the current model can be found here:
-{{ wandb_links }}
-### Hardware
-Trained on a single NVIDIA Tesla T4
-### Software
-This model was trained using the following Open Climate Fix packages:
-- [1] https://github.com/openclimatefix/PVNet
-- [2] https://github.com/openclimatefix/ocf-data-sampler
-The versions of these packages can be found below:
-{{ package_versions }}

pvnet/models/model_cards/pv_uk_regional_model_card_template.md DELETED Viewed

@@ -1,59 +0,0 @@
----
-{{ card_data }}
----
-# PVNet2
-## Model Description
-<!-- Provide a longer summary of what this model is/does. -->
-This model class uses satellite data, numerical weather predictions, and recent Grid Supply Point (GSP) PV power output to forecast the near-term (~8 hours) PV power output at all GSPs. More information can be found in the model repo [1] and experimental notes in [this google doc](https://docs.google.com/document/d/1fbkfkBzp16WbnCg7RDuRDvgzInA6XQu3xh4NCjV-WDA/edit?usp=sharing).
-- **Developed by:** openclimatefix
-- **Model type:** Fusion model
-- **Language(s) (NLP):** en
-- **License:** mit
-# Training Details
-## Data
-<!-- This should link to a Data Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
-The model is trained on data from 2019-2022 and validated on data from 2022-2023. See experimental notes in [the google doc](https://docs.google.com/document/d/1fbkfkBzp16WbnCg7RDuRDvgzInA6XQu3xh4NCjV-WDA/edit?usp=sharing) for more details.
-### Preprocessing
-Data is prepared with the `ocf_data_sampler/torch_datasets/datasets/pvnet_uk` Dataset [2].
-## Results
-The training logs for the current model can be found here:
-{{ wandb_links }}
-The training logs for all model runs of PVNet2 can be found [here](https://wandb.ai/openclimatefix/pvnet2.1).
-Some experimental notes can be found in [the google doc](https://docs.google.com/document/d/1fbkfkBzp16WbnCg7RDuRDvgzInA6XQu3xh4NCjV-WDA/edit?usp=sharing).
-### Hardware
-Trained on a single NVIDIA Tesla T4
-### Software
-This model was trained using the following Open Climate Fix packages:
-- [1] https://github.com/openclimatefix/PVNet
-- [2] https://github.com/openclimatefix/ocf-data-sampler
-The versions of these packages can be found below:
-{{ package_versions }}

pvnet/models/model_cards/wind_india_model_card_template.md DELETED Viewed

@@ -1,56 +0,0 @@
----
-{{ card_data }}
----
-# WindNet
-## Model Description
-<!-- Provide a longer summary of what this model is/does. -->
-This model class uses numerical weather predictions from providers such as ECMWF to forecast the wind power in North West India over the next 48 hours at 15 minute granularity. More information can be found in the model repo [1] and experimental notes [here](https://github.com/openclimatefix/PVNet/tree/main/experiments/india).
-- **Developed by:** openclimatefix
-- **Model type:** Fusion model
-- **Language(s) (NLP):** en
-- **License:** mit
-# Training Details
-## Data
-<!-- This should link to a Data Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
-The model is trained on data from 2019-2022 and validated on data from 2022-2023. See experimental notes [here](https://github.com/openclimatefix/PVNet/tree/main/experiments/india)
-### Preprocessing
-Data is prepared with the `ocf_data_sampler/torch_datasets/datasets/site` Dataset [2].
-## Results
-The training logs for the current model can be found here:
-{{ wandb_links }}
-### Hardware
-Trained on a single NVIDIA Tesla T4
-### Software
-This model was trained using the following Open Climate Fix packages:
-- [1] https://github.com/openclimatefix/PVNet
-- [2] https://github.com/openclimatefix/ocf-data-sampler
-The versions of these packages can be found below:
-{{ package_versions }}

pvnet/models/multimodal/__init__.py DELETED Viewed

	@@ -1 +0,0 @@
1	- """Multimodal Models"""

pvnet/models/multimodal/basic_blocks.py DELETED Viewed

@@ -1,104 +0,0 @@
-"""Basic layers for composite models"""
-import warnings
-import torch
-from torch import _VF, nn
-class ImageEmbedding(nn.Module):
-    """An embedding layer which concatenates an ID embedding as a new channel onto 3D inputs."""
-    def __init__(self, num_embeddings, sequence_length, image_size_pixels, **kwargs):
-        """An embedding layer which concatenates an ID embedding as a new channel onto 3D inputs.
-        Each ID maps to a learned vector of `image_size_pixels**2` values which is viewed as a
-        single square 2D image. In `forward` that image is repeated `sequence_length` times along
-        dim 2 and concatenated onto the input along dim 1.
-        Args:
-            num_embeddings: Size of the dictionary of embeddings
-            sequence_length: The time sequence length of the data.
-            image_size_pixels: The spatial size of the image. Assumed square.
-            **kwargs: See `torch.nn.Embedding` for more possible arguments.
-        """
-        super().__init__()
-        self.image_size_pixels = image_size_pixels
-        self.sequence_length = sequence_length
-        # One flattened (image_size_pixels x image_size_pixels) image per ID
-        self._embed = nn.Embedding(
-            num_embeddings=num_embeddings,
-            embedding_dim=image_size_pixels * image_size_pixels,
-            **kwargs,
-        )
-    def forward(self, x, id):
-        """Append ID embedding to image
-        Args:
-            x: Input tensor. Must be 5D with dim 2 of length `sequence_length` and the last two
-                dims of size `image_size_pixels` for the concatenation below to succeed.
-            id: Indices looked up in the embedding table. Presumably one ID per sample in the
-                batch - TODO confirm against callers.
-        Returns:
-            `x` with one extra entry along dim 1 holding the embedding image.
-        """
-        emb = self._embed(id)
-        # Un-flatten each embedding vector into a square image with singleton dims 1 and 2
-        emb = emb.reshape((-1, 1, 1, self.image_size_pixels, self.image_size_pixels))
-        # Copy the same image to every step along dim 2
-        emb = emb.repeat(1, 1, self.sequence_length, 1, 1)
-        x = torch.cat((x, emb), dim=1)
-        return x
-class CompleteDropoutNd(nn.Module):
-    """A layer used to completely drop out all elements of a N-dimensional sample.
-    Each sample will be zeroed out independently on every forward call with probability `p` using
-    samples from a Bernoulli distribution.
-    """
-    __constants__ = ["p", "inplace", "n_dim"]
-    p: float
-    inplace: bool
-    n_dim: int
-    def __init__(self, n_dim, p=0.5, inplace=False):
-        """A layer used to completely drop out all elements of a N-dimensional sample.
-        There is no `training` argument: `forward` passes the module's own `self.training` flag
-        to the underlying dropout call.
-        Args:
-            n_dim: Number of dimensions of each sample not including channels. E.g. a sample with
-                shape (channel, time, height, width) would use `n_dim=3`.
-            p: probability of a sample to be zeroed. Default: 0.5
-            inplace: If set to `True`, will do this operation in-place. Default: `False`
-        Raises:
-            ValueError: If `p` is outside the range [0, 1].
-        """
-        super().__init__()
-        if p < 0 or p > 1:
-            raise ValueError(
-                "dropout probability has to be between 0 and 1, " "but got {}".format(p)
-            )
-        self.p = p
-        self.inplace = inplace
-        self.n_dim = n_dim
-    def forward(self, input: torch.Tensor) -> torch.Tensor:
-        """Run dropout
-        Args:
-            input: Either a single sample with `n_dim + 1` dimensions or a batch of samples with
-                `n_dim + 2` dimensions. Any other rank only triggers a warning and is then
-                processed as if it were a single un-batched sample.
-        Returns:
-            A tensor with the same number of dimensions as `input`.
-        """
-        p = self.p
-        inp_dim = input.dim()
-        if inp_dim not in (self.n_dim + 1, self.n_dim + 2):
-            warn_msg = (
-                f"CompleteDropoutNd: Received a {inp_dim}-D input. Expected either a single sample"
-                f" with {self.n_dim+1} dimensions, or a batch of samples with {self.n_dim+2}"
-                " dimensions."
-            )
-            warnings.warn(warn_msg)
-        is_batched = inp_dim == self.n_dim + 2
-        # Give an un-batched sample a leading batch dim of size 1. With `inplace=True` the
-        # `unsqueeze_`/`squeeze_` calls change the caller's tensor shape in place; the matching
-        # squeeze calls below undo it before returning.
-        if not is_batched:
-            input = input.unsqueeze_(0) if self.inplace else input.unsqueeze(0)
-        # Insert a singleton dim at position 1 ahead of the sample dims. Presumably
-        # `feature_dropout` zeroes whole dim-1 slices, so the entire sample is dropped as one
-        # unit - TODO confirm against the torch implementation.
-        input = input.unsqueeze_(1) if self.inplace else input.unsqueeze(1)
-        result = (
-            _VF.feature_dropout_(input, p, self.training)
-            if self.inplace
-            else _VF.feature_dropout(input, p, self.training)
-        )
-        # Remove the helper dims again, in reverse order of insertion
-        result = result.squeeze_(1) if self.inplace else result.squeeze(1)
-        if not is_batched:
-            result = result.squeeze_(0) if self.inplace else result.squeeze(0)
-        return result

pvnet/models/multimodal/encoders/__init__.py DELETED Viewed

	@@ -1 +0,0 @@
1	- """Submodels to encode satellite and NWP inputs"""

pvnet/models/multimodal/encoders/basic_blocks.py DELETED Viewed

@@ -1,217 +0,0 @@
-"""Basic blocks for image sequence encoders"""
-from abc import ABCMeta, abstractmethod
-import torch
-from torch import nn
-class AbstractNWPSatelliteEncoder(nn.Module, metaclass=ABCMeta):
-    """Abstract class for NWP/satellite encoder.
-    The encoder will take an input of shape (batch_size, sequence_length, channels, height, width)
-    and return an output of shape (batch_size, out_features).
-    Subclasses must implement `forward`.
-    """
-    def __init__(
-        self,
-        sequence_length: int,
-        image_size_pixels: int,
-        in_channels: int,
-        out_features: int,
-    ):
-        """Abstract class for NWP/satellite encoder.
-        Args:
-            sequence_length: The time sequence length of the data.
-            image_size_pixels: The spatial size of the image. Assumed square.
-            in_channels: Number of input channels. Accepted for a uniform constructor signature
-                but not stored on the instance by this base class.
-            out_features: Number of output features.
-        """
-        super().__init__()
-        self.out_features = out_features
-        self.image_size_pixels = image_size_pixels
-        self.sequence_length = sequence_length
-    @abstractmethod
-    def forward(self):
-        """Run model forward
-        Abstract placeholder; the signature here takes no input, concrete subclasses define
-        their own.
-        """
-        pass
-class ResidualConv3dBlock(nn.Module):
-    """Residual block of 3D convolutions based on ResNet architecture.
-    Internally, this network uses ELU activations throughout the residual blocks.
-    """
-    def __init__(
-        self,
-        in_channels,
-        n_layers: int = 2,
-        dropout_frac: float = 0.0,
-    ):
-        """Residual block of 3D convolutions based on ResNet architecture.
-        The residual pathway is `n_layers` repetitions of ELU -> Conv3d -> Dropout3d. Every
-        convolution keeps the channel count and uses a 3x3x3 kernel with padding 1, so the
-        pathway output can be added to the input.
-        Args:
-            in_channels: Number of input channels.
-            n_layers: Number of layers in residual pathway.
-            dropout_frac: Probability of an element to be zeroed.
-        """
-        super().__init__()
-        layers = []
-        for i in range(n_layers):
-            layers += [
-                nn.ELU(),
-                nn.Conv3d(
-                    in_channels=in_channels,
-                    out_channels=in_channels,
-                    kernel_size=(3, 3, 3),
-                    padding=(1, 1, 1),
-                ),
-                nn.Dropout3d(p=dropout_frac),
-            ]
-        self.model = nn.Sequential(*layers)
-    def forward(self, x):
-        """Run residual connection: residual pathway output plus the unchanged input"""
-        return self.model(x) + x
-class ResidualConv3dBlock2(nn.Module):
-    """Residual block of 'full pre-activation' similar to the block in figure 4(e) of [1].
-    This was the best performing residual block tested in the study. This implementation differs
-    from that block just by using LeakyReLU activation to avoid dead neurons, and by including
-    optional dropout in the residual branch. This is also a 3D convolutional residual block
-    rather than a 2D convolutional block.
-    Sources:
-        [1] https://arxiv.org/pdf/1603.05027.pdf
-    """
-    def __init__(
-        self,
-        in_channels: int,
-        n_layers: int = 2,
-        dropout_frac: float = 0.0,
-        batch_norm: bool = True,
-    ):
-        """Residual block of 'full pre-activation' similar to the block in figure 4(e) of [1].
-        The residual pathway is `n_layers` repetitions of
-        (BatchNorm3d, if enabled) -> Dropout3d -> LeakyReLU -> Conv3d. Every convolution keeps
-        the channel count and uses a 3x3x3 kernel with padding 1, so the pathway output can be
-        added to the input.
-        Sources:
-            [1] https://arxiv.org/pdf/1603.05027.pdf
-        Args:
-            in_channels: Number of input channels.
-            n_layers: Number of layers in residual pathway.
-            dropout_frac: Probability of an element to be zeroed.
-            batch_norm: Whether to use batchnorm
-        """
-        super().__init__()
-        layers = []
-        for i in range(n_layers):
-            if batch_norm:
-                layers.append(nn.BatchNorm3d(in_channels))
-            layers.extend(
-                [
-                    nn.Dropout3d(p=dropout_frac),
-                    nn.LeakyReLU(),
-                    nn.Conv3d(
-                        in_channels=in_channels,
-                        out_channels=in_channels,
-                        kernel_size=(3, 3, 3),
-                        padding=(1, 1, 1),
-                    ),
-                ]
-            )
-        self.model = nn.Sequential(*layers)
-    def forward(self, x):
-        """Run model forward: residual pathway output plus the unchanged input"""
-        return self.model(x) + x
-class ImageSequenceEncoder(nn.Module):
-    """Simple network which independently encodes each image in a sequence into 1D features"""
-    def __init__(
-        self,
-        image_size_pixels: int,
-        in_channels: int,
-        number_of_conv2d_layers: int = 4,
-        conv2d_channels: int = 32,
-        fc_features: int = 128,
-    ):
-        """Simple network which independently encodes each image in a sequence into 1D features.
-        For input image with shape [N, C, L, H, W] the output is of shape [N, L, fc_features] where
-        N is number of samples in batch, C is the number of input channels, L is the length of the
-        sequence, and H and W are the height and width.
-        Args:
-            image_size_pixels: The spatial size of the image. Assumed square.
-            in_channels: Number of input channels.
-            number_of_conv2d_layers: Number of convolution 2D layers that are used.
-            conv2d_channels: Number of channels used in each conv2d layer.
-            fc_features: Number of output nodes for each image in each sequence.
-        Raises:
-            ValueError: If the convolutions would shrink the image below 1x1.
-        """
-        super().__init__()
-        # Check that the output shape of the convolutional layers will be at least 1x1
-        # Each unpadded 3x3 convolution removes 2 pixels from the height and the width
-        cnn_spatial_output_size = image_size_pixels - 2 * number_of_conv2d_layers
-        if not (cnn_spatial_output_size >= 1):
-            raise ValueError(
-                f"cannot use this many conv2d layers ({number_of_conv2d_layers}) with this input "
-                f"spatial size ({image_size_pixels})"
-            )
-        # NOTE(review): the first convolution below is always added, so with
-        # `number_of_conv2d_layers=0` the size computed above would not match the actual
-        # convolution output - confirm whether 0 needs to be rejected.
-        conv_layers = []
-        # First layer maps the input channels onto `conv2d_channels`
-        conv_layers += [
-            nn.Conv2d(
-                in_channels=in_channels,
-                out_channels=conv2d_channels,
-                kernel_size=3,
-                padding=0,
-            ),
-            nn.ELU(),
-        ]
-        # Remaining layers keep the channel count fixed
-        for i in range(0, number_of_conv2d_layers - 1):
-            conv_layers += [
-                nn.Conv2d(
-                    in_channels=conv2d_channels,
-                    out_channels=conv2d_channels,
-                    kernel_size=3,
-                    padding=0,
-                ),
-                nn.ELU(),
-            ]
-        self.conv_layers = nn.Sequential(*conv_layers)
-        # Fully connected head applied to the flattened feature map of each image
-        self.final_block = nn.Sequential(
-            nn.Linear(
-                in_features=(cnn_spatial_output_size**2) * conv2d_channels,
-                out_features=fc_features,
-            ),
-            nn.ELU(),
-        )
-    def forward(self, x):
-        """Run model forward
-        Args:
-            x: 5D tensor unpacked as (batch_size, channel, seq_len, height, width).
-        Returns:
-            Tensor reshaped to (batch_size, seq_len, -1).
-        """
-        batch_size, channel, seq_len, height, width = x.shape
-        # Move the sequence axis next to the batch axis, then fold it into the batch so that
-        # every image in every sequence goes through the 2D convolutions independently
-        x = torch.swapaxes(x, 1, 2)
-        x = x.reshape(batch_size * seq_len, channel, height, width)
-        out = self.conv_layers(x)
-        # Flatten each image's feature map for the linear layer
-        out = out.reshape(batch_size * seq_len, -1)
-        out = self.final_block(out)
-        # Unfold the sequence axis from the batch axis again
-        out = out.reshape(batch_size, seq_len, -1)
-        return out

pvnet/models/multimodal/encoders/encoders2d.py DELETED Viewed

@@ -1,413 +0,0 @@
-"""Encoder modules for the satellite/NWP data.
-These networks naively stack the sequences into extra channels before putting through their
-architectures.
-"""
-from functools import partial
-from typing import Any, Callable, List, Optional, Sequence, Type, Union
-import torch
-from torch import Tensor, nn
-from torchvision.models.convnext import CNBlock, CNBlockConfig, LayerNorm2d
-from torchvision.models.resnet import BasicBlock, Bottleneck, conv1x1
-from torchvision.ops.misc import Conv2dNormActivation
-from torchvision.utils import _log_api_usage_once
-from pvnet.models.multimodal.encoders.basic_blocks import AbstractNWPSatelliteEncoder
-class NaiveEfficientNet(AbstractNWPSatelliteEncoder):
-    """An implementation of EfficientNet from `efficientnet_pytorch`.
-    This model is quite naive, and just stacks the sequence into channels.
-    """
-    def __init__(
-        self,
-        sequence_length: int,
-        image_size_pixels: int,
-        in_channels: int,
-        out_features: int,
-        model_name: str = "efficientnet-b0",
-    ):
-        """An implementation of EfficientNet from `efficientnet_pytorch`.
-        This model is quite naive, and just stacks the sequence into channels.
-        Args:
-            sequence_length: The time sequence length of the data.
-            image_size_pixels: The spatial size of the image. Assumed square.
-            in_channels: Number of input channels.
-            out_features: Number of output features.
-            model_name: Name of EfficientNet model to construct.
-        Notes:
-            The `efficientnet_pytorch` package must be installed to use `NaiveEfficientNet`.
-            See https://github.com/lukemelas/EfficientNet-PyTorch for install instructions.
-        """
-        # Imported here rather than at module level so the package is only required when this
-        # encoder is actually instantiated
-        from efficientnet_pytorch import EfficientNet
-        super().__init__(sequence_length, image_size_pixels, in_channels, out_features)
-        # The sequence is stacked into channels, so the network sees
-        # `in_channels * sequence_length` input channels
-        self.model = EfficientNet.from_name(
-            model_name,
-            in_channels=in_channels * sequence_length,
-            image_size=image_size_pixels,
-            num_classes=out_features,
-        )
-    def forward(self, x):
-        """Run model forward
-        Args:
-            x: 5D tensor; dims 1 and 2 are merged into a single channel dim before the
-                EfficientNet is applied.
-        """
-        bs, s, c, h, w = x.shape
-        x = x.reshape((bs, s * c, h, w))
-        return self.model(x)
-class NaiveResNet(nn.Module):
-    """A ResNet model modified from one in torchvision [1].
-    Modified to allow different number of input channels. This model is quite naive, and just
-    stacks the sequence into channels.
-    Example use:
-        ```
-        resnet = NaiveResNet(
-            sequence_length=12,
-            image_size_pixels=24,
-            in_channels=2,
-            out_features=128,
-            layers=[2, 2, 2, 2],
-            block="basic",
-        )
-        ```
-    Sources:
-         [1] https://github.com/pytorch/vision/blob/main/torchvision/models/resnet.py
-         [2] https://pytorch.org/hub/pytorch_vision_resnet
-    """
-    def __init__(
-        self,
-        sequence_length: int,
-        image_size_pixels: int,
-        in_channels: int,
-        out_features: int,
-        layers: List[int] = [2, 2, 2, 2],
-        block: str = "bottleneck",
-        zero_init_residual: bool = False,
-        groups: int = 1,
-        width_per_group: int = 64,
-        replace_stride_with_dilation: Optional[List[bool]] = None,
-        norm_layer: Optional[Callable[..., nn.Module]] = None,
-    ):
-        """A ResNet model modified from one in torchvision [1].
-        Args:
-            sequence_length: The time sequence length of the data.
-            image_size_pixels: The spatial size of the image. Assumed square. Not used by this
-                constructor.
-            in_channels: Number of input channels.
-            out_features: Number of output features.
-            layers: Number of blocks in each of the four stages. See [1] and [2]. The default is
-                a shared list object; it is only indexed here, never mutated.
-            block: Either "basic" or "bottleneck", selecting the torchvision `BasicBlock` or
-                `Bottleneck`. Any other value raises `KeyError`.
-            zero_init_residual: See [1] and [2].
-            groups: See [1] and [2].
-            width_per_group: See [1] and [2].
-            replace_stride_with_dilation: See [1] and [2].
-            norm_layer: See [1] and [2].
-        Raises:
-            ValueError: If `replace_stride_with_dilation` does not have exactly 3 elements.
-        Sources:
-             [1] https://github.com/pytorch/vision/blob/main/torchvision/models/resnet.py
-             [2] https://pytorch.org/hub/pytorch_vision_resnet
-        """
-        super().__init__()
-        _log_api_usage_once(self)
-        if norm_layer is None:
-            norm_layer = nn.BatchNorm2d
-        self._norm_layer = norm_layer
-        # Account for stacking sequences into more channels
-        in_channels = in_channels * sequence_length
-        # Translate the string option into the torchvision block class
-        block = {
-            "basic": BasicBlock,
-            "bottleneck": Bottleneck,
-        }[block]
-        self.inplanes = 64
-        self.dilation = 1
-        if replace_stride_with_dilation is None:
-            # each element in the tuple indicates if we should replace
-            # the 2x2 stride with a dilated convolution instead
-            replace_stride_with_dilation = [False, False, False]
-        if len(replace_stride_with_dilation) != 3:
-            raise ValueError(
-                "replace_stride_with_dilation should be None "
-                f"or a 3-element tuple, got {replace_stride_with_dilation}"
-            )
-        self.groups = groups
-        self.base_width = width_per_group
-        self.conv1 = nn.Conv2d(
-            in_channels, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False
-        )
-        self.bn1 = norm_layer(self.inplanes)
-        self.relu = nn.ReLU(inplace=True)
-        # The max-pool is disabled here and in `_forward_impl`
-        # self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
-        self.layer1 = self._make_layer(block, 64, layers[0])
-        self.layer2 = self._make_layer(
-            block, 128, layers[1], stride=2, dilate=replace_stride_with_dilation[0]
-        )
-        self.layer3 = self._make_layer(
-            block, 256, layers[2], stride=2, dilate=replace_stride_with_dilation[1]
-        )
-        self.layer4 = self._make_layer(
-            block, 512, layers[3], stride=2, dilate=replace_stride_with_dilation[2]
-        )
-        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
-        self.fc = nn.Linear(512 * block.expansion, out_features)
-        # Extra activation applied after the final linear layer in `_forward_impl`
-        self.final_act = nn.LeakyReLU()
-        for m in self.modules():
-            if isinstance(m, nn.Conv2d):
-                nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
-            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
-                nn.init.constant_(m.weight, 1)
-                nn.init.constant_(m.bias, 0)
-        # Zero-initialize the last BN in each residual branch,
-        # so that the residual branch starts with zeros, and each residual block behaves like an
-        # identity. This improves the model by 0.2~0.3% according to
-        # https://arxiv.org/abs/1706.02677
-        if zero_init_residual:
-            for m in self.modules():
-                if isinstance(m, Bottleneck) and m.bn3.weight is not None:
-                    nn.init.constant_(m.bn3.weight, 0)  # type: ignore[arg-type]
-                elif isinstance(m, BasicBlock) and m.bn2.weight is not None:
-                    nn.init.constant_(m.bn2.weight, 0)  # type: ignore[arg-type]
-    def _make_layer(
-        self,
-        block: Type[Union[BasicBlock, Bottleneck]],
-        planes: int,
-        blocks: int,
-        stride: int = 1,
-        dilate: bool = False,
-    ) -> nn.Sequential:
-        """Build one stage made of `blocks` residual blocks.
-        Updates `self.inplanes` (and `self.dilation` when `dilate` is set) so that the next
-        stage is built with the matching input width.
-        Args:
-            block: Residual block class to instantiate.
-            planes: Base channel width of the stage; multiplied by `block.expansion`.
-            blocks: Number of residual blocks in the stage.
-            stride: Stride of the first block in the stage.
-            dilate: If True, the stride is replaced by an increased dilation.
-        """
-        norm_layer = self._norm_layer
-        downsample = None
-        previous_dilation = self.dilation
-        if dilate:
-            self.dilation *= stride
-            stride = 1
-        # Project the skip connection whenever the first block changes resolution or width
-        if stride != 1 or self.inplanes != planes * block.expansion:
-            downsample = nn.Sequential(
-                conv1x1(self.inplanes, planes * block.expansion, stride),
-                norm_layer(planes * block.expansion),
-            )
-        layers = []
-        # Only the first block of the stage gets the stride and the downsample projection
-        layers.append(
-            block(
-                self.inplanes,
-                planes,
-                stride,
-                downsample,
-                self.groups,
-                self.base_width,
-                previous_dilation,
-                norm_layer,
-            )
-        )
-        self.inplanes = planes * block.expansion
-        for _ in range(1, blocks):
-            layers.append(
-                block(
-                    self.inplanes,
-                    planes,
-                    groups=self.groups,
-                    base_width=self.base_width,
-                    dilation=self.dilation,
-                    norm_layer=norm_layer,
-                )
-            )
-        return nn.Sequential(*layers)
-    def _forward_impl(self, x: Tensor) -> Tensor:
-        """Apply the network to an input whose sequence is already stacked into channels."""
-        # See note [TorchScript super()]
-        x = self.conv1(x)
-        x = self.bn1(x)
-        x = self.relu(x)
-        # x = self.maxpool(x)
-        x = self.layer1(x)
-        x = self.layer2(x)
-        x = self.layer3(x)
-        x = self.layer4(x)
-        x = self.avgpool(x)
-        x = torch.flatten(x, 1)
-        x = self.fc(x)
-        x = self.final_act(x)
-        return x
-    def forward(self, x: Tensor) -> Tensor:
-        """Run model forward
-        Args:
-            x: 5D tensor; dims 1 and 2 are merged into a single channel dim before the ResNet
-                is applied.
-        """
-        bs, s, c, h, w = x.shape
-        x = x.reshape((bs, s * c, h, w))
-        return self._forward_impl(x)
-class NaiveConvNeXt(nn.Module):
-    """A NaiveConvNeXt model [1] modified from one in torchvision [2].
-    Mopdified to allow different number of input channels, and smaller spatial inputs. This model is
-    quite naive, and just stacks the sequence into channels.
-    Example usage:
-        ```
-        block_setting = [
-            CNBlockConfig(96, 192, 3),
-            CNBlockConfig(192, 384, 3),
-            CNBlockConfig(384, 768, 9),
-            CNBlockConfig(768, None, 3),
-        ]
-        sequence_len = 12
-        channels = 2
-        pixels=24
-        convnext_tiny = ConvNeXt(
-            sequence_length=12,
-            image_size_pixels=24,
-            in_channels=2,
-            out_features=128,
-            block_setting=block_setting,
-            stochastic_depth_prob=0.1,
-        )
-        ```
-    Sources:
-        [1] https://arxiv.org/abs/2201.03545
-        [2] https://github.com/pytorch/vision/blob/main/torchvision/models/convnext.py
-        [3] https://pytorch.org/vision/main/models/convnext.html
-    """
-    def __init__(
-        self,
-        sequence_length: int,
-        image_size_pixels: int,
-        in_channels: int,
-        out_features: int,
-        block_setting: List[CNBlockConfig],
-        stochastic_depth_prob: float = 0.0,
-        layer_scale: float = 1e-6,
-        block: Optional[Callable[..., nn.Module]] = None,
-        norm_layer: Optional[Callable[..., nn.Module]] = None,
-        **kwargs: Any,
-    ) -> None:
-        """A ConvNeXt model [1] modified from one in torchvision [2].
-        Args:
-            sequence_length: The time sequence length of the data.
-            image_size_pixels: The spatial size of the image. Assumed square.
-            in_channels: Number of input channels.
-            out_features: Number of output features.
-            block_setting: See [2] and [3].
-            stochastic_depth_prob: See [2] and [3].
-            layer_scale: See [2] and [3].
-            block: See [2] and [3].
-            norm_layer: See [2] and [3].
-            **kwargs: See [2] and [3].
-        Sources:
-            [1] https://arxiv.org/abs/2201.03545
-            [2] https://github.com/pytorch/vision/blob/main/torchvision/models/convnext.py
-            [3] https://pytorch.org/vision/main/models/convnext.html
-        """
-        super().__init__()
-        _log_api_usage_once(self)
-        if not block_setting:
-            raise ValueError("The block_setting should not be empty")
-        elif not (
-            isinstance(block_setting, Sequence)
-            and all([isinstance(s, CNBlockConfig) for s in block_setting])
-        ):
-            raise TypeError("The block_setting should be List[CNBlockConfig]")
-        if block is None:
-            block = CNBlock
-        if norm_layer is None:
-            norm_layer = partial(LayerNorm2d, eps=1e-6)
-        layers: List[nn.Module] = []
-        # Account for stacking sequences into more channels
-        in_channels = in_channels * sequence_length
-        # Stem
-        firstconv_output_channels = block_setting[0].input_channels
-        layers.append(
-            Conv2dNormActivation(
-                in_channels,
-                firstconv_output_channels,
-                kernel_size=2,
-                stride=2,
-                padding=0,
-                norm_layer=norm_layer,
-                activation_layer=None,
-                bias=True,
-            )
-        )
-        total_stage_blocks = sum(cnf.num_layers for cnf in block_setting)
-        stage_block_id = 0
-        for cnf in block_setting:
-            # Bottlenecks
-            stage: List[nn.Module] = []
-            for _ in range(cnf.num_layers):
-                # adjust stochastic depth probability based on the depth of the stage block
-                sd_prob = stochastic_depth_prob * stage_block_id / (total_stage_blocks - 1.0)
-                stage.append(block(cnf.input_channels, layer_scale, sd_prob))
-                stage_block_id += 1
-            layers.append(nn.Sequential(*stage))
-            if cnf.out_channels is not None:
-                # Downsampling
-                layers.append(
-                    nn.Sequential(
-                        norm_layer(cnf.input_channels),
-                        nn.Conv2d(cnf.input_channels, cnf.out_channels, kernel_size=2, stride=2),
-                    )
-                )
-        self.features = nn.Sequential(*layers)
-        self.avgpool = nn.AdaptiveAvgPool2d(1)
-        lastblock = block_setting[-1]
-        lastconv_output_channels = (
-            lastblock.out_channels
-            if lastblock.out_channels is not None
-            else lastblock.input_channels
-        )
-        self.classifier = nn.Sequential(
-            norm_layer(lastconv_output_channels),
-            nn.Flatten(1),
-            nn.Linear(lastconv_output_channels, out_features),
-        )
-        for m in self.modules():
-            if isinstance(m, (nn.Conv2d, nn.Linear)):
-                nn.init.trunc_normal_(m.weight, std=0.02)
-                if m.bias is not None:
-                    nn.init.zeros_(m.bias)
-    def _forward_impl(self, x: Tensor) -> Tensor:
-        x = self.features(x)
-        x = self.avgpool(x)
-        x = self.classifier(x)
-        return x
-    def forward(self, x: Tensor) -> Tensor:
-        """Run model forward"""
-        bs, s, c, h, w = x.shape
-        x = x.reshape((bs, s * c, h, w))
-        return self._forward_impl(x)

pvnet/models/multimodal/encoders/encoders3d.py DELETED Viewed

@@ -1,402 +0,0 @@
-"""Encoder modules for the satellite/NWP data based on 3D convolutions.
-"""
-from typing import List, Union
-import torch
-from torch import nn
-from torchvision.transforms import CenterCrop
-from pvnet.models.multimodal.encoders.basic_blocks import (
-    AbstractNWPSatelliteEncoder,
-    ResidualConv3dBlock,
-    ResidualConv3dBlock2,
-)
-class DefaultPVNet(AbstractNWPSatelliteEncoder):
-    """This is the original encoding module used in PVNet, with a few minor tweaks."""
-    def __init__(
-        self,
-        sequence_length: int,
-        image_size_pixels: int,
-        in_channels: int,
-        out_features: int,
-        number_of_conv3d_layers: int = 4,
-        conv3d_channels: int = 32,
-        fc_features: int = 128,
-        spatial_kernel_size: int = 3,
-        temporal_kernel_size: int = 3,
-        padding: Union[int, List[int]] = (1, 0, 0),
-    ):
-        """This is the original encoding module used in PVNet, with a few minor tweaks.
-        Args:
-            sequence_length: The time sequence length of the data.
-            image_size_pixels: The spatial size of the image. Assumed square.
-            in_channels: Number of input channels.
-            out_features: Number of output features.
-            number_of_conv3d_layers: Number of convolution 3d layers that are used.
-            conv3d_channels: Number of channels used in each conv3d layer.
-            fc_features: number of output nodes out of the hidden fully connected layer.
-            spatial_kernel_size: The spatial size of the kernel used in the conv3d layers.
-            temporal_kernel_size: The temporal size of the kernel used in the conv3d layers.
-            padding: The padding used in the conv3d layers. If an int, the same padding
-                is used in all dimensions
-        """
-        super().__init__(sequence_length, image_size_pixels, in_channels, out_features)
-        if isinstance(padding, int):
-            padding = (padding, padding, padding)
-        # Check that the output shape of the convolutional layers will be at least 1x1
-        cnn_spatial_output_size = (
-            image_size_pixels
-            - ((spatial_kernel_size - 2 * padding[1]) - 1) * number_of_conv3d_layers
-        )
-        cnn_sequence_length = (
-            sequence_length
-            - ((temporal_kernel_size - 2 * padding[0]) - 1) * number_of_conv3d_layers
-        )
-        if not (cnn_spatial_output_size >= 1):
-            raise ValueError(
-                f"cannot use this many conv3d layers ({number_of_conv3d_layers}) with this input "
-                f"spatial size ({image_size_pixels})"
-            )
-        conv_layers = []
-        conv_layers += [
-            nn.Conv3d(
-                in_channels=in_channels,
-                out_channels=conv3d_channels,
-                kernel_size=(temporal_kernel_size, spatial_kernel_size, spatial_kernel_size),
-                padding=padding,
-            ),
-            nn.ELU(),
-        ]
-        for i in range(0, number_of_conv3d_layers - 1):
-            conv_layers += [
-                nn.Conv3d(
-                    in_channels=conv3d_channels,
-                    out_channels=conv3d_channels,
-                    kernel_size=(temporal_kernel_size, spatial_kernel_size, spatial_kernel_size),
-                    padding=padding,
-                ),
-                nn.ELU(),
-            ]
-        self.conv_layers = nn.Sequential(*conv_layers)
-        # Calculate the size of the output of the 3D convolutional layers
-        cnn_output_size = conv3d_channels * cnn_spatial_output_size**2 * cnn_sequence_length
-        self.final_block = nn.Sequential(
-            nn.Linear(in_features=cnn_output_size, out_features=fc_features),
-            nn.ELU(),
-            nn.Linear(in_features=fc_features, out_features=out_features),
-            nn.ELU(),
-        )
-    def forward(self, x):
-        """Run model forward"""
-        out = self.conv_layers(x)
-        out = out.reshape(x.shape[0], -1)
-        # Fully connected layers
-        out = self.final_block(out)
-        return out
-class DefaultPVNet2(AbstractNWPSatelliteEncoder):
-    """The original encoding module used in PVNet, with a few minor tweaks, and batchnorm."""
-    def __init__(
-        self,
-        sequence_length: int,
-        image_size_pixels: int,
-        in_channels: int,
-        out_features: int,
-        number_of_conv3d_layers: int = 4,
-        conv3d_channels: int = 32,
-        fc_features: int = 128,
-        batch_norm=True,
-        fc_dropout=0.2,
-    ):
-        """The original encoding module used in PVNet, with a few minor tweaks, and batchnorm.
-        Args:
-            sequence_length: The time sequence length of the data.
-            image_size_pixels: The spatial size of the image. Assumed square.
-            in_channels: Number of input channels.
-            out_features: Number of output features.
-            number_of_conv3d_layers: Number of convolution 3d layers that are used.
-            conv3d_channels: Number of channels used in each conv3d layer.
-            fc_features: number of output nodes out of the hidden fully connected layer.
-            batch_norm: Whether to include 3D batch normalisation.
-            fc_dropout: Probability of an element to be zeroed after each of the last two fully
-                connected layers.
-        """
-        super().__init__(sequence_length, image_size_pixels, in_channels, out_features)
-        # Check that the output shape of the convolutional layers will be at least 1x1
-        cnn_spatial_output_size = image_size_pixels - 2 * number_of_conv3d_layers
-        if not (cnn_spatial_output_size > 0):
-            raise ValueError(
-                f"cannot use this many conv3d layers ({number_of_conv3d_layers}) with this input "
-                f"spatial size ({image_size_pixels})"
-            )
-        conv_layers = [
-            nn.Conv3d(
-                in_channels=in_channels,
-                out_channels=conv3d_channels,
-                kernel_size=(3, 3, 3),
-                padding=(1, 0, 0),
-            ),
-            nn.LeakyReLU(),
-        ]
-        if batch_norm:
-            # Inserted before activation using position -1
-            conv_layers.insert(-1, nn.BatchNorm3d(conv3d_channels))
-        for i in range(0, number_of_conv3d_layers - 1):
-            conv_layers += [
-                nn.Conv3d(
-                    in_channels=conv3d_channels,
-                    out_channels=conv3d_channels,
-                    kernel_size=(3, 3, 3),
-                    padding=(1, 0, 0),
-                ),
-                nn.LeakyReLU(),
-            ]
-            if batch_norm:
-                # Inserted before activation using position -1
-                conv_layers.insert(-1, nn.BatchNorm3d(conv3d_channels))
-        self.conv_layers = nn.Sequential(*conv_layers)
-        # Calculate the size of the output of the 3D convolutional layers
-        cnn_output_size = conv3d_channels * cnn_spatial_output_size**2 * sequence_length
-        final_block = [
-            nn.Linear(in_features=cnn_output_size, out_features=fc_features),
-            nn.LeakyReLU(),
-            nn.Linear(in_features=fc_features, out_features=out_features),
-            nn.LeakyReLU(),
-        ]
-        if fc_dropout > 0:
-            # Insert after the linear layers
-            final_block.insert(1, nn.Dropout(fc_dropout))
-            final_block.insert(-1, nn.Dropout(fc_dropout))
-        self.final_block = nn.Sequential(*final_block)
-    def forward(self, x):
-        """Run model forward"""
-        out = self.conv_layers(x)
-        out = out.reshape(x.shape[0], -1)
-        # Fully connected layers
-        out = self.final_block(out)
-        return out
-class ResConv3DNet2(AbstractNWPSatelliteEncoder):
-    """3D convolutional network based on ResNet architecture.
-    The residual blocks are implemented based on the best performing block in [1].
-    Sources:
-        [1] https://arxiv.org/pdf/1603.05027.pdf
-    """
-    def __init__(
-        self,
-        sequence_length: int,
-        image_size_pixels: int,
-        in_channels: int,
-        out_features: int,
-        hidden_channels: int = 32,
-        n_res_blocks: int = 4,
-        res_block_layers: int = 2,
-        batch_norm=True,
-        dropout_frac=0.0,
-    ):
-        """3D convolutional network based on ResNet architecture.
-        Args:
-            sequence_length: The time sequence length of the data.
-            image_size_pixels: The spatial size of the image. Assumed square.
-            in_channels: Number of input channels.
-            out_features: Number of output features.
-            hidden_channels: Number of channels in middle hidden layers.
-            n_res_blocks: Number of residual blocks to use.
-            res_block_layers: Number of Conv3D layers used in each residual block.
-            batch_norm: Whether to include batch normalisation.
-            dropout_frac: Probability of an element to be zeroed in the residual pathways.
-        """
-        super().__init__(sequence_length, image_size_pixels, in_channels, out_features)
-        model = [
-            nn.Conv3d(
-                in_channels=in_channels,
-                out_channels=hidden_channels,
-                kernel_size=(3, 3, 3),
-                padding=(1, 1, 1),
-            ),
-        ]
-        for i in range(n_res_blocks):
-            model.extend(
-                [
-                    ResidualConv3dBlock2(
-                        in_channels=hidden_channels,
-                        n_layers=res_block_layers,
-                        dropout_frac=dropout_frac,
-                        batch_norm=batch_norm,
-                    ),
-                    nn.AvgPool3d((1, 2, 2), stride=(1, 2, 2)),
-                ]
-            )
-        # Calculate the size of the output of the 3D convolutional layers
-        final_im_size = image_size_pixels // (2**n_res_blocks)
-        cnn_output_size = hidden_channels * sequence_length * final_im_size * final_im_size
-        model.extend(
-            [
-                nn.ELU(),
-                nn.Flatten(start_dim=1, end_dim=-1),
-                nn.Linear(in_features=cnn_output_size, out_features=out_features),
-                nn.ELU(),
-            ]
-        )
-        self.model = nn.Sequential(*model)
-    def forward(self, x):
-        """Run model forward"""
-        return self.model(x)
-class EncoderUNET(AbstractNWPSatelliteEncoder):
-    """An encoder based on a modified UNet architecture.
-    An encoder for satellite and/or NWP data taking inspiration from the kinds of skip
-    connections in UNet. This differs from an actual UNet in that it does not have upsampling
-    layers, instead it concats features from different spatial scales, and applies a few extra
-    conv3d layers.
-    """
-    def __init__(
-        self,
-        sequence_length: int,
-        image_size_pixels: int,
-        in_channels: int,
-        out_features: int,
-        n_downscale: int = 3,
-        res_block_layers: int = 2,
-        conv3d_channels: int = 32,
-        dropout_frac: float = 0.1,
-    ):
-        """An encoder based on a modified UNet architecture.
-        Args:
-            sequence_length: The time sequence length of the data.
-            image_size_pixels: The spatial size of the image. Assumed square.
-            in_channels: Number of input channels.
-            out_features: Number of output features.
-            n_downscale: Number of conv3d and spatially downscaling layers that are used.
-            res_block_layers: Number of layers used in each residual block.
-            conv3d_channels: Number of channels used in each conv3d layer.
-            dropout_frac: Probability of an element to be zeroed in the residual pathways.
-        """
-        cnn_spatial_output = image_size_pixels // (2**n_downscale)
-        if not (cnn_spatial_output > 0):
-            raise ValueError(
-                f"cannot use this many downscaling layers ({n_downscale}) with this input "
-                f"spatial size ({image_size_pixels})"
-            )
-        super().__init__(sequence_length, image_size_pixels, in_channels, out_features)
-        self.first_layer = nn.Sequential(
-            nn.Conv3d(
-                in_channels=in_channels,
-                out_channels=conv3d_channels,
-                kernel_size=(1, 1, 1),
-                padding=(0, 0, 0),
-            ),
-            ResidualConv3dBlock(
-                in_channels=conv3d_channels,
-                n_layers=res_block_layers,
-                dropout_frac=dropout_frac,
-            ),
-        )
-        downscale_layers = []
-        for _ in range(n_downscale):
-            downscale_layers += [
-                nn.Sequential(
-                    ResidualConv3dBlock(
-                        in_channels=conv3d_channels,
-                        n_layers=res_block_layers,
-                        dropout_frac=dropout_frac,
-                    ),
-                    nn.ELU(),
-                    nn.Conv3d(
-                        in_channels=conv3d_channels,
-                        out_channels=conv3d_channels,
-                        kernel_size=(1, 2, 2),
-                        padding=(0, 0, 0),
-                        stride=(1, 2, 2),
-                    ),
-                )
-            ]
-        self.downscale_layers = nn.ModuleList(downscale_layers)
-        self.crop_fn = CenterCrop(cnn_spatial_output)
-        cat_channels = conv3d_channels * (1 + n_downscale)
-        self.post_cat_conv = nn.Sequential(
-            ResidualConv3dBlock(
-                in_channels=cat_channels,
-                n_layers=res_block_layers,
-            ),
-            nn.ELU(),
-            nn.Conv3d(
-                in_channels=cat_channels,
-                out_channels=conv3d_channels,
-                kernel_size=(1, 1, 1),
-            ),
-        )
-        final_channels = (
-            (image_size_pixels // (2**n_downscale)) ** 2 * conv3d_channels * sequence_length
-        )
-        self.final_layer = nn.Sequential(
-            nn.ELU(),
-            nn.Linear(
-                in_features=final_channels,
-                out_features=out_features,
-            ),
-            nn.ELU(),
-        )
-    def forward(self, x):
-        """Run model forward"""
-        out = self.first_layer(x)
-        outputs = [self.crop_fn(out)]
-        for layer in self.downscale_layers:
-            out = layer(out)
-            outputs += [self.crop_fn(out)]
-        out = torch.cat(outputs, dim=1)
-        out = self.post_cat_conv(out)
-        out = torch.flatten(out, start_dim=1)
-        out = self.final_layer(out)
-        return out

pvnet/models/multimodal/encoders/encodersRNN.py DELETED Viewed

@@ -1,141 +0,0 @@
-"""Encoder modules for the satellite/NWP data based on recurrent and 2D convolutional layers.
-"""
-import torch
-from torch import nn
-from pvnet.models.multimodal.encoders.basic_blocks import (
-    AbstractNWPSatelliteEncoder,
-    ImageSequenceEncoder,
-)
-class ConvLSTM(AbstractNWPSatelliteEncoder):
-    """Convolutional LSTM block from MetNet."""
-    def __init__(
-        self,
-        sequence_length: int,
-        image_size_pixels: int,
-        in_channels: int,
-        out_features: int,
-        hidden_channels: int = 32,
-        num_layers: int = 2,
-        kernel_size: int = 3,
-        bias: bool = True,
-        activation=torch.tanh,
-        batchnorm=False,
-    ):
-        """Convolutional LSTM block from MetNet.
-        Args:
-            sequence_length: The time sequence length of the data.
-            image_size_pixels: The spatial size of the image. Assumed square.
-            in_channels: Number of input channels.
-            out_features: Number of output features.
-            hidden_channels: Hidden dimension size.
-            num_layers: Depth of ConvLSTM cells.
-            kernel_size: Kernel size.
-            bias: Whether to add bias.
-            activation: Activation function for ConvLSTM cells.
-            batchnorm: Whether to use batch norm.
-        """
-        from metnet.layers.ConvLSTM import ConvLSTM as _ConvLSTM
-        super().__init__(sequence_length, image_size_pixels, in_channels, out_features)
-        self.conv_lstm = _ConvLSTM(
-            input_dim=in_channels,
-            hidden_dim=hidden_channels,
-            kernel_size=kernel_size,
-            num_layers=num_layers,
-            bias=bias,
-            activation=activation,
-            batchnorm=batchnorm,
-        )
-        # Calculate the size of the output of the ConvLSTM network
-        convlstm_output_size = hidden_channels * image_size_pixels**2
-        self.final_block = nn.Sequential(
-            nn.Linear(in_features=convlstm_output_size, out_features=out_features),
-            nn.ELU(),
-        )
-    def forward(self, x):
-        """Run model forward"""
-        batch_size, channel, seq_len, height, width = x.shape
-        x = torch.swapaxes(x, 1, 2)
-        res, _ = self.conv_lstm(x)
-        # Select last state only
-        out = res[:, -1]
-        # Flatten and fully connected layer
-        out = out.reshape(batch_size, -1)
-        out = self.final_block(out)
-        return out
-class FlattenLSTM(AbstractNWPSatelliteEncoder):
-    """Convolutional blocks followed by LSTM."""
-    def __init__(
-        self,
-        sequence_length: int,
-        image_size_pixels: int,
-        in_channels: int,
-        out_features: int,
-        num_layers: int = 2,
-        number_of_conv2d_layers: int = 4,
-        conv2d_channels: int = 32,
-    ):
-        """Network consisting of 2D spatial convolutional and LSTM sequence encoder.
-        Args:
-            sequence_length: The time sequence length of the data.
-            image_size_pixels: The spatial size of the image. Assumed square.
-            in_channels: Number of input channels.
-            out_features: Number of output features. Also used for LSTM hidden dimension.
-            num_layers: Number of recurrent layers. E.g., setting num_layers=2 would mean stacking
-                two LSTMs together to form a stacked LSTM, with the second LSTM taking in outputs of
-                the first LSTM and computing the final results.
-            number_of_conv2d_layers: Number of convolution 2D layers that are used.
-            conv2d_channels: Number of channels used in each conv2d layer.
-        """
-        super().__init__(sequence_length, image_size_pixels, in_channels, out_features)
-        self.lstm = nn.LSTM(
-            input_size=out_features,
-            hidden_size=out_features,
-            num_layers=num_layers,
-            batch_first=True,
-        )
-        self.encode_image_sequence = ImageSequenceEncoder(
-            image_size_pixels=image_size_pixels,
-            in_channels=in_channels,
-            number_of_conv2d_layers=number_of_conv2d_layers,
-            conv2d_channels=conv2d_channels,
-            fc_features=out_features,
-        )
-        self.final_block = nn.Sequential(
-            nn.Linear(in_features=out_features, out_features=out_features),
-            nn.ELU(),
-        )
-    def forward(self, x):
-        """Run model forward"""
-        encoded_images = self.encode_image_sequence(x)
-        _, (_, c_n) = self.lstm(encoded_images)
-        # Take only the cell state of the last (deepest) LSTM layer
-        out = self.final_block(c_n[-1])
-        return out

pvnet/models/multimodal/linear_networks/__init__.py DELETED Viewed

	@@ -1 +0,0 @@
1	- """Submodels to combine 1D feature vectors from different sources and make final predictions"""

pvnet/models/multimodal/linear_networks/basic_blocks.py DELETED Viewed

@@ -1,121 +0,0 @@
-"""Basic blocks for the linear networks"""
-from abc import ABCMeta, abstractmethod
-from collections import OrderedDict
-import torch
-from torch import nn
-class AbstractLinearNetwork(nn.Module, metaclass=ABCMeta):
-    """Abstract class for a network to combine the features from all the inputs."""
-    def __init__(
-        self,
-        in_features: int,
-        out_features: int,
-    ):
-        """Abstract class for a network to combine the features from all the inputs.
-        Args:
-            in_features: Number of input features.
-            out_features: Number of output features.
-        """
-        super().__init__()
-    def cat_modes(self, x):
-        """Concatenate modes of input data into 1D feature vector"""
-        if isinstance(x, OrderedDict):
-            return torch.cat([value for key, value in x.items()], dim=1)
-        elif isinstance(x, torch.Tensor):
-            return x
-        else:
-            raise ValueError(f"Input of unexpected type {type(x)}")
-    @abstractmethod
-    def forward(self):
-        """Run model forward"""
-        pass
-class ResidualLinearBlock(nn.Module):
-    """A 1D fully-connected residual block using ELU activations and including optional dropout."""
-    def __init__(
-        self,
-        in_features: int,
-        n_layers: int = 2,
-        dropout_frac: float = 0.0,
-    ):
-        """A 1D fully-connected residual block using ELU activations and including optional dropout.
-        Args:
-            in_features: Number of input features.
-            n_layers: Number of layers in residual pathway.
-            dropout_frac: Probability of an element to be zeroed.
-        """
-        super().__init__()
-        layers = []
-        for i in range(n_layers):
-            layers += [
-                nn.ELU(),
-                nn.Linear(
-                    in_features=in_features,
-                    out_features=in_features,
-                ),
-                nn.Dropout(p=dropout_frac),
-            ]
-        self.model = nn.Sequential(*layers)
-    def forward(self, x):
-        """Run model forward"""
-        return self.model(x) + x
-class ResidualLinearBlock2(nn.Module):
-    """Residual block of 'full pre-activation' similar to the block in figure 4(e) of [1].
-    This was the best performing residual block tested in the study. This implementation differs
-    from that block just by using LeakyReLU activation to avoid dead neurons, and by including
-    optional dropout in the residual branch. This is also a 1D fully connected layer residual block
-    rather than a 2D convolutional block.
-    Sources:
-        [1] https://arxiv.org/pdf/1603.05027.pdf
-    """
-    def __init__(
-        self,
-        in_features: int,
-        n_layers: int = 2,
-        dropout_frac: float = 0.0,
-    ):
-        """Residual block of 'full pre-activation' similar to the block in figure 4(e) of [1].
-        Sources:
-            [1] https://arxiv.org/pdf/1603.05027.pdf
-        Args:
-            in_features: Number of input features.
-            n_layers: Number of layers in residual pathway.
-            dropout_frac: Probability of an element to be zeroed.
-        """
-        super().__init__()
-        layers = []
-        for i in range(n_layers):
-            layers += [
-                nn.BatchNorm1d(in_features),
-                nn.Dropout(p=dropout_frac),
-                nn.LeakyReLU(),
-                nn.Linear(
-                    in_features=in_features,
-                    out_features=in_features,
-                ),
-            ]
-        self.model = nn.Sequential(*layers)
-    def forward(self, x):
-        """Run model forward"""
-        return self.model(x) + x

pvnet/models/multimodal/linear_networks/networks.py DELETED Viewed

@@ -1,332 +0,0 @@
-"""Linear networks used for the fusion model"""
-from torch import nn, rand
-from pvnet.models.multimodal.linear_networks.basic_blocks import (
-    AbstractLinearNetwork,
-    ResidualLinearBlock,
-    ResidualLinearBlock2,
-)
-class DefaultFCNet(AbstractLinearNetwork):
-    """Similar to the original FCNet module used in PVNet, with a few minor tweaks.
-    This is a 2-layer fully connected block, with internal ELU activations and output ReLU.
-    """
-    def __init__(
-        self,
-        in_features: int,
-        out_features: int,
-        fc_hidden_features: int = 128,
-    ):
-        """Similar to the original FCNet module used in PVNet, with a few minor tweaks.
-        Args:
-            in_features: Number of input features.
-            out_features: Number of output features.
-            fc_hidden_features: Number of features in middle hidden layer.
-        """
-        super().__init__(in_features, out_features)
-        self.model = nn.Sequential(
-            nn.Linear(in_features=in_features, out_features=fc_hidden_features),
-            nn.ELU(),
-            nn.Linear(in_features=fc_hidden_features, out_features=out_features),
-            nn.ReLU(),
-        )
-    def forward(self, x):
-        """Run model forward"""
-        x = self.cat_modes(x)
-        return self.model(x)
-class ResFCNet(AbstractLinearNetwork):
-    """Fully-connected deep network based on ResNet architecture.
-    Internally, this network uses ELU activations throughout the residual blocks.
-    With n_res_blocks=0 this matches `DefaultFCNet` except for the output activation, which is
-    LeakyReLU here rather than ReLU.
-    """
-    def __init__(
-        self,
-        in_features: int,
-        out_features: int,
-        fc_hidden_features: int = 128,
-        n_res_blocks: int = 4,
-        res_block_layers: int = 2,
-        dropout_frac: float = 0.2,
-    ):
-        """Fully-connected deep network based on ResNet architecture.
-        Args:
-            in_features: Number of input features.
-            out_features: Number of output features.
-            fc_hidden_features: Number of features in middle hidden layers.
-            n_res_blocks: Number of residual blocks to use.
-            res_block_layers: Number of fully-connected layers used in each residual block.
-            dropout_frac: Probability of an element to be zeroed in the residual pathways.
-        """
-        super().__init__(in_features, out_features)
-        model = [
-            nn.Linear(in_features=in_features, out_features=fc_hidden_features),
-        ]
-        for i in range(n_res_blocks):
-            model += [
-                ResidualLinearBlock(
-                    in_features=fc_hidden_features,
-                    n_layers=res_block_layers,
-                    dropout_frac=dropout_frac,
-                )
-            ]
-        model += [
-            nn.ELU(),
-            nn.Linear(in_features=fc_hidden_features, out_features=out_features),
-            nn.LeakyReLU(negative_slope=0.01),
-        ]
-        self.model = nn.Sequential(*model)
-    def forward(self, x):
-        """Run model forward"""
-        x = self.cat_modes(x)
-        return self.model(x)
-class ResFCNet2(AbstractLinearNetwork):
-    """Fully connected deep network based on ResNet architecture.
-    This architecture is similar to
-    `ResFCNet`, except that it uses LeakyReLU activations internally, and batchnorm in the residual
-    branches. The residual blocks are implemented based on the best performing block in [1].
-    Sources:
-        [1] https://arxiv.org/pdf/1603.05027.pdf
-    """
-    def __init__(
-        self,
-        in_features: int,
-        out_features: int,
-        fc_hidden_features: int = 128,
-        n_res_blocks: int = 4,
-        res_block_layers: int = 2,
-        dropout_frac=0.0,
-    ):
-        """Fully connected deep network based on ResNet architecture.
-        Args:
-            in_features: Number of input features.
-            out_features: Number of output features.
-            fc_hidden_features: Number of features in middle hidden layers.
-            n_res_blocks: Number of residual blocks to use.
-            res_block_layers: Number of fully-connected layers used in each residual block.
-            dropout_frac: Probability of an element to be zeroed in the residual pathways.
-        """
-        super().__init__(in_features, out_features)
-        model = [
-            nn.Linear(in_features=in_features, out_features=fc_hidden_features),
-        ]
-        for i in range(n_res_blocks):
-            model += [
-                ResidualLinearBlock2(
-                    in_features=fc_hidden_features,
-                    n_layers=res_block_layers,
-                    dropout_frac=dropout_frac,
-                )
-            ]
-        model += [
-            nn.LeakyReLU(),
-            nn.Linear(in_features=fc_hidden_features, out_features=out_features),
-            nn.LeakyReLU(negative_slope=0.01),
-        ]
-        self.model = nn.Sequential(*model)
-    def forward(self, x):
-        """Run model forward"""
-        x = self.cat_modes(x)
-        return self.model(x)
-class SNN(AbstractLinearNetwork):
-    """Self normalising neural network implementation borrowed from [1] and proposed in [2].
-    Sources:
-        [1] https://github.com/tonyduan/snn/blob/master/snn/models.py
-        [2] https://arxiv.org/pdf/1706.02515v5.pdf
-    Args:
-        in_features: Number of input features.
-        out_features: Number of output features.
-        fc_hidden_features: Number of features in middle hidden layers.
-        n_layers: Number of fully-connected layers used in the network.
-        dropout_frac: Probability of an element to be zeroed.
-    """
-    def __init__(
-        self,
-        in_features: int,
-        out_features: int,
-        fc_hidden_features: int = 128,
-        n_layers: int = 10,
-        dropout_frac: float = 0.0,
-    ):
-        """Self normalising neural network implementation borrowed from [1] and proposed in [2].
-        Sources:
-            [1] https://github.com/tonyduan/snn/blob/master/snn/models.py
-            [2] https://arxiv.org/pdf/1706.02515v5.pdf
-        Args:
-            in_features: Number of input features.
-            out_features: Number of output features.
-            fc_hidden_features: Number of features in middle hidden layers.
-            n_layers: Number of fully-connected layers used in the network.
-            dropout_frac: Probability of an element to be zeroed.
-        """
-        super().__init__(in_features, out_features)
-        layers = [
-            nn.Linear(in_features, fc_hidden_features, bias=False),
-            nn.SELU(),
-            nn.AlphaDropout(p=dropout_frac),
-        ]
-        for i in range(1, n_layers - 1):
-            layers += [
-                nn.Linear(fc_hidden_features, fc_hidden_features, bias=False),
-                nn.SELU(),
-                nn.AlphaDropout(p=dropout_frac),
-            ]
-        layers += [
-            nn.Linear(fc_hidden_features, out_features, bias=True),
-            nn.LeakyReLU(negative_slope=0.01),
-        ]
-        self.network = nn.Sequential(*layers)
-        self._reset_parameters()
-    def forward(self, x):
-        """Run model forward"""
-        x = self.cat_modes(x)
-        return self.network(x)
-    def _reset_parameters(self):
-        for layer in self.network:
-            if isinstance(layer, nn.Linear):
-                nn.init.normal_(layer.weight, std=layer.out_features**-0.5)
-                if layer.bias is not None:
-                    fan_in, _ = nn.init._calculate_fan_in_and_fan_out(layer.weight)
-                    bound = fan_in**-0.5
-                    nn.init.uniform_(layer.bias, -bound, bound)
-class TabNet(AbstractLinearNetwork):
-    """An implementation of TabNet [1].
-    The implementation comes from `pytorch_tabnet` and this must be installed for use.
-    Sources:
-        [1] https://arxiv.org/abs/1908.07442
-    """
-    def __init__(
-        self,
-        in_features: int,
-        out_features: int,
-        n_d=8,
-        n_a=8,
-        n_steps=3,
-        gamma=1.3,
-        cat_idxs=[],
-        cat_dims=[],
-        cat_emb_dim=1,
-        n_independent=2,
-        n_shared=2,
-        epsilon=1e-15,
-        virtual_batch_size=128,
-        momentum=0.02,
-        mask_type="sparsemax",
-    ):
-        """An implementation of TabNet [1].
-        Sources:
-            [1] https://arxiv.org/abs/1908.07442
-        Args:
-            in_features: int
-                Number of input features.
-            out_features: int
-                Number of output features.
-            n_d : int
-                Dimension of the prediction layer (usually between 4 and 64)
-            n_a : int
-                Dimension of the attention layer (usually between 4 and 64)
-            n_steps : int
-                Number of successive steps in the network (usually between 3 and 10)
-            gamma : float
-                Float above 1, scaling factor for attention updates (usually between 1.0 to 2.0)
-            cat_idxs : list of int
-                Index of each categorical column in the dataset
-            cat_dims : list of int
-                Number of categories in each categorical column
-            cat_emb_dim : int or list of int
-                Size of the embedding of categorical features
-                if int, all categorical features will have same embedding size
-                if list of int, every corresponding feature will have specific size
-            n_independent : int
-                Number of independent GLU layers in each GLU block (default 2)
-            n_shared : int
-                Number of shared GLU layers in each GLU block (default 2)
-            epsilon : float
-                Avoid log(0), this should be kept very low
-            virtual_batch_size : int
-                Batch size for Ghost Batch Normalization
-            momentum : float
-                Float value between 0 and 1 which will be used for momentum in all batch norm
-            mask_type : str
-                Either "sparsemax" or "entmax" : this is the masking function to use
-        """
-        from pytorch_tabnet.tab_network import TabNet as _TabNetModel
-        super().__init__(in_features, out_features)
-        self._tabnet = _TabNetModel(
-            input_dim=in_features,
-            output_dim=out_features,
-            n_d=n_d,
-            n_a=n_a,
-            n_steps=n_steps,
-            gamma=gamma,
-            cat_idxs=cat_idxs,
-            cat_dims=cat_dims,
-            cat_emb_dim=cat_emb_dim,
-            n_independent=n_independent,
-            n_shared=n_shared,
-            epsilon=epsilon,
-            virtual_batch_size=virtual_batch_size,
-            momentum=momentum,
-            mask_type=mask_type,
-            group_attention_matrix=rand(4, in_features),
-        )
-        self.activation = nn.LeakyReLU(negative_slope=0.01)
-    def forward(self, x):
-        """Run model forward"""
-        # TODO: USE THIS LOSS COMPONENT
-        # loss = self.compute_loss(output, y)
-        # Add the overall sparsity loss
-        # loss = loss - self.lambda_sparse * M_loss
-        x = self.cat_modes(x)
-        out1, M_loss = self._tabnet(x)
-        return self.activation(out1)

pvnet/models/multimodal/multimodal.py DELETED Viewed

@@ -1,417 +0,0 @@
-"""The default composite model architecture for PVNet"""
-import logging
-from collections import OrderedDict
-from typing import Any, Optional
-import torch
-from omegaconf import DictConfig
-from torch import nn
-import pvnet
-from pvnet.models.base_model import BaseModel
-from pvnet.models.multimodal.basic_blocks import ImageEmbedding
-from pvnet.models.multimodal.encoders.basic_blocks import AbstractNWPSatelliteEncoder
-from pvnet.models.multimodal.linear_networks.basic_blocks import AbstractLinearNetwork
-from pvnet.models.multimodal.site_encoders.basic_blocks import AbstractSitesEncoder
-from pvnet.optimizers import AbstractOptimizer
-logger = logging.getLogger(__name__)
-class Model(BaseModel):
-    """Neural network which combines information from different sources
-    Architecture is roughly as follows:
-    - Satellite data, if included, is put through an encoder which transforms it from 4D, with time,
-        channel, height, and width dimensions to become a 1D feature vector.
-    - NWP, if included, is put through a similar encoder.
-    - PV site-level data, if included, is put through an encoder which transforms it from 2D, with
-        time and system-ID dimensions, to become a 1D feature vector.
-    - The satellite features*, NWP features*, PV site-level features*, GSP ID embedding*, and sun
-        parameters* are concatenated into a 1D feature vector and passed through another neural
-        network to combine them and produce a forecast.
-    * if included
-    """
-    name = "conv3d_sat_nwp"
-    def __init__(
-        self,
-        output_network: AbstractLinearNetwork,
-        output_quantiles: Optional[list[float]] = None,
-        nwp_encoders_dict: Optional[dict[str, AbstractNWPSatelliteEncoder]] = None,
-        sat_encoder: Optional[AbstractNWPSatelliteEncoder] = None,
-        pv_encoder: Optional[AbstractSitesEncoder] = None,
-        sensor_encoder: Optional[AbstractSitesEncoder] = None,
-        add_image_embedding_channel: bool = False,
-        include_gsp_yield_history: bool = True,
-        include_site_yield_history: Optional[bool] = False,
-        include_sun: bool = True,
-        include_time: bool = False,
-        location_id_mapping: Optional[dict[Any, int]] = None,
-        embedding_dim: Optional[int] = 16,
-        forecast_minutes: int = 30,
-        history_minutes: int = 60,
-        sat_history_minutes: Optional[int] = None,
-        min_sat_delay_minutes: Optional[int] = 30,
-        nwp_forecast_minutes: Optional[DictConfig] = None,
-        nwp_history_minutes: Optional[DictConfig] = None,
-        pv_history_minutes: Optional[int] = None,
-        sensor_history_minutes: Optional[int] = None,
-        sensor_forecast_minutes: Optional[int] = None,
-        optimizer: AbstractOptimizer = pvnet.optimizers.Adam(),
-        target_key: str = "gsp",
-        interval_minutes: int = 30,
-        nwp_interval_minutes: Optional[DictConfig] = None,
-        pv_interval_minutes: int = 5,
-        sat_interval_minutes: int = 5,
-        sensor_interval_minutes: int = 30,
-        timestep_intervals_to_plot: Optional[list[int]] = None,
-        adapt_batches: Optional[bool] = False,
-        forecast_minutes_ignore: Optional[int] = 0,
-        save_validation_results_csv: Optional[bool] = False,
-    ):
-        """Neural network which combines information from different sources.
-        Notes:
-            In the args, where it says a module `m` is partially instantiated, it means that a
-            normal pytorch module will be returned by running `mod = m(**kwargs)`. In this library,
-            this partial instantiation is generally achieved using partial instantiation via hydra.
-            However, the arg is still valid as long as `m(**kwargs)` returns a valid pytorch module
-            - for example if `m` is a regular function.
-        Args:
-            output_network: A partially instantiated pytorch Module class used to combine the 1D
-                features to produce the forecast.
-            output_quantiles: A list of float (0.0, 1.0) quantiles to predict values for. If set to
-                None the output is a single value.
-            nwp_encoders_dict: A dictionary of partially instantiated pytorch Module class used to
-                encode the NWP data from 4D into a 1D feature vector from different sources.
-            sat_encoder: A partially instantiated pytorch Module class used to encode the satellite
-                data from 4D into a 1D feature vector.
-            pv_encoder: A partially instantiated pytorch Module class used to encode the site-level
-                PV data from 2D into a 1D feature vector.
-            sensor_encoder: Encoder for sensor data
-            add_image_embedding_channel: Add a channel to the NWP and satellite data with the
-                embedding of the GSP ID.
-            include_gsp_yield_history: Include GSP yield data.
-            include_site_yield_history: Include Site yield data.
-            include_sun: Include sun azimuth and altitude data.
-            include_time: Include sine and cosine of dates and times.
-            location_id_mapping: A dictionary mapping the location ID to an integer. If not
-                provided, an identity mapping over the IDs 0 to 317 is used and a warning is logged.
-            embedding_dim: Number of embedding dimensions to use for GSP ID. The 1D ID embedding
-                is not used if this is None.
-            forecast_minutes: The amount of minutes that should be forecasted.
-            history_minutes: The default amount of historical minutes that are used.
-            sat_history_minutes: Length of recent observations used for satellite inputs. Must be
-                provided if `sat_encoder` is used.
-            min_sat_delay_minutes: Minimum delay with respect to t0 of the latest available
-                satellite image.
-            nwp_forecast_minutes: Period of future NWP forecast data used as input, per NWP source.
-                Must be provided if NWP encoders are used.
-            nwp_history_minutes: Period of historical NWP forecast used as input, per NWP source.
-                Must be provided if NWP encoders are used.
-            pv_history_minutes: Length of recent site-level PV data used as input. Must be provided
-                if `pv_encoder` is used.
-            sensor_history_minutes: Length of recent sensor data used as input. Defaults to
-                `history_minutes` if not provided.
-            sensor_forecast_minutes: Length of forecast sensor data used as input. Defaults to
-                `forecast_minutes` if not provided.
-            optimizer: Optimizer factory function used for network.
-            target_key: The key of the target variable in the batch.
-            interval_minutes: The interval between each sample of the target data
-            nwp_interval_minutes: Dictionary of the intervals between each sample of the NWP
-                data for each source. Defaults to 60 minutes for every source if not provided.
-            pv_interval_minutes: The interval between each sample of the PV data
-            sat_interval_minutes: The interval between each sample of the satellite data
-            sensor_interval_minutes: The interval between each sample of the sensor data
-            timestep_intervals_to_plot: Intervals, in timesteps, to plot in addition to the full
-                forecast
-            adapt_batches: If set to true, we attempt to slice the batches to the expected shape for
-                the model to use. This allows us to overprepare batches and slice from them for the
-                data we need for a model run.
-            forecast_minutes_ignore: Number of forecast minutes to ignore when calculating losses.
-                For example if set to 60, the model doesn't predict the first 60 minutes
-            save_validation_results_csv: whether to save full csv outputs from validation results.
-        """
-        self.include_gsp_yield_history = include_gsp_yield_history
-        self.include_site_yield_history = include_site_yield_history
-        self.include_sat = sat_encoder is not None
-        self.include_nwp = nwp_encoders_dict is not None and len(nwp_encoders_dict) != 0
-        self.include_pv = pv_encoder is not None
-        self.include_sun = include_sun
-        self.include_time = include_time
-        self.include_sensor = sensor_encoder is not None
-        self.location_id_mapping = location_id_mapping
-        self.embedding_dim = embedding_dim
-        self.add_image_embedding_channel = add_image_embedding_channel
-        self.interval_minutes = interval_minutes
-        self.min_sat_delay_minutes = min_sat_delay_minutes
-        self.adapt_batches = adapt_batches
-        if self.location_id_mapping is None:
-            logger.warning("location_id_mapping` is not provided, "
-                           "defaulting to outdated GSP mapping (0 to 317)")
-            # Note 318 is the 2024 UK GSP count, so this is a temporary fix
-            # for models trained with this default embedding
-            self.location_id_mapping = {i: i for i in range(318)}
-        # in the future location_id_mapping could be None,
-        # and in this case use_id_embedding should be False
-        self.use_id_embedding = self.embedding_dim is not None
-        # The embedding table size is needed by the 1D ID embedding and by the image-embedding
-        # channel, so it must be defined whenever either of them is enabled. Previously it was
-        # only defined for the former, which raised a NameError for
-        # `add_image_embedding_channel=True` with `embedding_dim=None`
-        if self.use_id_embedding or add_image_embedding_channel:
-            num_embeddings = max(self.location_id_mapping.values()) + 1
-        super().__init__(
-            history_minutes=history_minutes,
-            forecast_minutes=forecast_minutes,
-            optimizer=optimizer,
-            output_quantiles=output_quantiles,
-            target_key=target_key,
-            interval_minutes=interval_minutes,
-            timestep_intervals_to_plot=timestep_intervals_to_plot,
-            forecast_minutes_ignore=forecast_minutes_ignore,
-            save_validation_results_csv=save_validation_results_csv
-        )
-        # Number of features expected by the output_network
-        # Add to this as network pieces are constructed
-        fusion_input_features = 0
-        if self.include_sat:
-            # Param checks
-            assert sat_history_minutes is not None
-            self.sat_sequence_len = (
-                sat_history_minutes - min_sat_delay_minutes
-            ) // sat_interval_minutes + 1
-            # The bool `add_image_embedding_channel` adds one input channel when True
-            self.sat_encoder = sat_encoder(
-                sequence_length=self.sat_sequence_len,
-                in_channels=sat_encoder.keywords["in_channels"] + add_image_embedding_channel,
-            )
-            if add_image_embedding_channel:
-                self.sat_embed = ImageEmbedding(
-                    num_embeddings, self.sat_sequence_len, self.sat_encoder.image_size_pixels
-                )
-            # Update num features
-            fusion_input_features += self.sat_encoder.out_features
-        if self.include_nwp:
-            # Param checks
-            assert nwp_forecast_minutes is not None
-            assert nwp_history_minutes is not None
-            # For each NWP encoder the forecast and history minutes must be set
-            assert set(nwp_encoders_dict.keys()) == set(nwp_forecast_minutes.keys())
-            assert set(nwp_encoders_dict.keys()) == set(nwp_history_minutes.keys())
-            if nwp_interval_minutes is None:
-                nwp_interval_minutes = dict.fromkeys(nwp_encoders_dict.keys(), 60)
-            self.nwp_encoders_dict = torch.nn.ModuleDict()
-            if add_image_embedding_channel:
-                self.nwp_embed_dict = torch.nn.ModuleDict()
-            for nwp_source in nwp_encoders_dict.keys():
-                nwp_sequence_len = (
-                    nwp_history_minutes[nwp_source] // nwp_interval_minutes[nwp_source]
-                    + nwp_forecast_minutes[nwp_source] // nwp_interval_minutes[nwp_source]
-                    + 1
-                )
-                self.nwp_encoders_dict[nwp_source] = nwp_encoders_dict[nwp_source](
-                    sequence_length=nwp_sequence_len,
-                    in_channels=(
-                        nwp_encoders_dict[nwp_source].keywords["in_channels"]
-                        + add_image_embedding_channel
-                    ),
-                )
-                if add_image_embedding_channel:
-                    self.nwp_embed_dict[nwp_source] = ImageEmbedding(
-                        num_embeddings,
-                        nwp_sequence_len,
-                        self.nwp_encoders_dict[nwp_source].image_size_pixels,
-                    )
-                # Update num features
-                fusion_input_features += self.nwp_encoders_dict[nwp_source].out_features
-        if self.include_pv:
-            assert pv_history_minutes is not None
-            self.pv_encoder = pv_encoder(
-                sequence_length=pv_history_minutes // pv_interval_minutes + 1,
-                target_key_to_use=self._target_key,
-                input_key_to_use="site",
-            )
-            # Update num features
-            fusion_input_features += self.pv_encoder.out_features
-        if self.include_sensor:
-            if sensor_history_minutes is None:
-                sensor_history_minutes = history_minutes
-            if sensor_forecast_minutes is None:
-                sensor_forecast_minutes = forecast_minutes
-            self.sensor_encoder = sensor_encoder(
-                sequence_length=sensor_history_minutes // sensor_interval_minutes
-                + sensor_forecast_minutes // sensor_interval_minutes
-                + 1,
-                target_key_to_use=self._target_key,
-                input_key_to_use="sensor",
-            )
-            # Update num features
-            fusion_input_features += self.sensor_encoder.out_features
-        if self.use_id_embedding:
-            self.embed = nn.Embedding(num_embeddings=num_embeddings, embedding_dim=embedding_dim)
-            # Update num features
-            fusion_input_features += embedding_dim
-        if self.include_sun:
-            # Factor 2: azimuth and elevation are concatenated in `forward`
-            self.sun_fc1 = nn.Linear(
-                in_features=2
-                * (self.forecast_len + self.forecast_len_ignore + self.history_len + 1),
-                out_features=16,
-            )
-            # Update num features
-            fusion_input_features += 16
-        if self.include_time:
-            # Factor 4: date sin/cos and time sin/cos are concatenated in `forward`
-            self.time_fc1 = nn.Linear(
-                in_features=4
-                * (self.forecast_len + self.forecast_len_ignore + self.history_len + 1),
-                out_features=32,
-            )
-            # Update num features
-            fusion_input_features += 32
-        if include_gsp_yield_history:
-            # Update num features
-            fusion_input_features += self.history_len
-        if include_site_yield_history:
-            # Update num features
-            fusion_input_features += self.history_len + 1
-        self.output_network = output_network(
-            in_features=fusion_input_features,
-            out_features=self.num_output_features,
-        )
-        self.save_hyperparameters()
-    def forward(self, x):
-        """Run model forward
-        Args:
-            x: The batch. It is indexed like a dictionary with the keys of the enabled inputs
-                (for example "satellite_actual", "nwp", "site", "gsp", "solar_azimuth").
-        Returns:
-            The output of the output network. If quantile regression is used it is reshaped to
-            (batch_size, forecast_len, number of quantiles).
-        """
-        if self.adapt_batches:
-            x = self._adapt_batch(x)
-        # The mapped location IDs feed the 1D ID embedding and the image-embedding channels of
-        # the satellite and NWP inputs, so build them whenever any of those consumers is active
-        needs_location_ids = self.use_id_embedding or (
-            self.add_image_embedding_channel and (self.include_sat or self.include_nwp)
-        )
-        if needs_location_ids:
-            # eg: x["gsp_id"] = [1] with location_id_mapping = {1: 0}, would give [0]
-            location_ids = torch.tensor(
-                [self.location_id_mapping[i.item()] for i in x[f"{self._target_key}_id"]],
-                device=self.device,
-                dtype=torch.int64,
-            )
-        modes = OrderedDict()
-        # ******************* Satellite imagery *************************
-        if self.include_sat:
-            # Shape: batch_size, seq_length, channel, height, width
-            sat_data = x["satellite_actual"][:, : self.sat_sequence_len]
-            sat_data = torch.swapaxes(sat_data, 1, 2).float()  # switch time and channels
-            if self.add_image_embedding_channel:
-                sat_data = self.sat_embed(sat_data, location_ids)
-            modes["sat"] = self.sat_encoder(sat_data)
-        # *********************** NWP Data ************************************
-        if self.include_nwp:
-            # Loop through potentially many NWPs
-            for nwp_source in self.nwp_encoders_dict:
-                # shape: batch_size, seq_len, n_chans, height, width
-                nwp_data = x["nwp"][nwp_source]["nwp"].float()
-                nwp_data = torch.swapaxes(nwp_data, 1, 2)  # switch time and channels
-                # Some NWP variables can overflow into NaNs when normalised if they have extreme
-                # tails
-                nwp_data = torch.clip(nwp_data, min=-50, max=50)
-                if self.add_image_embedding_channel:
-                    nwp_data = self.nwp_embed_dict[nwp_source](nwp_data, location_ids)
-                nwp_out = self.nwp_encoders_dict[nwp_source](nwp_data)
-                modes[f"nwp/{nwp_source}"] = nwp_out
-        # *********************** Site Data *************************************
-        # Add site-level yield history
-        if self.include_site_yield_history:
-            site_history = x["site"][:, : self.history_len + 1].float()
-            site_history = site_history.reshape(site_history.shape[0], -1)
-            modes["site"] = site_history
-        # Add site-level yield history through PV encoder
-        # NOTE(review): if `include_site_yield_history` and a `pv_encoder` are both enabled, the
-        # encoder output below replaces the raw history stored under the same "site" key, while
-        # `__init__` counts input features for both - confirm this combination is supported
-        if self.include_pv:
-            if self._target_key != "site":
-                modes["site"] = self.pv_encoder(x)
-            else:
-                # Target is PV, so only take the history
-                # Copy batch
-                x_tmp = x.copy()
-                x_tmp["site"] = x_tmp["site"][:, : self.history_len + 1]
-                modes["site"] = self.pv_encoder(x_tmp)
-        # *********************** GSP Data ************************************
-        # add gsp yield history
-        if self.include_gsp_yield_history:
-            gsp_history = x["gsp"][:, : self.history_len].float()
-            gsp_history = gsp_history.reshape(gsp_history.shape[0], -1)
-            modes["gsp"] = gsp_history
-        # ********************** Embedding of GSP/Site ID ********************
-        if self.use_id_embedding:
-            modes["id"] = self.embed(location_ids)
-        if self.include_sun:
-            # Use only new direct keys
-            sun = torch.cat(
-                (
-                    x["solar_azimuth"],
-                    x["solar_elevation"],
-                ),
-                dim=1,
-            ).float()
-            sun = self.sun_fc1(sun)
-            modes["sun"] = sun
-        if self.include_time:
-            time_features = torch.cat(
-                (
-                    x[f"{self._target_key}_date_sin"],
-                    x[f"{self._target_key}_date_cos"],
-                    x[f"{self._target_key}_time_sin"],
-                    x[f"{self._target_key}_time_cos"],
-                ),
-                dim=1,
-            ).float()
-            time_features = self.time_fc1(time_features)
-            modes["time"] = time_features
-        out = self.output_network(modes)
-        if self.use_quantile_regression:
-            # Shape: batch_size, seq_length * num_quantiles
-            out = out.reshape(out.shape[0], self.forecast_len, len(self.output_quantiles))
-        return out

pvnet/models/multimodal/readme.md DELETED Viewed

@@ -1,11 +0,0 @@
-## Multimodal model architecture
-These are fusion models which predict GSP power output based on NWP, non-HRV satellite, GSP output history, solar coordinates, and GSP ID.
-The core model is `multimodal.Model`, and its architecture is shown in the diagram below.
-![multimodal_model_diagram](https://github.com/openclimatefix/PVNet/assets/41546094/118393fa-52ec-4bfe-a0a3-268c94c25f1e)
-This model uses encoders which take 4D (time, channel, x, y) inputs of NWP and satellite and encode them into 1D feature vectors. Different encoders are contained inside `encoders`.
-Different choices for the fusion model are contained inside `linear_networks`.

pvnet/models/multimodal/site_encoders/__init__.py DELETED Viewed

	@@ -1 +0,0 @@
1	- """Submodels to encode site-level PV data"""

pvnet/models/multimodal/site_encoders/basic_blocks.py DELETED Viewed

@@ -1,35 +0,0 @@
-"""Basic blocks for PV-site encoders"""
-from abc import ABCMeta, abstractmethod
-from torch import nn
-class AbstractSitesEncoder(nn.Module, metaclass=ABCMeta):
-    """Base class which the encoders of multi-site PV output data derive from.
-    Encoders take an input of shape (batch_size, sequence_length, num_sites)
-    and return an output of shape (batch_size, out_features).
-    """
-    def __init__(
-        self,
-        sequence_length: int,
-        num_sites: int,
-        out_features: int,
-    ):
-        """Store the dimensions shared by the PV site-level encoders.
-        Args:
-            sequence_length: The time sequence length of the data.
-            num_sites: Number of PV sites in the input data.
-            out_features: Number of output features.
-        """
-        super().__init__()
-        # Plain attributes only; the base class registers no layers of its own
-        self.out_features = out_features
-        self.num_sites = num_sites
-        self.sequence_length = sequence_length
-    @abstractmethod
-    def forward(self):
-        """Run model forward"""

pvnet/models/multimodal/site_encoders/encoders.py DELETED Viewed

@@ -1,284 +0,0 @@
-"""Encoder modules for the site-level PV data.
-"""
-import einops
-import torch
-from torch import nn
-from pvnet.models.multimodal.linear_networks.networks import ResFCNet2
-from pvnet.models.multimodal.site_encoders.basic_blocks import AbstractSitesEncoder
-class SimpleLearnedAggregator(AbstractSitesEncoder):
-    """A simple model which learns a different weighted-average across all PV sites for each GSP.
-    Each sequence from each site is independently encoded through some dense layers with skip-
-    connections, then the encoded form of each sequence is aggregated through a learned weighted-sum
-    and finally put through more dense layers.
-    This model was written to be a simplified version of a single-headed attention layer.
-    """
-    def __init__(
-        self,
-        sequence_length: int,
-        num_sites: int,
-        out_features: int,
-        value_dim: int = 10,
-        value_enc_resblocks: int = 2,
-        final_resblocks: int = 2,
-        target_id_dim: int = 318,
-    ):
-        """A simple sequence encoder and weighted-average model.
-        Args:
-            sequence_length: The time sequence length of the data.
-            num_sites: Number of PV sites in the input data.
-            out_features: Number of output features.
-            value_dim: The number of features in each encoded sequence. Similar to the value
-                dimension in single- or multi-head attention.
-            value_enc_resblocks: Number of residual blocks in the value-encoder sub-network.
-            final_resblocks: Number of residual blocks in the final sub-network.
-            target_id_dim: The number of unique GSP IDs, i.e. the number of rows in the learned
-                table of averaging weights.
-        """
-        super().__init__(sequence_length, num_sites, out_features)
-        # Network used to encode each PV site sequence
-        self._value_encoder = nn.Sequential(
-            ResFCNet2(
-                in_features=sequence_length,
-                out_features=value_dim,
-                fc_hidden_features=value_dim,
-                n_res_blocks=value_enc_resblocks,
-                res_block_layers=2,
-                dropout_frac=0,
-            ),
-        )
-        # The learned weighted average is stored in an embedding layer for ease of use
-        self._attention_network = nn.Sequential(
-            nn.Embedding(target_id_dim, num_sites),
-            nn.Softmax(dim=1),
-        )
-        # Network used to process weighted average
-        self.output_network = ResFCNet2(
-            in_features=value_dim,
-            out_features=out_features,
-            fc_hidden_features=value_dim,
-            n_res_blocks=final_resblocks,
-            res_block_layers=2,
-            dropout_frac=0,
-        )
-    def _calculate_attention(self, x):
-        """Look up the softmax-normalised site weights for the GSP ID of each sample"""
-        # Flatten rather than squeeze so that a batch of one sample keeps its batch dimension;
-        # a bare squeeze would give a 0-d index and the softmax over dim=1 would then fail
-        gsp_ids = x["gsp_id"].reshape(-1).int()
-        attention = self._attention_network(gsp_ids)
-        return attention
-    def _encode_value(self, x):
-        """Encode the sequence of each PV site independently"""
-        # Shape: [batch size, sequence length, PV site]
-        pv_site_seqs = x["pv"].float()
-        batch_size = pv_site_seqs.shape[0]
-        # Fold the sites into the batch dimension so every sequence is encoded on its own
-        pv_site_seqs = pv_site_seqs.swapaxes(1, 2).flatten(0, 1)
-        x_seq_enc = self._value_encoder(pv_site_seqs)
-        x_seq_out = x_seq_enc.unflatten(0, (batch_size, self.num_sites))
-        return x_seq_out
-    def forward(self, x):
-        """Run model forward"""
-        # Output has shape: [batch size, num_sites, value_dim]
-        encoded_seqs = self._encode_value(x)
-        # Calculate learned averaging weights
-        attn_avg_weights = self._calculate_attention(x)
-        # Take weighted average across num_sites
-        value_weighted_avg = (encoded_seqs * attn_avg_weights.unsqueeze(-1)).sum(dim=1)
-        # Put through final processing layers
-        x_out = self.output_network(value_weighted_avg)
-        return x_out
-class SingleAttentionNetwork(AbstractSitesEncoder):
-    """A simple attention-based model with a single multihead attention layer
-    For the attention layer the query is based on the target alone, the key is based on the
-    input ID and the recent input data, the value is based on the recent input data.
-    """
-    def __init__(
-        self,
-        sequence_length: int,
-        num_sites: int,
-        out_features: int,
-        kdim: int = 10,
-        id_embed_dim: int = 10,
-        num_heads: int = 2,
-        n_kv_res_blocks: int = 2,
-        kv_res_block_layers: int = 2,
-        use_id_in_value: bool = False,
-        target_id_dim: int = 318,
-        target_key_to_use: str = "gsp",
-        input_key_to_use: str = "site",
-        num_channels: int = 1,
-        num_sites_in_inference: int = 1,
-    ):
-        """A simple attention-based model with a single multihead attention layer
-        Args:
-            sequence_length: The time sequence length of the data.
-            num_sites: Number of sites in the input data.
-            out_features: Number of output features. In this network this is also the embed and
-                value dimension in the multi-head attention layer.
-            kdim: The number of dimensions used for the keys.
-            id_embed_dim: Number of dimensions used in the site ID embedding layer(s).
-            num_heads: Number of parallel attention heads. Note that `out_features` will be split
-                across `num_heads` so `out_features` must be a multiple of `num_heads`.
-            n_kv_res_blocks: Number of residual blocks to use in the key and value encoders.
-            kv_res_block_layers: Number of fully-connected layers used in each residual block within
-                the key and value encoders.
-            use_id_in_value: Whether to use a site ID embedding in network used to produce the
-                value for the attention layer.
-            target_id_dim: The number of unique IDs.
-            target_key_to_use: The key to use for the target in the attention layer.
-            input_key_to_use: The key to use for the input in the attention layer.
-            num_channels: Number of channels in the input data. For single site generation,
-                this will be 1, as there is no channel dimension, for Sensors,
-                this will probably be higher than that
-            num_sites_in_inference: Number of sites to use in inference.
-                This is used to determine the number of sites to use in the
-                attention layer, for a single site, 1 works, while for multiple sites
-                (such as multiple sensors), this would be higher than that
-        """
-        # The parent stores `sequence_length`, `num_sites` and `out_features`
-        super().__init__(sequence_length, num_sites, out_features)
-        self.target_id_embedding = nn.Embedding(target_id_dim, out_features)
-        self.site_id_embedding = nn.Embedding(num_sites, id_embed_dim)
-        # Fixed site indices, stored as a non-trainable parameter
-        self._ids = nn.parameter.Parameter(torch.arange(num_sites), requires_grad=False)
-        self.use_id_in_value = use_id_in_value
-        self.target_key_to_use = target_key_to_use
-        self.input_key_to_use = input_key_to_use
-        self.num_channels = num_channels
-        self.num_sites_in_inference = num_sites_in_inference
-        if use_id_in_value:
-            self.value_id_embedding = nn.Embedding(num_sites, id_embed_dim)
-        self._value_encoder = nn.Sequential(
-            ResFCNet2(
-                in_features=sequence_length * self.num_channels
-                + int(use_id_in_value) * id_embed_dim,
-                out_features=out_features,
-                fc_hidden_features=sequence_length * self.num_channels,
-                n_res_blocks=n_kv_res_blocks,
-                res_block_layers=kv_res_block_layers,
-                dropout_frac=0,
-            ),
-        )
-        self._key_encoder = nn.Sequential(
-            ResFCNet2(
-                in_features=id_embed_dim + sequence_length * self.num_channels,
-                out_features=kdim,
-                fc_hidden_features=id_embed_dim + sequence_length * self.num_channels,
-                n_res_blocks=n_kv_res_blocks,
-                res_block_layers=kv_res_block_layers,
-                dropout_frac=0,
-            ),
-        )
-        self.multihead_attn = nn.MultiheadAttention(
-            embed_dim=out_features,
-            kdim=kdim,
-            vdim=out_features,
-            num_heads=num_heads,
-            batch_first=True,
-        )
-    def _encode_inputs(self, x):
-        """Slice the input data to the sequence length and move the site axis to dim 1"""
-        # Shape: [batch size, sequence length, number of sites]
-        # Shape: [batch size,  station_id, sequence length,  channels]
-        input_data = x[f"{self.input_key_to_use}"]
-        if len(input_data.shape) == 2:  # one site per sample
-            input_data = input_data.unsqueeze(-1)  # add dimension of 1 to end to make 3D
-        if len(input_data.shape) == 4:  # Has multiple channels
-            input_data = input_data[:, :, : self.sequence_length]
-            # Merge time and channels so the result is 3D like the single-channel case
-            input_data = einops.rearrange(input_data, "b id s c -> b (s c) id")
-        else:
-            input_data = input_data[:, : self.sequence_length]
-        site_seqs = input_data.float()
-        batch_size = site_seqs.shape[0]
-        site_seqs = site_seqs.swapaxes(1, 2)  # [batch size, Site ID, sequence length]
-        return site_seqs, batch_size
-    def _encode_query(self, x):
-        """Embed the ID of each sample to form the attention query"""
-        # Select the first one
-        if self.target_key_to_use == "gsp":
-            # GSP seems to have a different structure
-            ids = x[f"{self.target_key_to_use}_id"]
-        else:
-            ids = x[f"{self.input_key_to_use}_id"]
-        ids = ids.int()
-        # Add a length-1 sequence dimension to the embedded IDs
-        query = self.target_id_embedding(ids).unsqueeze(1)
-        return query
-    def _encode_key(self, x):
-        """Encode each (site sequence, site ID embedding) pair into an attention key"""
-        site_seqs, batch_size = self._encode_inputs(x)
-        # site ID embeddings are the same for each sample
-        site_id_embed = torch.tile(self.site_id_embedding(self._ids), (batch_size, 1, 1))
-        # Each concatenated (site sequence, site ID embedding) is processed with encoder
-        x_seq_in = torch.cat((site_seqs, site_id_embed), dim=2).flatten(0, 1)
-        key = self._key_encoder(x_seq_in)
-        # Reshape to [batch size, site, kdim]
-        key = key.unflatten(0, (batch_size, self.num_sites))
-        return key
-    def _encode_value(self, x):
-        """Encode each site sequence, optionally with its ID embedding, into an attention value"""
-        site_seqs, batch_size = self._encode_inputs(x)
-        if self.use_id_in_value:
-            # site ID embeddings are the same for each sample
-            site_id_embed = torch.tile(self.value_id_embedding(self._ids), (batch_size, 1, 1))
-            # Each concatenated (site sequence, site ID embedding) is processed with encoder
-            x_seq_in = torch.cat((site_seqs, site_id_embed), dim=2).flatten(0, 1)
-        else:
-            # Encode each site sequence independently
-            x_seq_in = site_seqs.flatten(0, 1)
-        value = self._value_encoder(x_seq_in)
-        # Reshape to [batch size, site, vdim]
-        value = value.unflatten(0, (batch_size, self.num_sites))
-        return value
-    def _attention_forward(self, x, average_attn_weights=True):
-        """Run the attention layer and return both its output and the attention weights"""
-        query = self._encode_query(x)
-        key = self._encode_key(x)
-        value = self._encode_value(x)
-        attn_output, attn_weights = self.multihead_attn(
-            query, key, value, average_attn_weights=average_attn_weights
-        )
-        return attn_output, attn_weights
-    def forward(self, x):
-        """Run model forward"""
-        # Do slicing here to only get history
-        attn_output, _attn_weights = self._attention_forward(x)
-        # Reshape from [batch_size, 1, vdim] to [batch_size, vdim] by dropping only the length-1
-        # query dimension. A bare `squeeze()` would also drop the feature dimension when
-        # `out_features` is 1, and needed a special case to restore single-sample batches
-        x_out = attn_output.squeeze(1)
-        return x_out

pvnet/models/multimodal/unimodal_teacher.py DELETED Viewed

@@ -1,447 +0,0 @@
-"""The default composite model architecture for PVNet"""
-import glob
-from collections import OrderedDict
-from typing import Any, Optional
-import hydra
-import torch
-import torch.nn.functional as F
-from pyaml_env import parse_config
-from torch import nn
-import pvnet
-from pvnet.models.base_model import BaseModel
-from pvnet.models.multimodal.linear_networks.basic_blocks import AbstractLinearNetwork
-from pvnet.optimizers import AbstractOptimizer
-class Model(BaseModel):
-    """Neural network which combines information from different sources
-    The network is trained via unimodal teachers [1].
-    Architecture is roughly as follows:
-    - Satellite data, if included, is put through an encoder which transforms it from 4D, with time,
-        channel, height, and width dimensions to become a 1D feature vector.
-    - NWP, if included, is put through a similar encoder.
-    - PV site-level data, if included, is put through an encoder which transforms it from 2D, with
-        time and system-ID dimensions, to become a 1D feature vector.
-    - The satellite features*, NWP features*, PV site-level features*, GSP ID embedding*, and sun
-        parameters* are concatenated into a 1D feature vector and passed through another neural
-        network to combine them and produce a forecast.
-    * if included
-    [1] https://arxiv.org/pdf/2305.01233.pdf
-    """
-    name = "unimodal_teacher"
-    def __init__(
-        self,
-        output_network: AbstractLinearNetwork,
-        output_quantiles: Optional[list[float]] = None,
-        include_gsp_yield_history: bool = True,
-        include_sun: bool = True,
-        location_id_mapping: Optional[dict[Any, int]] = None,
-        embedding_dim: Optional[int] = 16,
-        forecast_minutes: int = 30,
-        history_minutes: int = 60,
-        optimizer: AbstractOptimizer = pvnet.optimizers.Adam(),
-        mode_teacher_dict: dict = {},
-        val_best: bool = True,
-        cold_start: bool = True,
-        enc_loss_frac: float = 0.3,
-        adapt_batches: Optional[bool] = False,
-    ):
-        """Neural network which combines information from different sources.
-        The network is trained via unimodal teachers [1].
-        [1] https://arxiv.org/pdf/2305.01233.pdf
-        Notes:
-            In the args, where it says a module `m` is partially instantiated, it means that a
-            normal pytorch module will be returned by running `mod = m(**kwargs)`. In this library,
-            this partial instantiation is generally achieved using partial instantiation via hydra.
-            However, the arg is still valid as long as `m(**kwargs)` returns a valid pytorch module
-            - for example if `m` is a regular function.
-        Args:
-            output_network: A partially instantiated pytorch Module class used to combine the 1D
-                features to produce the forecast.
-            output_quantiles: A list of float (0.0, 1.0) quantiles to predict values for. If set to
-                None the output is a single value.
-            include_gsp_yield_history: Include GSP yield data.
-            include_sun: Include sun azimuth and altitude data.
-            location_id_mapping: A dictionary mapping the location ID to an integer. ID embedding is
-                not used if this is not provided.
-            embedding_dim: Number of embedding dimensions to use for GSP ID. Only used when
-                `location_id_mapping` is provided.
-            forecast_minutes: The amount of minutes that should be forecasted.
-            history_minutes: The default amount of historical minutes that are used.
-            optimizer: Optimizer factory function used for network.
-            mode_teacher_dict: A dictionary of paths to different model checkpoint directories,
-                which will be used as the unimodal teachers. The keys select the mode: "sat",
-                "site", or "nwp/<source>". The default dict is only read, never modified.
-            val_best: Whether to load the model which performed best on the validation set. Else the
-                last checkpoint is loaded.
-            cold_start: Whether to train the uni-modal encoders from scratch. Else start them with
-                weights from the uni-modal teachers.
-            enc_loss_frac: Fraction of total loss attributed to the teacher encoders.
-            adapt_batches: If set to true, we attempt to slice the batches to the expected shape for
-                the model to use. This allows us to overprepare batches and slice from them for the
-                data we need for a model run.
-        """
-        self.include_gsp_yield_history = include_gsp_yield_history
-        self.include_sun = include_sun
-        self.location_id_mapping = location_id_mapping
-        self.embedding_dim = embedding_dim
-        self.enc_loss_frac = enc_loss_frac
-        self.include_sat = False
-        self.include_nwp = False
-        self.include_pv = False
-        self.adapt_batches = adapt_batches
-        self.use_id_embedding = location_id_mapping is not None
-        if self.use_id_embedding:
-            num_embeddings = max(location_id_mapping.values()) + 1
-        # This is set but modified later based on the teachers
-        self.add_image_embedding_channel = False
-        super().__init__(
-            history_minutes=history_minutes,
-            forecast_minutes=forecast_minutes,
-            optimizer=optimizer,
-            output_quantiles=output_quantiles,
-            target_key="gsp",
-        )
-        # Number of features expected by the output_network
-        # Add to this as network pieces are constructed
-        fusion_input_features = 0
-        self.teacher_models = torch.nn.ModuleDict()
-        self.mode_teacher_dict = mode_teacher_dict
-        for mode, path in mode_teacher_dict.items():
-            # load teacher model and freeze its weights
-            self.teacher_models[mode] = self.get_unimodal_encoder(path, True, val_best=val_best)
-            for param in self.teacher_models[mode].parameters():
-                param.requires_grad = False
-            # Recreate model as student
-            mode_student_model = self.get_unimodal_encoder(
-                path, load_weights=(not cold_start), val_best=val_best
-            )
-            if mode == "sat":
-                self.include_sat = True
-                self.sat_sequence_len = mode_student_model.sat_sequence_len
-                self.sat_encoder = mode_student_model.sat_encoder
-                if mode_student_model.add_image_embedding_channel:
-                    self.sat_embed = mode_student_model.sat_embed
-                    self.add_image_embedding_channel = True
-                fusion_input_features += self.sat_encoder.out_features
-            elif mode == "site":
-                self.include_pv = True
-                self.site_encoder = mode_student_model.site_encoder
-                fusion_input_features += self.site_encoder.out_features
-            elif mode.startswith("nwp"):
-                nwp_source = mode.removeprefix("nwp/")
-                if not self.include_nwp:
-                    self.include_nwp = True
-                    self.nwp_encoders_dict = torch.nn.ModuleDict()
-                    if mode_student_model.add_image_embedding_channel:
-                        self.add_image_embedding_channel = True
-                        self.nwp_embed_dict = torch.nn.ModuleDict()
-                self.nwp_encoders_dict[nwp_source] = mode_student_model.nwp_encoders_dict[
-                    nwp_source
-                ]
-                if self.add_image_embedding_channel:
-                    self.nwp_embed_dict[nwp_source] = mode_student_model.nwp_embed_dict[nwp_source]
-                fusion_input_features += self.nwp_encoders_dict[nwp_source].out_features
-        # Guard on `use_id_embedding` rather than `embedding_dim`: `num_embeddings` only exists
-        # when a location ID mapping was given, and `forward` only calls `self.embed` in that case
-        if self.use_id_embedding:
-            self.embed = nn.Embedding(num_embeddings=num_embeddings, embedding_dim=embedding_dim)
-            fusion_input_features += embedding_dim
-        if self.include_sun:
-            self.sun_fc1 = nn.Linear(
-                in_features=2 * (self.forecast_len + self.history_len + 1),
-                out_features=16,
-            )
-            fusion_input_features += 16
-        if include_gsp_yield_history:
-            fusion_input_features += self.history_len
-        self.output_network = output_network(
-            in_features=fusion_input_features,
-            out_features=self.num_output_features,
-        )
-        self.save_hyperparameters()
-    def get_unimodal_encoder(self, path, load_weights, val_best):
-        """Load a model to function as a unimodal teacher
-        Args:
-            path: Directory containing `model_config.yaml` and the model checkpoint(s)
-            load_weights: Whether to load the checkpoint weights into the instantiated model
-            val_best: If True load the single `epoch*.ckpt` file in `path`, else load `last.ckpt`
-        Returns:
-            The model instantiated from the config, with weights loaded if requested
-        """
-        model_config = parse_config(f"{path}/model_config.yaml")
-        # Load the teacher model
-        encoder = hydra.utils.instantiate(model_config)
-        if load_weights:
-            if val_best:
-                # Only one epoch (best) saved per model
-                files = glob.glob(f"{path}/epoch*.ckpt")
-                assert len(files) == 1
-                checkpoint = torch.load(files[0], map_location="cpu")
-            else:
-                checkpoint = torch.load(f"{path}/last.ckpt", map_location="cpu")
-            encoder.load_state_dict(state_dict=checkpoint["state_dict"])
-        return encoder
-    def teacher_forward(self, x):
-        """Run the teacher models and return their encodings
-        Args:
-            x: The batch dictionary
-        Returns:
-            OrderedDict mapping each teacher mode to the output of that teacher's encoder
-        """
-        if self.use_id_embedding:
-            # eg: x['gsp_id'] = [1] with location_id_mapping = {1:0}, would give [0]
-            location_id = torch.tensor(
-                [self.location_id_mapping[i.item()] for i in x[f"{self._target_key}_id"]],
-                device=self.device,
-                dtype=torch.int64,
-            )
-        modes = OrderedDict()
-        for mode, teacher_model in self.teacher_models.items():
-            # ******************* Satellite imagery *************************
-            if mode == "sat":
-                # Shape: batch_size, seq_length, channel, height, width
-                sat_data = x["satellite_actual"][:, : teacher_model.sat_sequence_len]
-                sat_data = torch.swapaxes(sat_data, 1, 2).float()  # switch time and channels
-                # Use the teacher's own flag, as the NWP branch below does: the student's flag may
-                # have been switched on by a different mode
-                if teacher_model.add_image_embedding_channel:
-                    sat_data = teacher_model.sat_embed(sat_data, location_id)
-                modes[mode] = teacher_model.sat_encoder(sat_data)
-            # *********************** NWP Data ************************************
-            if mode.startswith("nwp"):
-                nwp_source = mode.removeprefix("nwp/")
-                # shape: batch_size, seq_len, n_chans, height, width
-                nwp_data = x["nwp"][nwp_source]["nwp"].float()
-                nwp_data = torch.swapaxes(nwp_data, 1, 2)  # switch time and channels
-                nwp_data = torch.clip(nwp_data, min=-50, max=50)
-                if teacher_model.add_image_embedding_channel:
-                    nwp_data = teacher_model.nwp_embed_dict[nwp_source](nwp_data, location_id)
-                nwp_out = teacher_model.nwp_encoders_dict[nwp_source](nwp_data)
-                modes[mode] = nwp_out
-            # *********************** PV Data *************************************
-            # Add site-level PV yield
-            if mode == "site":
-                modes[mode] = teacher_model.site_encoder(x)
-        return modes
-    def forward(self, x, return_modes=False):
-        """Run model forward
-        Args:
-            x: The batch dictionary
-            return_modes: Whether to also return the per-mode features fed to the output network
-        Returns:
-            The output network prediction, or the tuple (prediction, modes) if `return_modes`
-        """
-        if self.adapt_batches:
-            x = self._adapt_batch(x)
-        if self.use_id_embedding:
-            # eg: x['gsp_id'] = [1] with location_id_mapping = {1:0}, would give [0]
-            location_id = torch.tensor(
-                [self.location_id_mapping[i.item()] for i in x[f"{self._target_key}_id"]],
-                device=self.device,
-                dtype=torch.int64,
-            )
-        modes = OrderedDict()
-        # ******************* Satellite imagery *************************
-        if self.include_sat:
-            # Shape: batch_size, seq_length, channel, height, width
-            sat_data = x["satellite_actual"][:, : self.sat_sequence_len]
-            sat_data = torch.swapaxes(sat_data, 1, 2).float()  # switch time and channels
-            if self.add_image_embedding_channel:
-                sat_data = self.sat_embed(sat_data, location_id)
-            modes["sat"] = self.sat_encoder(sat_data)
-        # *********************** NWP Data ************************************
-        if self.include_nwp:
-            # Loop through potentially many NWPs
-            for nwp_source in self.nwp_encoders_dict:
-                # shape: batch_size, seq_len, n_chans, height, width
-                nwp_data = x["nwp"][nwp_source]["nwp"].float()
-                nwp_data = torch.swapaxes(nwp_data, 1, 2)  # switch time and channels
-                # Some NWP variables can overflow into NaNs when normalised if they have extreme
-                # tails
-                nwp_data = torch.clip(nwp_data, min=-50, max=50)
-                if self.add_image_embedding_channel:
-                    nwp_data = self.nwp_embed_dict[nwp_source](nwp_data, location_id)
-                nwp_out = self.nwp_encoders_dict[nwp_source](nwp_data)
-                modes[f"nwp/{nwp_source}"] = nwp_out
-        # *********************** PV Data *************************************
-        # Add site-level PV yield
-        if self.include_pv:
-            if self._target_key != "site":
-                modes["site"] = self.site_encoder(x)
-            else:
-                # Target is PV, so only take the history
-                pv_history = x["pv"][:, : self.history_len].float()
-                modes["site"] = self.site_encoder(pv_history)
-        # *********************** GSP Data ************************************
-        # add gsp yield history
-        if self.include_gsp_yield_history:
-            gsp_history = x["gsp"][:, : self.history_len].float()
-            gsp_history = gsp_history.reshape(gsp_history.shape[0], -1)
-            modes["gsp"] = gsp_history
-        # ********************** Embedding of GSP ID ********************
-        if self.use_id_embedding:
-            modes["id"] = self.embed(location_id)
-        if self.include_sun:
-            # Use only new direct keys
-            sun = torch.cat(
-                (
-                    x["solar_azimuth"],
-                    x["solar_elevation"],
-                ),
-                dim=1,
-            ).float()
-            sun = self.sun_fc1(sun)
-            modes["sun"] = sun
-        out = self.output_network(modes)
-        if self.use_quantile_regression:
-            # Shape: batch_size, seq_length * num_quantiles
-            out = out.reshape(out.shape[0], self.forecast_len, len(self.output_quantiles))
-        if return_modes:
-            return out, modes
-        else:
-            return out
-    def _calculate_teacher_loss(self, modes, teacher_modes):
-        """Calculate the L1 loss between each teacher encoding and the matching student encoding
-        Returns:
-            Dict with one "enc_loss/<mode>" entry per teacher mode plus their sum under
-            "enc_loss/total"
-        """
-        enc_losses = {}
-        for m, enc in teacher_modes.items():
-            enc_losses[f"enc_loss/{m}"] = F.l1_loss(enc, modes[m])
-        enc_losses["enc_loss/total"] = sum(enc_losses.values())
-        return enc_losses
-    def training_step(self, batch, batch_idx):
-        """Run training step"""
-        y_hat, modes = self.forward(batch, return_modes=True)
-        y = batch[self._target_key][:, -self.forecast_len :, 0]
-        losses = self._calculate_common_losses(y, y_hat)
-        teacher_modes = self.teacher_forward(batch)
-        teacher_loss = self._calculate_teacher_loss(modes, teacher_modes)
-        losses.update(teacher_loss)
-        if self.use_quantile_regression:
-            opt_target = losses["quantile_loss"]
-        else:
-            opt_target = losses["MAE"]
-        t_loss = teacher_loss["enc_loss/total"]
-        # The scales of the two losses
-        l_s = opt_target.detach()
-        tl_s = max(t_loss.detach(), 1e-9)
-        # The teacher loss is rescaled to the magnitude of the forecast loss before the two are
-        # mixed, so that `enc_loss_frac` sets their relative weight
-        losses["opt_loss"] = t_loss / tl_s * l_s * self.enc_loss_frac + opt_target * (
-            1 - self.enc_loss_frac
-        )
-        losses = {f"{k}/train": v for k, v in losses.items()}
-        self._training_accumulate_log(batch, batch_idx, losses, y_hat)
-        return losses["opt_loss/train"]
-    def convert_to_multimodal_model(self, config):
-        """Convert the model into a multimodal model class whilst preserving weights
-        Args:
-            config: The config of this model. It must contain "mode_teacher_dict"
-        Returns:
-            Tuple of (the multimodal model, the config it was instantiated from)
-        """
-        config = config.copy()
-        if "cold_start" in config:
-            del config["cold_start"]
-        config["_target_"] = "pvnet.models.multimodal.multimodal.Model"
-        nwp_keys = ["nwp_encoders_dict", "nwp_history_minutes", "nwp_forecast_minutes"]
-        sources = []
-        for mode, path in config["mode_teacher_dict"].items():
-            model_config = parse_config(f"{path}/model_config.yaml")
-            if mode.startswith("nwp"):
-                nwp_source = mode.removeprefix("nwp/")
-                if "nwp_encoders_dict" in config:
-                    for key in nwp_keys:
-                        config[key][nwp_source] = model_config[key][nwp_source]
-                else:
-                    for key in nwp_keys:
-                        config[key] = {nwp_source: model_config[key][nwp_source]}
-                # Record NWP for every source, including the first one. Otherwise the NWP encoder
-                # weights are not copied below when there is only a single NWP source
-                sources.append("nwp")
-                config["add_image_embedding_channel"] = model_config["add_image_embedding_channel"]
-            elif mode == "sat":
-                for key in [
-                    "sat_encoder",
-                    "add_image_embedding_channel",
-                    "min_sat_delay_minutes",
-                    "sat_history_minutes",
-                ]:
-                    config[key] = model_config[key]
-                sources.append("sat")
-            elif mode == "site":
-                for key in ["site_encoder", "site_history_minutes"]:
-                    config[key] = model_config[key]
-                sources.append("site")
-        del config["mode_teacher_dict"]
-        # Instantiate the multimodal model, then copy the trained weights into it
-        multimodal_model = hydra.utils.instantiate(config)
-        # NOTE(review): `sat_embed` and `nwp_embed_dict` are not copied here - confirm whether
-        # they hold trained weights which should be transferred too
-        if "sat" in sources:
-            multimodal_model.sat_encoder.load_state_dict(self.sat_encoder.state_dict())
-        if "nwp" in sources:
-            multimodal_model.nwp_encoders_dict.load_state_dict(self.nwp_encoders_dict.state_dict())
-        if "site" in sources:
-            multimodal_model.site_encoder.load_state_dict(self.site_encoder.state_dict())
-        multimodal_model.output_network.load_state_dict(self.output_network.state_dict())
-        # `self.embed` only exists when the ID embedding is in use
-        if self.use_id_embedding:
-            multimodal_model.embed.load_state_dict(self.embed.state_dict())
-        if self.include_sun:
-            multimodal_model.sun_fc1.load_state_dict(self.sun_fc1.state_dict())
-        return multimodal_model, config

pvnet/models/utils.py DELETED Viewed

@@ -1,123 +0,0 @@
-"""Utility functions"""
-import logging
-import numpy as np
-import torch
-# Module-level logger (the assignment was previously duplicated)
-logger = logging.getLogger(__name__)
-class PredAccumulator:
-    """Accumulator for y-predictions made with grad accumulation and a small batch size.
-    Attributes:
-        _y_hats (list[torch.Tensor]): The prediction tensors appended since the last flush
-    """
-    def __init__(self):
-        """Start with no stored predictions"""
-        self._y_hats = []
-    def __bool__(self):
-        # Truthy once at least one prediction tensor is stored
-        return bool(self._y_hats)
-    def append(self, y_hat: torch.Tensor):
-        """Store the predictions of one sub-batch"""
-        self._y_hats.append(y_hat)
-    def flush(self) -> torch.Tensor:
-        """Concatenate the stored predictions along dim 0, clear the store and return them."""
-        combined = torch.cat(self._y_hats, dim=0)
-        self._y_hats = []
-        return combined
-class DictListAccumulator:
-    """Base class holding the dictionary-of-lists helpers shared by the accumulators"""
-    @staticmethod
-    def _dict_list_append(d1, d2):
-        # Append each value of `d2` to the list stored under the same key in `d1`
-        for key in d2:
-            d1[key].append(d2[key])
-    @staticmethod
-    def _dict_init_list(d):
-        # Build a new dictionary in which each value of `d` starts a single-element list
-        wrapped = {}
-        for key in d:
-            wrapped[key] = [d[key]]
-        return wrapped
-class MetricAccumulator(DictListAccumulator):
-    """Accumulator for dictionaries of logging metrics.
-    Stores the metric values appended between flushes and reports the mean of each metric. Meant
-    for use with grad accumulation when the batch size is small.
-    Attributes:
-        _metrics (Dict[str, list[float]]): Dictionary containing lists of metrics.
-    """
-    def __init__(self):
-        """Start with no stored metrics."""
-        self._metrics = {}
-    def __bool__(self):
-        # Truthy once at least one metric is stored
-        return len(self._metrics) > 0
-    def append(self, loss_dict: dict[str, float]):
-        """Append dictionary of metrics to self"""
-        if self:
-            self._dict_list_append(self._metrics, loss_dict)
-        else:
-            # First append since construction or the last flush: start a new list per metric
-            self._metrics = self._dict_init_list(loss_dict)
-    def flush(self) -> dict[str, float]:
-        """Return the mean of each accumulated metric and clear the store"""
-        mean_metrics = {}
-        for metric_name, values in self._metrics.items():
-            mean_metrics[metric_name] = np.mean(values)
-        self._metrics = {}
-        return mean_metrics
-class BatchAccumulator(DictListAccumulator):
-    """A class for accumulating batches when using grad accumulation and the batch size is small.
-    Only the batch entries stored under `key_to_keep` and its `_id`, `_t0_idx` and `_time_utc`
-    companion keys are kept.
-    Attributes:
-        _batches (Dict[str, list[torch.Tensor]]): Dictionary containing lists of batch entries.
-        key_to_keep (str): The base key of the batch entries to keep.
-    """
-    def __init__(self, key_to_keep: str = "gsp"):
-        """Batch accumulator"""
-        self._batches = {}
-        self.key_to_keep = key_to_keep
-    def __bool__(self):
-        # Truthy once at least one batch entry is stored
-        return len(self._batches) > 0
-    def _filter_batch_dict(self, d):
-        # Return a new dictionary holding only the entries of `d` which should be accumulated
-        base = self.key_to_keep
-        keep_keys = [base, f"{base}_id", f"{base}_t0_idx", f"{base}_time_utc"]
-        return {k: v for k, v in d.items() if k in keep_keys}
-    def append(self, batch: dict[str, list[torch.Tensor]]):
-        """Append batch to self"""
-        filtered = self._filter_batch_dict(batch)
-        if self:
-            self._dict_list_append(self._batches, filtered)
-        else:
-            self._batches = self._dict_init_list(filtered)
-    def flush(self) -> dict[str, list[torch.Tensor]]:
-        """Concatenate all accumulated batches, return, and clear self"""
-        t0_key = f"{self.key_to_keep}_t0_idx"
-        # The t0 index is not concatenated: only the first appended value is returned
-        batch = {
-            k: (v[0] if k == t0_key else torch.cat(v, dim=0)) for k, v in self._batches.items()
-        }
-        self._batches = {}
-        return batch

pvnet/optimizers.py DELETED Viewed

@@ -1,200 +0,0 @@
-"""Optimizer factory-function classes.
-"""
-from abc import ABC, abstractmethod
-import torch
-class AbstractOptimizer(ABC):
-    """Abstract base class for the optimizer factories
-    A concrete optimizer class is used by the model like:
-    > OptimizerGenerator = Adam()
-    > optimizer = OptimizerGenerator(model)
-    The returned object `optimizer` must be something that may be returned by `pytorch_lightning`'s
-    `configure_optimizers()` method.
-    See :
-        https://lightning.ai/docs/pytorch/stable/common/lightning_module.html#configure-optimizers
-    """
-    @abstractmethod
-    def __call__(self):
-        """Abstract call - concrete subclasses must override this"""
-class Adam(AbstractOptimizer):
-    """Factory which builds a `torch.optim.Adam` optimizer for a model"""
-    def __init__(self, lr=0.0005, **kwargs):
-        """Store the learning rate and any extra keyword arguments for `torch.optim.Adam`"""
-        self.kwargs = kwargs
-        self.lr = lr
-    def __call__(self, model):
-        """Return an Adam optimizer over all parameters of `model`"""
-        parameters = model.parameters()
-        return torch.optim.Adam(parameters, lr=self.lr, **self.kwargs)
-class AdamW(AbstractOptimizer):
-    """Factory which builds a `torch.optim.AdamW` optimizer for a model"""
-    def __init__(self, lr=0.0005, **kwargs):
-        """Store the learning rate and any extra keyword arguments for `torch.optim.AdamW`"""
-        self.kwargs = kwargs
-        self.lr = lr
-    def __call__(self, model):
-        """Return an AdamW optimizer over all parameters of `model`"""
-        parameters = model.parameters()
-        return torch.optim.AdamW(parameters, lr=self.lr, **self.kwargs)
-def find_submodule_parameters(model, search_modules):
-    """Finds all parameters within given submodule types
-    Args:
-        model: torch Module to search through
-        search_modules: Submodule type, or list/tuple of submodule types, to search for
-    Returns:
-        List of the parameters of every module in `model` (including `model` itself) which is an
-        instance of one of the given types
-    """
-    # `isinstance` does not accept a list, so convert the documented list form to a tuple
-    if isinstance(search_modules, list):
-        search_modules = tuple(search_modules)
-    if isinstance(model, search_modules):
-        # Return a list here as well so that every branch returns the same type
-        return list(model.parameters())
-    params = []
-    for child in model.children():
-        params += find_submodule_parameters(child, search_modules)
-    return params
-def find_other_than_submodule_parameters(model, ignore_modules):
-    """Finds all parameters not with given submodule types
-    Args:
-        model: torch Module to search through
-        ignore_modules: Submodule type, or list/tuple of submodule types, to ignore
-    Returns:
-        List of the parameters of `model` which do not belong to a module of one of the given
-        types
-    """
-    # `isinstance` does not accept a list, so convert the documented list form to a tuple
-    if isinstance(ignore_modules, list):
-        ignore_modules = tuple(ignore_modules)
-    if isinstance(model, ignore_modules):
-        return []
-    # Start from the parameters registered directly on this module. Recursing into the children
-    # alone would drop them whenever a module has both its own parameters and children
-    params = list(model.parameters(recurse=False))
-    for child in model.children():
-        params += find_other_than_submodule_parameters(child, ignore_modules)
-    return params
-class EmbAdamWReduceLROnPlateau(AbstractOptimizer):
-    """AdamW optimizer and reduce on plateau scheduler, with no weight decay on embeddings"""
-    def __init__(
-        self, lr=0.0005, weight_decay=0.01, patience=3, factor=0.5, threshold=2e-4, **opt_kwargs
-    ):
-        """Store the optimizer and scheduler settings
-        Args:
-            lr: Learning rate passed to AdamW
-            weight_decay: Weight decay applied to the parameters outside `torch.nn.Embedding`
-            patience: Passed to the ReduceLROnPlateau scheduler
-            factor: Passed to the ReduceLROnPlateau scheduler
-            threshold: Passed to the ReduceLROnPlateau scheduler
-            **opt_kwargs: Extra keyword arguments passed to AdamW
-        """
-        self.lr = lr
-        self.weight_decay = weight_decay
-        self.patience = patience
-        self.factor = factor
-        self.threshold = threshold
-        self.opt_kwargs = opt_kwargs
-    def __call__(self, model):
-        """Return the optimizer and scheduler as ([optimizer], [scheduler dict])"""
-        embedding_types = (torch.nn.Embedding,)
-        # Embedding parameters are exempt from weight decay, all others use `self.weight_decay`
-        embedding_params = find_submodule_parameters(model, embedding_types)
-        other_params = find_other_than_submodule_parameters(model, embedding_types)
-        opt = torch.optim.AdamW(
-            [
-                {"params": other_params, "weight_decay": self.weight_decay},
-                {"params": embedding_params, "weight_decay": 0.0},
-            ],
-            lr=self.lr,
-            **self.opt_kwargs,
-        )
-        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
-            opt,
-            factor=self.factor,
-            patience=self.patience,
-            threshold=self.threshold,
-        )
-        # The scheduler monitors the validation loss matching the model's loss type
-        if model.use_quantile_regression:
-            monitor = "quantile_loss/val"
-        else:
-            monitor = "MAE/val"
-        return [opt], [{"scheduler": scheduler, "monitor": monitor}]
-class AdamWReduceLROnPlateau(AbstractOptimizer):
-    """AdamW optimizer and reduce on plateau scheduler
-    The learning rate is either a single number, or a mapping from parameter-name prefixes to
-    learning rates which also holds a "default" entry used for all remaining parameters.
-    """
-    def __init__(
-        self, lr=0.0005, patience=3, factor=0.5, threshold=2e-4, step_freq=None, **opt_kwargs
-    ):
-        """Store the optimizer and scheduler settings
-        Args:
-            lr: A single learning rate, or a mapping of parameter-name prefix to learning rate
-                including a "default" key
-            patience: Passed to the ReduceLROnPlateau scheduler
-            factor: Passed to the ReduceLROnPlateau scheduler
-            threshold: Passed to the ReduceLROnPlateau scheduler
-            step_freq: Stored on the instance but not used by this class
-            **opt_kwargs: Extra keyword arguments passed to AdamW
-        """
-        self._lr = lr
-        self.patience = patience
-        self.factor = factor
-        self.threshold = threshold
-        self.step_freq = step_freq
-        self.opt_kwargs = opt_kwargs
-    def _scheduler_config(self, opt, model):
-        """Build the scheduler dict for `opt`, monitoring the loss matching the model's type"""
-        return {
-            "scheduler": torch.optim.lr_scheduler.ReduceLROnPlateau(
-                opt,
-                factor=self.factor,
-                patience=self.patience,
-                threshold=self.threshold,
-            ),
-            "monitor": "quantile_loss/val" if model.use_quantile_regression else "MAE/val",
-        }
-    def _call_multi(self, model):
-        """Return the optimizer and scheduler when `lr` maps name prefixes to learning rates"""
-        remaining_params = dict(model.named_parameters())
-        group_args = []
-        for key in self._lr.keys():
-            if key == "default":
-                continue
-            # Move every not-yet-assigned parameter whose name starts with `key` into this group
-            submodule_params = []
-            for param_name in list(remaining_params.keys()):
-                if param_name.startswith(key):
-                    submodule_params.append(remaining_params.pop(param_name))
-            group_args.append({"params": submodule_params, "lr": self._lr[key]})
-        # Whatever is left falls back to the optimizer-level learning rate
-        group_args.append({"params": list(remaining_params.values())})
-        opt = torch.optim.AdamW(
-            group_args, lr=self._lr["default"] if model.lr is None else model.lr, **self.opt_kwargs
-        )
-        return [opt], [self._scheduler_config(opt, model)]
-    def __call__(self, model):
-        """Return the optimizer and scheduler as ([optimizer], [scheduler dict])"""
-        # Integers are accepted as a single learning rate too. Checking for `float` only sent an
-        # integer `lr` down the mapping path, where it failed on `.keys()`
-        if not isinstance(self._lr, (int, float)):
-            return self._call_multi(model)
-        default_lr = self._lr if model.lr is None else model.lr
-        opt = torch.optim.AdamW(model.parameters(), lr=default_lr, **self.opt_kwargs)
-        return [opt], [self._scheduler_config(opt, model)]

pvnet/training.py DELETED Viewed

@@ -1,183 +0,0 @@
-"""Training"""
-import os
-import shutil
-from typing import Optional
-import hydra
-import torch
-from lightning.pytorch import (
-    Callback,
-    LightningDataModule,
-    LightningModule,
-    Trainer,
-    seed_everything,
-)
-from lightning.pytorch.callbacks import ModelCheckpoint
-from lightning.pytorch.loggers import Logger
-from lightning.pytorch.loggers.wandb import WandbLogger
-from omegaconf import DictConfig, OmegaConf
-from pvnet import utils
-log = utils.get_logger(__name__)
-torch.set_default_dtype(torch.float32)
-def _callbacks_to_phase(callbacks, phase):
-    # Switch every callback which has a `switch_phase` method to the given phase
-    switchable = (cb for cb in callbacks if hasattr(cb, "switch_phase"))
-    for cb in switchable:
-        cb.switch_phase(phase)
-def resolve_monitor_loss(output_quantiles):
-    """Return the name of the validation metric to monitor.
-    "MAE/val" is returned when `output_quantiles` is None, otherwise "quantile_loss/val".
-    This adds the option to use something like:
-        monitor: "${resolve_monitor_loss:${model.output_quantiles}}"
-    in early stopping and model checkpoint callbacks so the callbacks config does not need to be
-    modified depending on whether quantile regression is being used or not.
-    """
-    return "MAE/val" if output_quantiles is None else "quantile_loss/val"
-OmegaConf.register_new_resolver("resolve_monitor_loss", resolve_monitor_loss)
-def train(config: DictConfig) -> Optional[float]:
-    """Contains training pipeline.
-    Instantiates all PyTorch Lightning objects from config.
-    Args:
-        config (DictConfig): Configuration composed by Hydra.
-    Returns:
-        Optional[float]: Metric score for hyperparameter optimization. None if the config does not
-            set `optimized_metric`.
-    Raises:
-        NotImplementedError: If any callback is in the "pretrain" training phase.
-    """
-    # Set seed for random number generators in pytorch, numpy and python.random
-    if "seed" in config:
-        seed_everything(config.seed, workers=True)
-    # Init lightning datamodule
-    log.info(f"Instantiating datamodule <{config.datamodule._target_}>")
-    datamodule: LightningDataModule = hydra.utils.instantiate(config.datamodule)
-    # Init lightning model
-    log.info(f"Instantiating model <{config.model._target_}>")
-    model: LightningModule = hydra.utils.instantiate(config.model)
-    # Init lightning loggers
-    loggers: list[Logger] = []
-    if "logger" in config:
-        for _, lg_conf in config.logger.items():
-            if "_target_" in lg_conf:
-                log.info(f"Instantiating logger <{lg_conf._target_}>")
-                loggers.append(hydra.utils.instantiate(lg_conf))
-    # Init lightning callbacks
-    callbacks: list[Callback] = []
-    if "callbacks" in config:
-        for _, cb_conf in config.callbacks.items():
-            if "_target_" in cb_conf:
-                log.info(f"Instantiating callback <{cb_conf._target_}>")
-                callbacks.append(hydra.utils.instantiate(cb_conf))
-    # Align the wandb id with the checkpoint path
-    # - only works if wandb logger and model checkpoint used
-    # - this makes it easy to push the model to huggingface
-    use_wandb_logger = False
-    for logger in loggers:
-        log.info(f"{logger}")
-        if isinstance(logger, WandbLogger):
-            use_wandb_logger = True
-            wandb_logger = logger
-            break
-    if use_wandb_logger:
-        for callback in callbacks:
-            log.info(f"{callback}")
-            if isinstance(callback, ModelCheckpoint):
-                # Need to call the .experiment property to initialise the logger
-                wandb_logger.experiment
-                # Replace the last component of the checkpoint path with the wandb run version
-                callback.dirpath = "/".join(
-                    callback.dirpath.split("/")[:-1] + [wandb_logger.version]
-                )
-                # Also save model config here - this makes for easy model push to huggingface
-                os.makedirs(callback.dirpath, exist_ok=True)
-                OmegaConf.save(config.model, f"{callback.dirpath}/model_config.yaml")
-                # Similarly save the data config
-                data_config = config.datamodule.configuration
-                if data_config is None:
-                    # Data config can be none if using presaved batches. We go to the presaved
-                    # batches to get the data config
-                    data_config = f"{config.datamodule.sample_dir}/data_configuration.yaml"
-                assert os.path.isfile(data_config), f"Data config file not found: {data_config}"
-                shutil.copyfile(data_config, f"{callback.dirpath}/data_config.yaml")
-                # upload configuration up to wandb
-                OmegaConf.save(config, "./experiment_config.yaml")
-                wandb_logger.experiment.save(
-                    f"{callback.dirpath}/data_config.yaml", callback.dirpath
-                )
-                wandb_logger.experiment.save("./experiment_config.yaml")
-                # Only the first model checkpoint callback is aligned
-                break
-    should_pretrain = False
-    for c in callbacks:
-        should_pretrain |= hasattr(c, "training_phase") and c.training_phase == "pretrain"
-    if should_pretrain:
-        _callbacks_to_phase(callbacks, "pretrain")
-    trainer: Trainer = hydra.utils.instantiate(
-        config.trainer,
-        logger=loggers,
-        _convert_="partial",
-        callbacks=callbacks,
-    )
-    # TODO: remove this option
-    if should_pretrain:
-        # Pre-training is not supported: the parameter `block_nwp_and_sat` is not available in
-        # data-sampler. If pretraining is re-supported in the future it is likely any pre-training
-        # logic should go here or perhaps in the callbacks
-        raise NotImplementedError("Pre-training is not yet supported")
-    _callbacks_to_phase(callbacks, "main")
-    trainer.should_stop = False
-    # Train the model completely
-    trainer.fit(model=model, datamodule=datamodule)
-    # Make sure everything closed properly
-    log.info("Finalizing!")
-    utils.finish(
-        config=config,
-        model=model,
-        datamodule=datamodule,
-        trainer=trainer,
-        callbacks=callbacks,
-        loggers=loggers,
-    )
-    # Print path to best checkpoint
-    log.info(f"Best checkpoint path:\n{trainer.checkpoint_callback.best_model_path}")
-    # Return metric score for hyperparameter optimization
-    optimized_metric = config.get("optimized_metric")
-    if optimized_metric:
-        return trainer.callback_metrics[optimized_metric]
-    return None

pvnet/utils.py DELETED Viewed

@@ -1,321 +0,0 @@
-"""Utils"""
-import logging
-import warnings
-from collections.abc import Sequence
-from typing import Optional
-import lightning.pytorch as pl
-import matplotlib.pyplot as plt
-import pandas as pd
-import pylab
-import rich.syntax
-import rich.tree
-import xarray as xr
-from lightning.pytorch.loggers import Logger
-from lightning.pytorch.utilities import rank_zero_only
-from ocf_data_sampler.select.location import Location
-from omegaconf import DictConfig, OmegaConf
-def get_logger(name=__name__, level=logging.INFO) -> logging.Logger:
-    """Initializes multi-GPU-friendly python logger."""
-    logger = logging.getLogger(name)
-    logger.setLevel(level)
-    # this ensures all logging levels get marked with the rank zero decorator
-    # otherwise logs would get multiplied for each GPU process in multi-GPU setup
-    for level in (
-        "debug",
-        "info",
-        "warning",
-        "error",
-        "exception",
-        "fatal",
-        "critical",
-    ):
-        setattr(logger, level, rank_zero_only(getattr(logger, level)))
-    return logger
-class GSPLocationLookup:
-    """Query object for GSP location from GSP ID"""
-    def __init__(self, x_osgb: xr.DataArray, y_osgb: xr.DataArray):
-        """Query object for GSP location from GSP ID
-        Args:
-            x_osgb: DataArray of the OSGB x-coordinate for any given GSP ID
-            y_osgb: DataArray of the OSGB y-coordinate for any given GSP ID
-        """
-        self.x_osgb = x_osgb
-        self.y_osgb = y_osgb
-    def __call__(self, gsp_id: int) -> Location:
-        """Returns the locations for the input GSP IDs.
-        Args:
-            gsp_id: Integer ID of the GSP
-        """
-        return Location(
-            x=self.x_osgb.sel(gsp_id=gsp_id).item(),
-            y=self.y_osgb.sel(gsp_id=gsp_id).item(),
-            id=gsp_id,
-        )
-class SiteLocationLookup:
-    """Query object for site location from site ID"""
-    def __init__(self, long: xr.DataArray, lat: xr.DataArray):
-        """Query object for site location from site ID
-        Args:
-            long: DataArray of the longitude coordinates for any given site ID
-            lat: DataArray of the latitude coordinates for any given site ID
-        """
-        self.longitude = long
-        self.latitude = lat
-    def __call__(self, site_id: int) -> Location:
-        """Returns the locations for the input site IDs.
-        Args:
-            site_id: Integer ID of the site
-        """
-        return Location(
-            coordinate_system="lon_lat",
-            x=self.longitude.sel(pv_system_id=site_id).item(),
-            y=self.latitude.sel(pv_system_id=site_id).item(),
-            id=site_id,
-        )
-def extras(config: DictConfig) -> None:
-    """A couple of optional utilities.
-    Controlled by main config file:
-    - disabling warnings
-    - easier access to debug mode
-    - forcing debug friendly configuration
-    Modifies DictConfig in place.
-    Args:
-        config (DictConfig): Configuration composed by Hydra.
-    """
-    log = get_logger()
-    # enable adding new keys to config
-    OmegaConf.set_struct(config, False)
-    # disable python warnings if <config.ignore_warnings=True>
-    if config.get("ignore_warnings"):
-        log.info("Disabling python warnings! <config.ignore_warnings=True>")
-        warnings.filterwarnings("ignore")
-    # set <config.trainer.fast_dev_run=True> if <config.debug=True>
-    if config.get("debug"):
-        log.info("Running in debug mode! <config.debug=True>")
-        config.trainer.fast_dev_run = True
-    # force debugger friendly configuration if <config.trainer.fast_dev_run=True>
-    if config.trainer.get("fast_dev_run"):
-        log.info("Forcing debugger friendly configuration! <config.trainer.fast_dev_run=True>")
-        # Debuggers don't like GPUs or multiprocessing
-        if config.trainer.get("gpus"):
-            config.trainer.gpus = 0
-        if config.datamodule.get("pin_memory"):
-            config.datamodule.pin_memory = False
-        if config.datamodule.get("num_workers"):
-            config.datamodule.num_workers = 0
-    # disable adding new keys to config
-    OmegaConf.set_struct(config, True)
-@rank_zero_only
-def print_config(
-    config: DictConfig,
-    fields: Sequence[str] = (
-        "trainer",
-        "model",
-        "datamodule",
-        "callbacks",
-        "logger",
-        "seed",
-    ),
-    resolve: bool = True,
-) -> None:
-    """Prints content of DictConfig using Rich library and its tree structure.
-    Args:
-        config (DictConfig): Configuration composed by Hydra.
-        fields (Sequence[str], optional): Determines which main fields from config will
-        be printed and in what order.
-        resolve (bool, optional): Whether to resolve reference fields of DictConfig.
-    """
-    style = "dim"
-    tree = rich.tree.Tree("CONFIG", style=style, guide_style=style)
-    for field in fields:
-        branch = tree.add(field, style=style, guide_style=style)
-        config_section = config.get(field)
-        branch_content = str(config_section)
-        if isinstance(config_section, DictConfig):
-            branch_content = OmegaConf.to_yaml(config_section, resolve=resolve)
-        branch.add(rich.syntax.Syntax(branch_content, "yaml"))
-    rich.print(tree)
-    with open("config_tree.txt", "w") as fp:
-        rich.print(tree, file=fp)
-def empty(*args, **kwargs):
-    """Returns nothing"""
-    pass
-@rank_zero_only
-def log_hyperparameters(
-    config: DictConfig,
-    model: pl.LightningModule,
-    datamodule: pl.LightningDataModule,
-    trainer: pl.Trainer,
-    callbacks: list[pl.Callback],
-    logger: list[Logger],
-) -> None:
-    """This method controls which parameters from Hydra config are saved by Lightning loggers.
-    Additionaly saves:
-        - number of trainable model parameters
-    """
-    hparams = {}
-    # choose which parts of hydra config will be saved to loggers
-    hparams["trainer"] = config["trainer"]
-    hparams["model"] = config["model"]
-    hparams["datamodule"] = config["datamodule"]
-    if "seed" in config:
-        hparams["seed"] = config["seed"]
-    if "callbacks" in config:
-        hparams["callbacks"] = config["callbacks"]
-    # save number of model parameters
-    hparams["model/params_total"] = sum(p.numel() for p in model.parameters())
-    hparams["model/params_trainable"] = sum(
-        p.numel() for p in model.parameters() if p.requires_grad
-    )
-    hparams["model/params_not_trainable"] = sum(
-        p.numel() for p in model.parameters() if not p.requires_grad
-    )
-    # send hparams to all loggers
-    trainer.logger.log_hyperparams(hparams)
-    # disable logging any more hyperparameters for all loggers
-    # this is just a trick to prevent trainer from logging hparams of model,
-    # since we already did that above
-    trainer.logger.log_hyperparams = empty
-def finish(
-    config: DictConfig,
-    model: pl.LightningModule,
-    datamodule: pl.LightningDataModule,
-    trainer: pl.Trainer,
-    callbacks: list[pl.Callback],
-    loggers: list[Logger],
-) -> None:
-    """Makes sure everything closed properly."""
-    # without this sweeps with wandb logger might crash!
-    if any([isinstance(logger, pl.loggers.wandb.WandbLogger) for logger in loggers]):
-        import wandb
-        wandb.finish()
-def plot_batch_forecasts(
-    batch,
-    y_hat,
-    batch_idx=None,
-    quantiles=None,
-    key_to_plot: str = "gsp",
-    timesteps_to_plot: Optional[list[int]] = None,
-):
-    """Plot a batch of data and the forecast from that batch"""
-    def _get_numpy(key):
-        return batch[key].cpu().numpy().squeeze()
-    y_key = key_to_plot
-    y_id_key = f"{key_to_plot}_id"
-    time_utc_key = f"{key_to_plot}_time_utc"
-    y = batch[y_key].cpu().numpy()  # Select the one it is trained on
-    y_hat = y_hat.cpu().numpy()
-    # Select between the timesteps in timesteps to plot
-    plotting_name = key_to_plot.upper()
-    gsp_ids = batch[y_id_key].cpu().numpy().squeeze()
-    times_utc = batch[time_utc_key].cpu().numpy().squeeze().astype("datetime64[ns]")
-    times_utc = [pd.to_datetime(t) for t in times_utc]
-    if timesteps_to_plot is not None:
-        y = y[:, timesteps_to_plot[0] : timesteps_to_plot[1]]
-        y_hat = y_hat[:, timesteps_to_plot[0] : timesteps_to_plot[1]]
-        times_utc = [t[timesteps_to_plot[0] : timesteps_to_plot[1]] for t in times_utc]
-    batch_size = y.shape[0]
-    fig, axes = plt.subplots(4, 4, figsize=(16, 16))
-    for i, ax in enumerate(axes.ravel()):
-        if i >= batch_size:
-            ax.axis("off")
-            continue
-        ax.plot(times_utc[i], y[i], marker=".", color="k", label=r"$y$")
-        if quantiles is None:
-            ax.plot(
-                times_utc[i][-len(y_hat[i]) :], y_hat[i], marker=".", color="r", label=r"$\hat{y}$"
-            )
-        else:
-            cm = pylab.get_cmap("twilight")
-            for nq, q in enumerate(quantiles):
-                ax.plot(
-                    times_utc[i][-len(y_hat[i]) :],
-                    y_hat[i, :, nq],
-                    color=cm(q),
-                    label=r"$\hat{y}$" + f"({q})",
-                    alpha=0.7,
-                )
-        ax.set_title(f"ID: {gsp_ids[i]} | {times_utc[i][0].date()}", fontsize="small")
-        xticks = [t for t in times_utc[i] if t.minute == 0][::2]
-        ax.set_xticks(ticks=xticks, labels=[f"{t.hour:02}" for t in xticks], rotation=90)
-        ax.grid()
-    axes[0, 0].legend(loc="best")
-    for ax in axes[-1, :]:
-        ax.set_xlabel("Time (hour of day)")
-    if batch_idx is not None:
-        title = f"Normed {plotting_name} output : batch_idx={batch_idx}"
-    else:
-        title = f"Normed {plotting_name} output"
-    plt.suptitle(title)
-    plt.tight_layout()
-    return fig