peterdudfield committed on
Commit
3f81be8
·
1 Parent(s): 7bffb2f

Delete scripts

Browse files
scripts/backtest_sites.py DELETED
@@ -1,539 +0,0 @@
1
- """
2
- A script to run backtest for PVNet for specific sites
3
-
4
- Use:
5
-
6
- - This script uses hydra to construct the config, just like in `run.py`. So you need to make sure
7
- that the data config is set up appropriate for the model being run in this script
8
- - The PVNet model checkpoint; the time range over which to make predictions are made;
9
- the site ids to produce forecasts for and the output directory where the results
10
- near the top of the script as hard coded user variables. These should be changed.
11
-
12
- ```
13
- python scripts/backtest_sites.py
14
- ```
15
-
16
- """
17
-
18
- try:
19
- import torch.multiprocessing as mp
20
-
21
- mp.set_start_method("spawn", force=True)
22
- mp.set_sharing_strategy("file_system")
23
- except RuntimeError:
24
- pass
25
-
26
- import json
27
- import logging
28
- import os
29
- import sys
30
-
31
- import hydra
32
- import numpy as np
33
- import pandas as pd
34
- import torch
35
- import xarray as xr
36
- from huggingface_hub import hf_hub_download
37
- from huggingface_hub.constants import CONFIG_NAME, PYTORCH_WEIGHTS_NAME
38
- from ocf_data_sampler.sample.base import batch_to_tensor, copy_batch_to_device
39
- from ocf_datapipes.batch import (
40
- BatchKey,
41
- NumpyBatch,
42
- stack_np_examples_into_batch,
43
- )
44
- from ocf_datapipes.config.load import load_yaml_configuration
45
- from ocf_datapipes.load.pv.pv import OpenPVFromNetCDFIterDataPipe
46
- from ocf_datapipes.training.common import create_t0_and_loc_datapipes
47
- from ocf_datapipes.training.pvnet_site import (
48
- DictDatasetIterDataPipe,
49
- _get_datapipes_dict,
50
- construct_sliced_data_pipeline,
51
- split_dataset_dict_dp,
52
- )
53
- from ocf_datapipes.utils.consts import ELEVATION_MEAN, ELEVATION_STD
54
- from omegaconf import DictConfig
55
- from torch.utils.data import DataLoader, IterDataPipe, functional_datapipe
56
- from torch.utils.data.datapipes.iter import IterableWrapper
57
- from tqdm import tqdm
58
-
59
- from pvnet.load_model import get_model_from_checkpoints
60
- from pvnet.utils import SiteLocationLookup
61
-
62
- # ------------------------------------------------------------------
63
- # USER CONFIGURED VARIABLES TO RUN THE SCRIPT
64
-
65
- # Directory path to save results
66
- output_dir = "PLACEHOLDER"
67
-
68
- # Local directory to load the PVNet checkpoint from. By default this should pull the best performing
69
- # checkpoint on the val set
70
- model_chckpoint_dir = "PLACEHOLDER"
71
-
72
- hf_revision = None
73
- hf_token = None
74
- hf_model_id = None
75
-
76
- # Forecasts will be made for all available init times between these
77
- start_datetime = "2022-05-08 00:00"
78
- end_datetime = "2022-05-08 00:30"
79
-
80
- # ------------------------------------------------------------------
81
- # SET UP LOGGING
82
-
83
- logger = logging.getLogger(__name__)
84
- logging.basicConfig(stream=sys.stdout, level=logging.INFO)
85
-
86
- # ------------------------------------------------------------------
87
- # DERIVED VARIABLES
88
-
89
- # This will run on GPU if it exists
90
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
91
-
92
- # ------------------------------------------------------------------
93
- # GLOBAL VARIABLES
94
-
95
- # The frequency of the pv site data
96
- FREQ_MINS = 30
97
-
98
- # When sun as elevation below this, the forecast is set to zero
99
- MIN_DAY_ELEVATION = 0
100
-
101
- # Add all pv site ids here that you wish to produce forecasts for
102
- ALL_SITE_IDS = []
103
- # Need to be in ascending order
104
- ALL_SITE_IDS.sort()
105
-
106
- # ------------------------------------------------------------------
107
- # FUNCTIONS
108
-
109
-
110
@functional_datapipe("pad_forward_pv")
class PadForwardPVIterDataPipe(IterDataPipe):
    """Pad the forecast period of each PV sample out to its full length.

    Sun position is calculated from the PV time index, and for t0 values
    close to the end of the PV data the sliced forecast window can come up
    short as the data runs out. Reindexing onto the full expected time axis
    (filling missing steps with -1) keeps every sample the same shape.
    """

    def __init__(
        self,
        pv_dp: IterDataPipe,
        forecast_duration: np.timedelta64,
        history_duration: np.timedelta64,
        time_resolution_minutes: np.timedelta64,
    ):
        """Init"""

        super().__init__()
        self.pv_dp = pv_dp
        self.forecast_duration = forecast_duration
        self.history_duration = history_duration
        self.time_resolution_minutes = time_resolution_minutes

        # Samples shorter than the history window cannot be padded sensibly
        self.min_seq_length = history_duration // time_resolution_minutes

    def __iter__(self):
        """Iter"""

        for sample in self.pv_dp:
            start = sample.time_utc.data[0]
            # Exclusive end of the full (history + forecast) time axis
            end = (
                start
                + self.history_duration
                + self.forecast_duration
                + self.time_resolution_minutes
            )
            full_time_index = np.arange(start, end, self.time_resolution_minutes)

            if len(sample.time_utc.data) < self.min_seq_length:
                raise ValueError("Not enough PV data to predict")

            yield sample.reindex(time_utc=full_time_index, fill_value=-1)
153
-
154
-
155
def load_model_from_hf(model_id: str, revision: str, token: str):
    """Load a PVNet model from HuggingFace.

    Args:
        model_id: HuggingFace repo ID of the model
        revision: Git revision (branch, tag or commit) to download
        token: HuggingFace access token (may be None for public repos)

    Returns:
        The instantiated model with the downloaded weights loaded, in eval mode
    """

    # Download the weights file
    model_file = hf_hub_download(
        repo_id=model_id,
        filename=PYTORCH_WEIGHTS_NAME,
        revision=revision,
        token=token,
    )

    # Download and load the model config file
    config_file = hf_hub_download(
        repo_id=model_id,
        filename=CONFIG_NAME,
        revision=revision,
        token=token,
    )

    with open(config_file, "r", encoding="utf-8") as f:
        config = json.load(f)

    # Build the (un-initialised) model object from its config
    model = hydra.utils.instantiate(config)

    # Fix: map_location was hard-coded to "cuda", which crashes on CPU-only
    # machines even though the rest of the script auto-detects the device
    map_location = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    state_dict = torch.load(model_file, map_location=map_location)
    model.load_state_dict(state_dict)  # type: ignore
    model.eval()  # type: ignore

    return model
185
-
186
-
187
def preds_to_dataarray(preds, model, valid_times, site_ids):
    """Put numpy array of predictions into a dataarray"""

    # Quantile models emit one probability-level column per quantile, with the
    # median relabelled as the central "forecast_mw" output. Non-quantile
    # models emit a single column, so a trailing axis is added to match.
    if model.use_quantile_regression:
        output_labels = [
            f"forecast_mw_plevel_{int(q*100):02}" for q in model.output_quantiles
        ]
        output_labels[output_labels.index("forecast_mw_plevel_50")] = "forecast_mw"
    else:
        output_labels = ["forecast_mw"]
        preds = preds[..., np.newaxis]

    return xr.DataArray(
        data=preds,
        dims=["pv_system_id", "target_datetime_utc", "output_label"],
        coords=dict(
            pv_system_id=site_ids,
            target_datetime_utc=valid_times,
            output_label=output_labels,
        ),
    )
207
-
208
-
209
# TODO change this to load the PV sites data (metadata?)
def get_sites_ds(config_path: str) -> xr.Dataset:
    """Load site data from the path in the data config.

    Args:
        config_path: Path to the data configuration file

    Returns:
        xarray.Dataset of PVLive truths and capacities
    """

    config = load_yaml_configuration(config_path)
    # The datapipe yields a single dataset; pull out that first element
    site_datapipe = OpenPVFromNetCDFIterDataPipe(pv=config.input_data.pv)
    return next(iter(site_datapipe))
225
-
226
-
227
def get_available_t0_times(start_datetime, end_datetime, config_path):
    """Filter a list of t0 init-times to those for which all required input data is available.

    Args:
        start_datetime: First potential t0 time
        end_datetime: Last potential t0 time
        config_path: Path to data config file

    Returns:
        pandas.DatetimeIndex of the init-times available for required inputs
    """

    start_datetime = pd.Timestamp(start_datetime)
    end_datetime = pd.Timestamp(end_datetime)

    # Open all the input data so we can check which of the potential init
    # times actually have the inputs they require
    datapipes_dict = _get_datapipes_dict(config_path, production=False)

    # Pop out the config file
    config = datapipes_dict.pop("config")

    # We abuse `create_t0_and_loc_datapipes()` to find which of the potential
    # init-times have input data, by feeding it fake site data whose
    # timestamps are the potential init-times. Hacky, but it works for now.

    # Init-times we would like to make predictions for
    potential_init_times = pd.date_range(start_datetime, end_datetime, freq=f"{FREQ_MINS}min")

    # Buffer the potential init-times so that none are lost off the start and
    # end. Again, a hacky step.
    history_duration = pd.Timedelta(config.input_data.pv.history_minutes, "min")
    forecast_duration = pd.Timedelta(config.input_data.pv.forecast_minutes, "min")
    buffered_potential_init_times = pd.date_range(
        start_datetime - history_duration, end_datetime + forecast_duration, freq=f"{FREQ_MINS}min"
    )

    # Build the fake single-site dataset carrying the buffered timestamps.
    # Values are scaled to ~0 so they can never pass any data checks by value.
    fake_site = (
        buffered_potential_init_times.to_frame().to_xarray().rename({"index": "time_utc"})
    )
    fake_site = fake_site.rename({0: "site_pv_power_mw"})
    fake_site = fake_site.expand_dims("pv_system_id", axis=1)
    fake_site = fake_site.assign_coords(
        pv_system_id=[0],
        latitude=("pv_system_id", [0]),
        longitude=("pv_system_id", [0]),
    )
    fake_site = fake_site.site_pv_power_mw.astype(float) * 1e-18

    # Overwrite the site data which is already in the datapipes dict
    datapipes_dict["pv"] = IterableWrapper([fake_site])

    # Use create_t0_and_loc_datapipes to get datapipe of init-times
    location_pipe, t0_datapipe = create_t0_and_loc_datapipes(
        datapipes_dict,
        configuration=config,
        key_for_t0="pv",
        shuffle=False,
    )

    # Loop over the t0s AND locations (to avoid the torch datapipes buffer
    # overflow) even though the locations themselves are not used
    available_init_times = pd.to_datetime(
        [t0 for _, t0 in zip(location_pipe, t0_datapipe)]
    )

    logger.info(
        f"{len(available_init_times)} out of {len(potential_init_times)} "
        "requested init-times have required input data"
    )

    return available_init_times
295
-
296
-
297
def get_loctimes_datapipes(config_path):
    """Create location and init-time datapipes

    Args:
        config_path: Path to data config file

    Returns:
        tuple: A tuple of datapipes
            - Datapipe yielding locations
            - Datapipe yielding init-times
    """

    # Set up ID -> location query object from the site metadata
    ds_sites = get_sites_ds(config_path)
    site_id_to_loc = SiteLocationLookup(ds_sites.longitude, ds_sites.latitude)

    # Keep only the init-times for which all input data exists
    available_target_times = get_available_t0_times(
        start_datetime,
        end_datetime,
        config_path,
    )
    num_t0s = len(available_target_times)

    # Persist the init-times being predicted. This is really helpful to check
    # whilst the backtest is running, since it takes a long time — it shows
    # up-front exactly which init-times the backtest will produce.
    available_target_times.to_frame().to_csv(f"{output_dir}/t0_times.csv")

    # Cycle the full list of site locations once per init-time
    location_pipe = IterableWrapper(
        [[site_id_to_loc(site_id) for site_id in ALL_SITE_IDS]]
    ).repeat(num_t0s)

    # Shard and then unbatch the locations so that each worker generates all
    # samples for all sites of a single init-time
    location_pipe = location_pipe.sharding_filter()
    location_pipe = location_pipe.unbatch(
        unbatch_level=1
    )  # might not need this part since the site datapipe is creating examples

    # Build the times datapipe so each worker receives len(ALL_SITE_IDS)
    # copies of the same datetime for its batch
    t0_datapipe = IterableWrapper(
        [[t0 for site_id in ALL_SITE_IDS] for t0 in available_target_times]
    )
    t0_datapipe = t0_datapipe.sharding_filter()
    t0_datapipe = t0_datapipe.unbatch(
        unbatch_level=1
    )  # might not need this part since the site datapipe is creating examples

    # Both pipes yield num_t0s * num_sites individual items in total
    t0_datapipe = t0_datapipe.set_length(num_t0s * len(ALL_SITE_IDS))
    location_pipe = location_pipe.set_length(num_t0s * len(ALL_SITE_IDS))

    return location_pipe, t0_datapipe
352
-
353
-
354
class ModelPipe:
    """A class to conveniently make and process predictions from batches"""

    def __init__(self, model, ds_site: xr.Dataset):
        """A class to conveniently make and process predictions from batches

        Args:
            model: PVNet site level model
            ds_site: xarray dataset of pv site true values and capacities
        """
        self.model = model
        self.ds_site = ds_site

    def predict_batch(self, batch: NumpyBatch) -> xr.Dataset:
        """Run the batch through the model and compile the predictions into an xarray DataArray

        Args:
            batch: A batch of samples with inputs for each site for the same init-time

        Returns:
            xarray.Dataset of all site and national forecasts for the batch
        """
        # Index of t0 within the PV time axis of the batch
        id0 = batch[BatchKey.pv_t0_idx]

        # t0 and number of forecast steps after it
        t0 = batch[BatchKey.pv_time_utc].cpu().numpy().astype("datetime64[s]")[0, id0]
        n_valid_times = len(batch[BatchKey.pv_time_utc][0, id0 + 1 :])
        model = self.model

        # Forecast horizon timestamps, one per step after t0
        valid_times = pd.to_datetime(
            [t0 + np.timedelta64((i + 1) * FREQ_MINS, "m") for i in range(n_valid_times)]
        )

        # Effective capacities used to scale the normalised outputs
        site_capacities = self.ds_site.nominal_capacity_wp.values

        # Un-normalise the solar elevations stored in the batch; only the
        # forecast part is needed, not the history
        elevation = batch[BatchKey.pv_solar_elevation] * ELEVATION_STD + ELEVATION_MEAN
        elevation = elevation[:, id0 + 1 :]

        # Mask marking the times when the sun is below the day threshold
        da_sundown_mask = xr.DataArray(
            data=elevation < MIN_DAY_ELEVATION,
            dims=["pv_system_id", "target_datetime_utc"],
            coords=dict(
                pv_system_id=ALL_SITE_IDS,
                target_datetime_utc=valid_times,
            ),
        )

        with torch.no_grad():
            # Run batch through model to get 0-1 predictions for all sites
            device_batch = copy_batch_to_device(batch_to_tensor(batch), device)
            y_normed_site = model(device_batch).detach().cpu().numpy()

        da_normed_site = preds_to_dataarray(y_normed_site, model, valid_times, ALL_SITE_IDS)

        # Scale normalised forecasts by capacity and clip negatives
        da_abs_site = da_normed_site.clip(0, None) * site_capacities[:, None, None]

        # Zero the forecast wherever the sun is down
        da_abs_site = da_abs_site.where(~da_sundown_mask).fillna(0.0)

        # Attach the init-time as a leading dimension
        da_abs_site = da_abs_site.expand_dims(dim="init_time_utc", axis=0).assign_coords(
            init_time_utc=np.array([t0], dtype="datetime64[ns]")
        )

        return da_abs_site
422
-
423
-
424
def get_datapipe(config_path: str) -> NumpyBatch:
    """Construct datapipe yielding batches of concurrent samples for all sites

    Args:
        config_path: Path to the data configuration file

    Returns:
        NumpyBatch: Concurrent batch of samples for each site
    """

    # Construct location and init-time datapipes
    location_pipe, t0_datapipe = get_loctimes_datapipes(config_path)

    # One batch per init-time: the t0 pipe holds one copy of each init-time
    # per site, so divide its length by the number of sites
    num_batches = len(t0_datapipe) // len(ALL_SITE_IDS)

    # Construct sample datapipes
    data_pipeline = construct_sliced_data_pipeline(
        config_path,
        location_pipe,
        t0_datapipe,
    )

    # Pad the PV forecast window so every sample has a full-length time axis
    config = load_yaml_configuration(config_path)
    data_pipeline["pv"] = data_pipeline["pv"].pad_forward_pv(
        forecast_duration=np.timedelta64(config.input_data.pv.forecast_minutes, "m"),
        history_duration=np.timedelta64(config.input_data.pv.history_minutes, "m"),
        time_resolution_minutes=np.timedelta64(config.input_data.pv.time_resolution_minutes, "m"),
    )

    data_pipeline = DictDatasetIterDataPipe(
        {k: v for k, v in data_pipeline.items() if k != "config"},
    ).map(split_dataset_dict_dp)

    data_pipeline = data_pipeline.pvnet_site_convert_to_numpy_batch()

    # Batch so that each worker returns a batch of all locations for a single
    # init-time, and convert to tensor for the model
    data_pipeline = (
        data_pipeline.batch(len(ALL_SITE_IDS))
        .map(stack_np_examples_into_batch)
        .map(batch_to_tensor)
    )
    return data_pipeline.set_length(num_batches)
470
-
471
-
472
@hydra.main(config_path="../configs", config_name="config.yaml", version_base="1.2")
def main(config: DictConfig):
    """Runs the backtest"""

    dataloader_kwargs = dict(
        shuffle=False,
        batch_size=None,  # batching is done in the datapipe, not the loader
        sampler=None,
        batch_sampler=None,
        # Number of workers set in the config file
        num_workers=config.datamodule.num_workers,
        collate_fn=None,
        pin_memory=False,
        drop_last=False,
        timeout=0,
        worker_init_fn=None,
        prefetch_factor=config.datamodule.prefetch_factor,
        persistent_workers=False,
    )

    # Set up output dir. Fix: exist_ok=True so a restarted backtest can keep
    # writing into the same directory instead of crashing on makedirs
    os.makedirs(output_dir, exist_ok=True)

    # Create concurrent batch datapipe
    # Each batch includes a sample for each of the n sites for a single init-time
    batch_pipe = get_datapipe(config.datamodule.configuration)
    num_batches = len(batch_pipe)

    # Load the site data as an xarray object
    ds_site = get_sites_ds(config.datamodule.configuration)

    # Create a dataloader for the concurrent batches and use multiprocessing
    dataloader = DataLoader(batch_pipe, **dataloader_kwargs)

    # Load the PVNet model, preferring a local checkpoint over HuggingFace
    if model_chckpoint_dir:
        model, *_ = get_model_from_checkpoints([model_chckpoint_dir], val_best=True)
    elif hf_model_id:
        model = load_model_from_hf(hf_model_id, hf_revision, hf_token)
    else:
        raise ValueError("Provide a model checkpoint or a HuggingFace model")

    model = model.eval().to(device)

    # Create object to make predictions for each input batch
    model_pipe = ModelPipe(model, ds_site)

    # Loop through the batches. Failures on individual init-times are logged
    # and skipped so one bad batch does not kill a long backtest run.
    pbar = tqdm(total=num_batches)
    for i, batch in zip(range(num_batches), dataloader):
        try:
            # Make predictions for the init-time
            ds_abs_all = model_pipe.predict_batch(batch)

            t0 = ds_abs_all.init_time_utc.values[0]

            # Save the predictions, one netCDF file per init-time
            filename = f"{output_dir}/{t0}.nc"
            ds_abs_all.to_netcdf(filename)

            pbar.update()
        except Exception:
            # Fix: was a bare print + pass, which discarded the traceback.
            # logger.exception records the full stack for later debugging.
            logger.exception("Exception at batch %s", i)

    # Close down
    pbar.close()
    del dataloader


if __name__ == "__main__":
    main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scripts/backtest_uk_gsp.py DELETED
@@ -1,431 +0,0 @@
1
- """
2
- A script to run backtest for PVNet and the summation model for UK regional and national
3
-
4
- Use:
5
-
6
- - This script uses hydra to construct the config, just like in `run.py`. So you need to make sure
7
- that the data config is set up appropriate for the model being run in this script
8
- - The PVNet and summation model checkpoints; the time range over which to make predictions are made;
9
- and the output directory where the results near the top of the script as hard coded user
10
- variables. These should be changed.
11
-
12
-
13
- ```
14
- python backtest_uk_gsp.py
15
- ```
16
-
17
- """
18
-
19
# Configure torch multiprocessing before any DataLoader workers are spawned.
# "spawn" plus the "file_system" sharing strategy avoids file-descriptor
# exhaustion when workers exchange tensors. A RuntimeError means the start
# method was already fixed elsewhere; keep the existing configuration.
try:
    import torch.multiprocessing as mp

    mp.set_start_method("spawn", force=True)
    mp.set_sharing_strategy("file_system")
except RuntimeError:
    pass
26
-
27
- import logging
28
- import os
29
- import sys
30
-
31
- import hydra
32
- import numpy as np
33
- import pandas as pd
34
- import torch
35
- import xarray as xr
36
- from ocf_data_sampler.sample.base import batch_to_tensor, copy_batch_to_device
37
- from ocf_datapipes.batch import (
38
- BatchKey,
39
- NumpyBatch,
40
- )
41
- from ocf_datapipes.config.load import load_yaml_configuration
42
- from ocf_datapipes.load import OpenGSP
43
- from ocf_datapipes.training.common import _get_datapipes_dict
44
- from ocf_datapipes.training.pvnet_all_gsp import construct_sliced_data_pipeline, create_t0_datapipe
45
- from ocf_datapipes.utils.consts import ELEVATION_MEAN, ELEVATION_STD
46
- from omegaconf import DictConfig
47
-
48
- # TODO: Having this script rely on pvnet_app sets up a circular dependency. The function
49
- # `preds_to_dataarray()` should probably be moved here
50
- from pvnet_app.utils import preds_to_dataarray
51
- from torch.utils.data import DataLoader
52
- from torch.utils.data.datapipes.iter import IterableWrapper
53
- from tqdm import tqdm
54
-
55
- from pvnet.load_model import get_model_from_checkpoints
56
-
57
# ------------------------------------------------------------------
# USER CONFIGURED VARIABLES
output_dir = "/mnt/disks/extra_batches/test_backtest"

# Local directory to load the PVNet checkpoint from. By default this should
# pull the best performing checkpoint on the val set
model_chckpoint_dir = "/home/jamesfulton/repos/PVNet/checkpoints/q911tei5"

# Local directory to load the summation model checkpoint from. By default this
# should pull the best performing checkpoint on the val set. If set to None a
# simple sum is used instead
summation_chckpoint_dir = (
    "/home/jamesfulton/repos/PVNet_summation/checkpoints/pvnet_summation/73oa4w9t"
)

# Forecasts will be made for all available init times between these
start_datetime = "2022-05-08 00:00"
end_datetime = "2022-05-08 00:30"

# ------------------------------------------------------------------
# SET UP LOGGING

logger = logging.getLogger(__name__)
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

# ------------------------------------------------------------------
# DERIVED VARIABLES

# Run on GPU when one is available, otherwise fall back to CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ------------------------------------------------------------------
# GLOBAL VARIABLES

# The frequency of the GSP data
FREQ_MINS = 30

# When the sun has elevation below this, the forecast is set to zero
MIN_DAY_ELEVATION = 0

# All regional GSP IDs - not including national which is treated separately
ALL_GSP_IDS = np.arange(1, 318)

# ------------------------------------------------------------------
# FUNCTIONS
101
-
102
-
103
def get_gsp_ds(config_path: str) -> xr.Dataset:
    """Load GSP data from the path in the data config.

    Args:
        config_path: Path to the data configuration file

    Returns:
        xarray.Dataset of PVLive truths and capacities
    """

    config = load_yaml_configuration(config_path)
    # The datapipe yields a single dataset; pull out that first element
    gsp_datapipe = OpenGSP(gsp_pv_power_zarr_path=config.input_data.gsp.gsp_zarr_path)
    return next(iter(gsp_datapipe))
118
-
119
-
120
def get_available_t0_times(start_datetime, end_datetime, config_path):
    """Filter a list of t0 init-times to those for which all required input data is available.

    Args:
        start_datetime: First potential t0 time
        end_datetime: Last potential t0 time
        config_path: Path to data config file

    Returns:
        pandas.DatetimeIndex of the init-times available for required inputs
    """

    start_datetime = pd.Timestamp(start_datetime)
    end_datetime = pd.Timestamp(end_datetime)

    # Open all the input data so we can check which of the potential init
    # times actually have the inputs they require
    datapipes_dict = _get_datapipes_dict(config_path, production=False)

    # Pop out the config file
    config = datapipes_dict.pop("config")

    # We abuse `create_t0_datapipe()` to find which of the potential
    # init-times have input data, by feeding it fake GSP data whose
    # timestamps are the potential init-times. Hacky, but it works for now.

    # Init-times we would like to make predictions for
    potential_init_times = pd.date_range(start_datetime, end_datetime, freq=f"{FREQ_MINS}min")

    # Buffer the potential init-times so that none are lost off the start and
    # end. Again, a hacky step.
    history_duration = pd.Timedelta(config.input_data.gsp.history_minutes, "min")
    forecast_duration = pd.Timedelta(config.input_data.gsp.forecast_minutes, "min")
    buffered_potential_init_times = pd.date_range(
        start_datetime - history_duration, end_datetime + forecast_duration, freq=f"{FREQ_MINS}min"
    )

    # Build the fake single-GSP dataset carrying the buffered timestamps.
    # Values are scaled to ~0 so they can never pass any data checks by value.
    fake_gsp = buffered_potential_init_times.to_frame().to_xarray().rename({"index": "time_utc"})
    fake_gsp = fake_gsp.rename({0: "gsp_pv_power_mw"})
    fake_gsp = fake_gsp.expand_dims("gsp_id", axis=1)
    fake_gsp = fake_gsp.assign_coords(
        gsp_id=[0],
        x_osgb=("gsp_id", [0]),
        y_osgb=("gsp_id", [0]),
    )
    fake_gsp = fake_gsp.gsp_pv_power_mw.astype(float) * 1e-18

    # Overwrite the GSP data which is already in the datapipes dict
    datapipes_dict["gsp"] = IterableWrapper([fake_gsp])

    # Use create_t0_datapipe to get datapipe of init-times
    t0_datapipe = create_t0_datapipe(
        datapipes_dict,
        configuration=config,
        shuffle=False,
    )

    # Create a full list of available init-times
    available_init_times = pd.to_datetime([t0 for t0 in t0_datapipe])

    logger.info(
        f"{len(available_init_times)} out of {len(potential_init_times)} "
        "requested init-times have required input data"
    )

    return available_init_times
185
-
186
-
187
def get_times_datapipe(config_path):
    """Create init-time datapipe

    Args:
        config_path: Path to data config file

    Returns:
        Datapipe: A Datapipe yielding init-times
    """

    # Keep only the init-times for which all input data exists
    available_target_times = get_available_t0_times(
        start_datetime,
        end_datetime,
        config_path,
    )
    num_t0s = len(available_target_times)

    # Persist the init-times being predicted. This is really helpful to check
    # whilst the backtest is running, since it takes a long time — it shows
    # up-front exactly which init-times the backtest will produce.
    available_target_times.to_frame().to_csv(f"{output_dir}/t0_times.csv")

    # Create times datapipe so each worker receives 317 copies of the same
    # datetime for its batch
    t0_datapipe = IterableWrapper(available_target_times).sharding_filter()
    t0_datapipe = t0_datapipe.set_length(num_t0s)

    return t0_datapipe
217
-
218
-
219
class ModelPipe:
    """A class to conveniently make and process predictions from batches"""

    def __init__(self, model, summation_model, ds_gsp: xr.Dataset):
        """A class to conveniently make and process predictions from batches

        Args:
            model: PVNet GSP level model
            summation_model: Summation model to make national forecast from GSP level forecasts
            ds_gsp: xarray dataset of PVLive true values and capacities
        """
        self.model = model
        self.summation_model = summation_model
        self.ds_gsp = ds_gsp

    def predict_batch(self, batch: NumpyBatch) -> xr.Dataset:
        """Run the batch through the model and compile the predictions into an xarray DataArray

        Args:
            batch: A batch of samples with inputs for each GSP for the same init-time

        Returns:
            xarray.Dataset of all GSP and national forecasts for the batch
        """

        # Index of t0 within the GSP time axis of the batch
        id0 = batch[BatchKey.gsp_t0_idx]
        t0 = batch[BatchKey.gsp_time_utc].cpu().numpy().astype("datetime64[s]")[0, id0]
        n_valid_times = len(batch[BatchKey.gsp_time_utc][0, id0 + 1 :])
        ds_gsp = self.ds_gsp
        model = self.model
        summation_model = self.summation_model

        # Forecast horizon timestamps, one per step after t0
        valid_times = pd.to_datetime(
            [t0 + np.timedelta64((i + 1) * FREQ_MINS, "m") for i in range(n_valid_times)]
        )

        # Effective capacities at t0: regional GSPs (id >= 1) and national (id 0)
        gsp_capacities = ds_gsp.effective_capacity_mwp.sel(
            time_utc=t0, gsp_id=slice(1, None)
        ).values
        national_capacity = ds_gsp.effective_capacity_mwp.sel(time_utc=t0, gsp_id=0).item()

        # Un-normalise the solar elevations stored in the batch; only the
        # forecast part is needed, not the history
        elevation = batch[BatchKey.gsp_solar_elevation] * ELEVATION_STD + ELEVATION_MEAN
        elevation = elevation[:, id0 + 1 :]

        # Mask marking the times when the sun is below the day threshold
        da_sundown_mask = xr.DataArray(
            data=elevation < MIN_DAY_ELEVATION,
            dims=["gsp_id", "target_datetime_utc"],
            coords=dict(
                gsp_id=ALL_GSP_IDS,
                target_datetime_utc=valid_times,
            ),
        )

        with torch.no_grad():
            # Run batch through model to get 0-1 predictions for all GSPs
            device_batch = copy_batch_to_device(batch_to_tensor(batch), device)
            y_normed_gsp = model(device_batch).detach().cpu().numpy()

        da_normed_gsp = preds_to_dataarray(y_normed_gsp, model, valid_times, ALL_GSP_IDS)

        # Scale normalised forecasts by capacity and clip negatives
        da_abs_gsp = da_normed_gsp.clip(0, None) * gsp_capacities[:, None, None]

        # Zero the forecast wherever the sun is down
        da_abs_gsp = da_abs_gsp.where(~da_sundown_mask).fillna(0.0)

        if summation_model is not None:
            # Make national predictions using the summation model
            with torch.no_grad():
                # Construct sample for the summation model
                summation_inputs = {
                    "pvnet_outputs": torch.Tensor(y_normed_gsp[np.newaxis]).to(device),
                    "effective_capacity": (
                        torch.Tensor(gsp_capacities / national_capacity)
                        .to(device)
                        .unsqueeze(0)
                        .unsqueeze(-1)
                    ),
                }

                # Run batch through the summation model
                y_normed_national = (
                    summation_model(summation_inputs).detach().squeeze().cpu().numpy()
                )

            # Convert national predictions to DataArray
            da_normed_national = preds_to_dataarray(
                y_normed_national[np.newaxis], summation_model, valid_times, gsp_ids=[0]
            )

            # Scale normalised forecasts by capacity and clip negatives
            da_abs_national = da_normed_national.clip(0, None) * national_capacity

            # Apply sundown mask - All GSPs must be masked to mask national
            da_abs_national = da_abs_national.where(~da_sundown_mask.all(dim="gsp_id")).fillna(0.0)
        else:
            # If no summation model, make national predictions using simple sum
            da_abs_national = (
                da_abs_gsp.sum(dim="gsp_id")
                .expand_dims(dim="gsp_id", axis=0)
                .assign_coords(gsp_id=[0])
            )

        # Concat the regional GSP and national predictions
        da_abs_all = xr.concat([da_abs_national, da_abs_gsp], dim="gsp_id")
        ds_abs_all = da_abs_all.to_dataset(name="hindcast")

        # Attach the init-time as a leading dimension
        ds_abs_all = ds_abs_all.expand_dims(dim="init_time_utc", axis=0).assign_coords(
            init_time_utc=[t0]
        )

        return ds_abs_all
338
-
339
-
340
- def get_datapipe(config_path: str) -> NumpyBatch:
341
- """Construct datapipe yielding batches of concurrent samples for all GSPs
342
-
343
- Args:
344
- config_path: Path to the data configuration file
345
-
346
- Returns:
347
- NumpyBatch: Concurrent batch of samples for each GSP
348
- """
349
-
350
- # Construct location and init-time datapipes
351
- t0_datapipe = get_times_datapipe(config_path)
352
-
353
- # Construct sample datapipes
354
- data_pipeline = construct_sliced_data_pipeline(
355
- config_path,
356
- t0_datapipe,
357
- )
358
-
359
- # Convert to tensor for model
360
- data_pipeline = data_pipeline.map(batch_to_tensor).set_length(len(t0_datapipe))
361
-
362
- return data_pipeline
363
-
364
-
365
- @hydra.main(config_path="../configs", config_name="config.yaml", version_base="1.2")
366
- def main(config: DictConfig):
367
- """Runs the backtest"""
368
-
369
- dataloader_kwargs = dict(
370
- shuffle=False,
371
- batch_size=None,
372
- sampler=None,
373
- batch_sampler=None,
374
- # Number of workers set in the config file
375
- num_workers=config.datamodule.num_workers,
376
- collate_fn=None,
377
- pin_memory=False,
378
- drop_last=False,
379
- timeout=0,
380
- worker_init_fn=None,
381
- prefetch_factor=config.datamodule.prefetch_factor,
382
- persistent_workers=False,
383
- )
384
-
385
- # Set up output dir
386
- os.makedirs(output_dir)
387
-
388
- # Create concurrent batch datapipe
389
- # Each batch includes a sample for each of the 317 GSPs for a single init-time
390
- batch_pipe = get_datapipe(config.datamodule.configuration)
391
- num_batches = len(batch_pipe)
392
-
393
- # Load the GSP data as an xarray object
394
- ds_gsp = get_gsp_ds(config.datamodule.configuration)
395
-
396
- # Create a dataloader for the concurrent batches and use multiprocessing
397
- dataloader = DataLoader(batch_pipe, **dataloader_kwargs)
398
-
399
- # Load the PVNet model and summation model
400
- model, *_ = get_model_from_checkpoints([model_chckpoint_dir], val_best=True)
401
- model = model.eval().to(device)
402
- if summation_chckpoint_dir is None:
403
- summation_model = None
404
- else:
405
- summation_model, *_ = get_model_from_checkpoints([summation_chckpoint_dir], val_best=True)
406
- summation_model = summation_model.eval().to(device)
407
-
408
- # Create object to make predictions for each input batch
409
- model_pipe = ModelPipe(model, summation_model, ds_gsp)
410
-
411
- # Loop through the batches
412
- pbar = tqdm(total=num_batches)
413
- for i, batch in zip(range(num_batches), dataloader):
414
- # Make predictions for the init-time
415
- ds_abs_all = model_pipe.predict_batch(batch)
416
-
417
- t0 = ds_abs_all.init_time_utc.values[0]
418
-
419
- # Save the predictions
420
- filename = f"{output_dir}/{t0}.nc"
421
- ds_abs_all.to_netcdf(filename)
422
-
423
- pbar.update()
424
-
425
- # Close down
426
- pbar.close()
427
- del dataloader
428
-
429
-
430
- if __name__ == "__main__":
431
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scripts/checkpoint_to_huggingface.py DELETED
@@ -1,83 +0,0 @@
1
- """Command line tool to push locally save model checkpoints to huggingface
2
-
3
- use:
4
- python checkpoint_to_huggingface.py "path/to/model/checkpoints" \
5
- --huggingface-repo="openclimatefix/pvnet_uk_region" \
6
- --wandb-repo="openclimatefix/pvnet2.1" \
7
- --local-path="~/tmp/this_model" \
8
- --no-push-to-hub
9
- """
10
-
11
- import tempfile
12
-
13
- import typer
14
- import wandb
15
-
16
- from pvnet.load_model import get_model_from_checkpoints
17
-
18
- app = typer.Typer(pretty_exceptions_show_locals=False)
19
-
20
- @app.command()
21
- def push_to_huggingface(
22
- checkpoint_dir_paths: list[str],
23
- huggingface_repo: str = "openclimatefix/pvnet_uk_region", # e.g. openclimatefix/windnet_india
24
- wandb_repo: str = "openclimatefix/pvnet2.1",
25
- val_best: bool = True,
26
- wandb_ids: list[str] = [],
27
- local_path: str = None,
28
- push_to_hub: bool = True,
29
- ):
30
- """Push a local model to a huggingface model repo
31
-
32
- Args:
33
- checkpoint_dir_paths: Path(s) of the checkpoint directory(ies)
34
- huggingface_repo: Name of the HuggingFace repo to push the model to
35
- wandb_repo: Name of the wandb repo which has training logs
36
- val_best: Use best model according to val loss, else last saved model
37
- wandb_ids: The wandb ID code(s)
38
- local_path: Where to save the local copy of the model
39
- push_to_hub: Whether to push the model to the hub or just create local version.
40
- """
41
-
42
- assert push_to_hub or local_path is not None
43
-
44
- is_ensemble = len(checkpoint_dir_paths) > 1
45
-
46
- # Check if checkpoint dir name is wandb run ID
47
- if wandb_ids == []:
48
- all_wandb_ids = [run.id for run in wandb.Api().runs(path=wandb_repo)]
49
- for path in checkpoint_dir_paths:
50
- dirname = path.split("/")[-1]
51
- if dirname in all_wandb_ids:
52
- wandb_ids.append(dirname)
53
- else:
54
- wandb_ids.append(None)
55
-
56
- model, model_config, data_config = get_model_from_checkpoints(checkpoint_dir_paths, val_best)
57
-
58
- if not is_ensemble:
59
- wandb_ids = wandb_ids[0]
60
-
61
- # Push to hub
62
- if local_path is None:
63
- temp_dir = tempfile.TemporaryDirectory()
64
- model_output_dir = temp_dir.name
65
- else:
66
- model_output_dir = local_path
67
-
68
- model.save_pretrained(
69
- model_output_dir,
70
- config=model_config,
71
- data_config=data_config,
72
- wandb_repo=wandb_repo,
73
- wandb_ids=wandb_ids,
74
- push_to_hub=push_to_hub,
75
- repo_id=huggingface_repo if push_to_hub else None,
76
- )
77
-
78
- if local_path is None:
79
- temp_dir.cleanup()
80
-
81
-
82
- if __name__ == "__main__":
83
- app()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scripts/save_concurrent_samples.py DELETED
@@ -1,189 +0,0 @@
1
- """
2
- Constructs batches where each batch includes all GSPs and only a single timestamp.
3
-
4
- Currently a slightly hacky implementation due to the way the configs are done. This script will use
5
- the same config file currently set to train the model. In the datamodule config it is possible
6
- to set the batch_output_dir and number of train/val batches; they can also be overridden in the
7
- command as shown in the example below.
8
-
9
- use:
10
- ```
11
- python save_concurrent_samples.py \
12
- +datamodule.sample_output_dir="/mnt/disks/concurrent_batches/concurrent_samples_sat_pred_test" \
13
- +datamodule.num_train_samples=20 \
14
- +datamodule.num_val_samples=20
15
- ```
16
-
17
- """
18
- # Ensure this block of code runs only in the main process to avoid issues with worker processes.
19
- if __name__ == "__main__":
20
- import torch.multiprocessing as mp
21
-
22
- # Set the start method for torch multiprocessing. Choose either "forkserver" or "spawn" to be
23
- # compatible with dask's multiprocessing.
24
- mp.set_start_method("forkserver")
25
-
26
- # Set the sharing strategy to 'file_system' to handle file descriptor limitations. This is
27
- # important because libraries like Zarr may open many files, which can exhaust the file
28
- # descriptor limit if too many workers are used.
29
- mp.set_sharing_strategy("file_system")
30
-
31
-
32
- import logging
33
- import os
34
- import shutil
35
- import sys
36
- import warnings
37
-
38
- import hydra
39
- import numpy as np
40
- import torch
41
- from ocf_data_sampler.torch_datasets.datasets.pvnet_uk import PVNetUKConcurrentDataset
42
- from omegaconf import DictConfig, OmegaConf
43
- from sqlalchemy import exc as sa_exc
44
- from torch.utils.data import DataLoader, Dataset
45
- from tqdm import tqdm
46
-
47
- from pvnet.utils import print_config
48
-
49
- # ------- filter warning and set up config -------
50
-
51
- warnings.filterwarnings("ignore", category=sa_exc.SAWarning)
52
-
53
- logger = logging.getLogger(__name__)
54
-
55
- logging.basicConfig(stream=sys.stdout, level=logging.ERROR)
56
-
57
- # -------------------------------------------------
58
-
59
-
60
- class SaveFuncFactory:
61
- """Factory for creating a function to save a sample to disk."""
62
-
63
- def __init__(self, save_dir: str):
64
- """Factory for creating a function to save a sample to disk."""
65
- self.save_dir = save_dir
66
-
67
- def __call__(self, sample, sample_num: int):
68
- """Save a sample to disk"""
69
- torch.save(sample, f"{self.save_dir}/{sample_num:08}.pt")
70
-
71
-
72
- def save_samples_with_dataloader(
73
- dataset: Dataset,
74
- save_dir: str,
75
- num_samples: int,
76
- dataloader_kwargs: dict,
77
- ) -> None:
78
- """Save samples from a dataset using a dataloader."""
79
- save_func = SaveFuncFactory(save_dir)
80
-
81
- gsp_ids = np.array([loc.id for loc in dataset.locations])
82
-
83
- dataloader = DataLoader(dataset, **dataloader_kwargs)
84
-
85
- pbar = tqdm(total=num_samples)
86
- for i, sample in zip(range(num_samples), dataloader):
87
- check_sample(sample, gsp_ids)
88
- save_func(sample, i)
89
- pbar.update()
90
- pbar.close()
91
-
92
-
93
- def check_sample(sample, gsp_ids):
94
- """Check if sample is valid concurrent batch for all GSPs"""
95
- # Check all GSP IDs are included and in correct order
96
- assert (sample["gsp_id"].flatten().numpy() == gsp_ids).all()
97
- # Check all times are the same
98
- assert len(np.unique(sample["gsp_time_utc"][:, 0].numpy())) == 1
99
-
100
-
101
- @hydra.main(config_path="../configs/", config_name="config.yaml", version_base="1.2")
102
- def main(config: DictConfig) -> None:
103
- """Constructs and saves validation and training samples."""
104
- config_dm = config.datamodule
105
-
106
- print_config(config, resolve=False)
107
-
108
- # Set up directory
109
- os.makedirs(config_dm.sample_output_dir, exist_ok=False)
110
-
111
- # Copy across configs which define the samples into the new sample directory
112
- with open(f"{config_dm.sample_output_dir}/datamodule.yaml", "w") as f:
113
- f.write(OmegaConf.to_yaml(config_dm))
114
-
115
- shutil.copyfile(
116
- config_dm.configuration, f"{config_dm.sample_output_dir}/data_configuration.yaml"
117
- )
118
-
119
- # Define the kwargs going into the train and val dataloaders
120
- dataloader_kwargs = dict(
121
- shuffle=True,
122
- batch_size=None,
123
- sampler=None,
124
- batch_sampler=None,
125
- num_workers=config_dm.num_workers,
126
- collate_fn=None,
127
- pin_memory=False, # Only using CPU to prepare samples so pinning is not beneficial
128
- drop_last=False,
129
- timeout=0,
130
- worker_init_fn=None,
131
- prefetch_factor=config_dm.prefetch_factor,
132
- persistent_workers=False, # Not needed since we only enter the dataloader loop once
133
- )
134
-
135
- if config_dm.num_val_samples > 0:
136
- print("----- Saving val samples -----")
137
-
138
- val_output_dir = f"{config_dm.sample_output_dir}/val"
139
-
140
- # Make directory for val samples
141
- os.mkdir(val_output_dir)
142
-
143
- # Get the dataset
144
- val_dataset = PVNetUKConcurrentDataset(
145
- config_dm.configuration,
146
- start_time=config_dm.val_period[0],
147
- end_time=config_dm.val_period[1],
148
- )
149
-
150
- # Save samples
151
- save_samples_with_dataloader(
152
- dataset=val_dataset,
153
- save_dir=val_output_dir,
154
- num_samples=config_dm.num_val_samples,
155
- dataloader_kwargs=dataloader_kwargs,
156
- )
157
-
158
- del val_dataset
159
-
160
- if config_dm.num_train_samples > 0:
161
- print("----- Saving train samples -----")
162
-
163
- train_output_dir = f"{config_dm.sample_output_dir}/train"
164
-
165
- # Make directory for train samples
166
- os.mkdir(train_output_dir)
167
-
168
- # Get the dataset
169
- train_dataset = PVNetUKConcurrentDataset(
170
- config_dm.configuration,
171
- start_time=config_dm.train_period[0],
172
- end_time=config_dm.train_period[1],
173
- )
174
-
175
- # Save samples
176
- save_samples_with_dataloader(
177
- dataset=train_dataset,
178
- save_dir=train_output_dir,
179
- num_samples=config_dm.num_train_samples,
180
- dataloader_kwargs=dataloader_kwargs,
181
- )
182
-
183
- del train_dataset
184
-
185
- print("----- Saving complete -----")
186
-
187
-
188
- if __name__ == "__main__":
189
- main()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
scripts/save_samples.py DELETED
@@ -1,218 +0,0 @@
1
- """
2
- Constructs samples and saves them to disk.
3
-
4
- Currently a slightly hacky implementation due to the way the configs are done. This script will use
5
- the same config file currently set to train the model.
6
-
7
- use:
8
- ```
9
- python save_samples.py
10
- ```
11
- if setting all values in the datamodule config file, or
12
-
13
- ```
14
- python save_samples.py \
15
- +datamodule.sample_output_dir="/mnt/disks/bigbatches/samples_v0" \
16
- +datamodule.num_train_samples=0 \
17
- +datamodule.num_val_samples=2 \
18
- datamodule.num_workers=2 \
19
- datamodule.prefetch_factor=2
20
- ```
21
- if wanting to override these values for example
22
- """
23
-
24
- # Ensure this block of code runs only in the main process to avoid issues with worker processes.
25
- if __name__ == "__main__":
26
- import torch.multiprocessing as mp
27
-
28
- # Set the start method for torch multiprocessing. Choose either "forkserver" or "spawn" to be
29
- # compatible with dask's multiprocessing.
30
- mp.set_start_method("forkserver")
31
-
32
- # Set the sharing strategy to 'file_system' to handle file descriptor limitations. This is
33
- # important because libraries like Zarr may open many files, which can exhaust the file
34
- # descriptor limit if too many workers are used.
35
- mp.set_sharing_strategy("file_system")
36
-
37
-
38
- import logging
39
- import os
40
- import shutil
41
- import sys
42
- import warnings
43
-
44
- import dask
45
- import hydra
46
- from ocf_data_sampler.torch_datasets.datasets import PVNetUKRegionalDataset, SitesDataset
47
- from ocf_data_sampler.torch_datasets.sample.site import SiteSample
48
- from ocf_data_sampler.torch_datasets.sample.uk_regional import UKRegionalSample
49
- from omegaconf import DictConfig, OmegaConf
50
- from sqlalchemy import exc as sa_exc
51
- from torch.utils.data import DataLoader, Dataset
52
- from tqdm import tqdm
53
-
54
- from pvnet.utils import print_config
55
-
56
- dask.config.set(scheduler="threads", num_workers=4)
57
-
58
-
59
- # ------- filter warning and set up config -------
60
-
61
- warnings.filterwarnings("ignore", category=sa_exc.SAWarning)
62
-
63
- logger = logging.getLogger(__name__)
64
-
65
- logging.basicConfig(stream=sys.stdout, level=logging.ERROR)
66
-
67
- # -------------------------------------------------
68
-
69
-
70
- class SaveFuncFactory:
71
- """Factory for creating a function to save a sample to disk."""
72
-
73
- def __init__(self, save_dir: str, renewable: str = "pv_uk"):
74
- """Factory for creating a function to save a sample to disk."""
75
- self.save_dir = save_dir
76
- self.renewable = renewable
77
-
78
- def __call__(self, sample, sample_num: int):
79
- """Save a sample to disk"""
80
- save_path = f"{self.save_dir}/{sample_num:08}"
81
-
82
- if self.renewable == "pv_uk":
83
- sample_class = UKRegionalSample(sample)
84
- filename = f"{save_path}.pt"
85
- elif self.renewable == "site":
86
- sample_class = SiteSample(sample)
87
- filename = f"{save_path}.nc"
88
- else:
89
- raise ValueError(f"Unknown renewable: {self.renewable}")
90
- # Assign data and save
91
- sample_class._data = sample
92
- sample_class.save(filename)
93
-
94
-
95
- def get_dataset(
96
- config_path: str, start_time: str, end_time: str, renewable: str = "pv_uk"
97
- ) -> Dataset:
98
- """Get the dataset for the given renewable type."""
99
- if renewable == "pv_uk":
100
- dataset_cls = PVNetUKRegionalDataset
101
- elif renewable == "site":
102
- dataset_cls = SitesDataset
103
- else:
104
- raise ValueError(f"Unknown renewable: {renewable}")
105
-
106
- return dataset_cls(config_path, start_time=start_time, end_time=end_time)
107
-
108
-
109
- def save_samples_with_dataloader(
110
- dataset: Dataset,
111
- save_dir: str,
112
- num_samples: int,
113
- dataloader_kwargs: dict,
114
- renewable: str = "pv_uk",
115
- ) -> None:
116
- """Save samples from a dataset using a dataloader."""
117
- save_func = SaveFuncFactory(save_dir, renewable=renewable)
118
-
119
- dataloader = DataLoader(dataset, **dataloader_kwargs)
120
-
121
- pbar = tqdm(total=num_samples)
122
- for i, sample in zip(range(num_samples), dataloader):
123
- save_func(sample, i)
124
- pbar.update()
125
- pbar.close()
126
-
127
-
128
- @hydra.main(config_path="../configs/", config_name="config.yaml", version_base="1.2")
129
- def main(config: DictConfig) -> None:
130
- """Constructs and saves validation and training samples."""
131
- config_dm = config.datamodule
132
-
133
- print_config(config, resolve=False)
134
-
135
- # Set up directory
136
- os.makedirs(config_dm.sample_output_dir, exist_ok=False)
137
-
138
- # Copy across configs which define the samples into the new sample directory
139
- with open(f"{config_dm.sample_output_dir}/datamodule.yaml", "w") as f:
140
- f.write(OmegaConf.to_yaml(config_dm))
141
-
142
- shutil.copyfile(
143
- config_dm.configuration, f"{config_dm.sample_output_dir}/data_configuration.yaml"
144
- )
145
-
146
- # Define the kwargs going into the train and val dataloaders
147
- dataloader_kwargs = dict(
148
- shuffle=True,
149
- batch_size=None,
150
- sampler=None,
151
- batch_sampler=None,
152
- num_workers=config_dm.num_workers,
153
- collate_fn=None,
154
- pin_memory=False, # Only using CPU to prepare samples so pinning is not beneficial
155
- drop_last=False,
156
- timeout=0,
157
- worker_init_fn=None,
158
- prefetch_factor=config_dm.prefetch_factor,
159
- persistent_workers=False, # Not needed since we only enter the dataloader loop once
160
- )
161
-
162
- if config_dm.num_val_samples > 0:
163
- print("----- Saving val samples -----")
164
-
165
- val_output_dir = f"{config_dm.sample_output_dir}/val"
166
-
167
- # Make directory for val samples
168
- os.mkdir(val_output_dir)
169
-
170
- # Get the dataset
171
- val_dataset = get_dataset(
172
- config_dm.configuration,
173
- *config_dm.val_period,
174
- renewable=config.renewable,
175
- )
176
-
177
- # Save samples
178
- save_samples_with_dataloader(
179
- dataset=val_dataset,
180
- save_dir=val_output_dir,
181
- num_samples=config_dm.num_val_samples,
182
- dataloader_kwargs=dataloader_kwargs,
183
- renewable=config.renewable,
184
- )
185
-
186
- del val_dataset
187
-
188
- if config_dm.num_train_samples > 0:
189
- print("----- Saving train samples -----")
190
-
191
- train_output_dir = f"{config_dm.sample_output_dir}/train"
192
-
193
- # Make directory for train samples
194
- os.mkdir(train_output_dir)
195
-
196
- # Get the dataset
197
- train_dataset = get_dataset(
198
- config_dm.configuration,
199
- *config_dm.train_period,
200
- renewable=config.renewable,
201
- )
202
-
203
- # Save samples
204
- save_samples_with_dataloader(
205
- dataset=train_dataset,
206
- save_dir=train_output_dir,
207
- num_samples=config_dm.num_train_samples,
208
- dataloader_kwargs=dataloader_kwargs,
209
- renewable=config.renewable,
210
- )
211
-
212
- del train_dataset
213
-
214
- print("----- Saving complete -----")
215
-
216
-
217
- if __name__ == "__main__":
218
- main()