| data: |
| format: zarr |
| resolution: n320 |
| frequency: 6h |
| timestep: 6h |
| forcing: |
| - cos_latitude |
| - cos_longitude |
| - sin_latitude |
| - sin_longitude |
| - cos_julian_day |
| - cos_local_time |
| - sin_julian_day |
| - sin_local_time |
| - insolation |
| - lsm |
| - sdor |
| - slor |
| - z |
| diagnostic: |
| - tp |
| - cp |
| - sf |
| - tcc |
| - hcc |
| - lcc |
| - mcc |
| - ro |
| - ssrd |
| - strd |
| - 100u |
| - 100v |
| remapped: null |
| normalizer: |
| default: mean-std |
| remap: |
| cp: tp |
| sf: tp |
| std: |
| - tp |
| - cp |
| - sf |
| - ro |
| - tcw |
| - ssrd |
| - q_50 |
| - q_100 |
| - q_150 |
| - q_200 |
| - q_250 |
| - q_300 |
| - q_400 |
| - q_500 |
| - q_600 |
| - q_700 |
| - q_850 |
| - q_925 |
| - q_1000 |
| min-max: null |
| max: |
| - sdor |
| - slor |
| - z |
| none: |
| - cos_latitude |
| - cos_longitude |
| - sin_latitude |
| - sin_longitude |
| - cos_julian_day |
| - cos_local_time |
| - sin_julian_day |
| - sin_local_time |
| - insolation |
| - lsm |
| - tcc |
| - mcc |
| - hcc |
| - lcc |
| - swvl1 |
| - swvl2 |
| imputer: |
| default: none |
| minimum: |
| - swvl1 |
| - swvl2 |
| - ro |
| mean: |
| - stl1 |
| - stl2 |
| remapper: |
| default: none |
| processors: |
| imputer: |
| _target_: anemoi.models.preprocessing.imputer.InputImputer |
| _convert_: all |
| config: ${data.imputer} |
| normalizer: |
| _target_: anemoi.models.preprocessing.normalizer.InputNormalizer |
| config: ${data.normalizer} |
| num_features: null |
| dataloader: |
| prefetch_factor: 2 |
| pin_memory: true |
| read_group_size: ${hardware.num_gpus_per_model} |
| num_workers: |
| training: 8 |
| validation: 8 |
| test: 1 |
| predict: 1 |
| batch_size: |
| training: 1 |
| validation: 1 |
| test: 4 |
| predict: 4 |
| limit_batches: |
| training: 1000 |
| validation: 10 |
| test: 20 |
| predict: 20 |
| grid_indices: |
| _target_: anemoi.training.data.grid_indices.FullGrid |
| nodes_name: ${graph.data} |
| dataset: ${hardware.paths.data}/${hardware.files.dataset} |
| training: |
| dataset: |
| - dataset: ${hardware.paths.data}/${hardware.files.dataset} |
| start: null |
| end: 2022 |
| frequency: ${data.frequency} |
| start: null |
| end: 2022 |
| drop: [] |
| validation: |
| dataset: |
| - dataset: ${hardware.paths.data}/${hardware.files.dataset} |
| start: 2022 |
| end: 2024 |
| frequency: ${data.frequency} |
| start: 2022 |
| end: 2024 |
| drop: [] |
| test: |
| dataset: |
| - dataset: ${hardware.paths.data}/${hardware.files.dataset} |
| start: 2022 |
| end: null |
| frequency: ${data.frequency} |
| start: 2022 |
| end: null |
| drop: [] |
| diagnostics: |
| plot: |
| asynchronous: true |
| datashader: true |
| frequency: |
| batch: 750 |
| epoch: 5 |
| parameters: |
| - z_500 |
| - t_850 |
| - u_850 |
| - v_850 |
| - 2t |
| - 10u |
| - 10v |
| - sp |
| - tp |
| - cp |
| sample_idx: 0 |
| precip_and_related_fields: |
| - tp |
| - cp |
| colormaps: |
| default: |
| _target_: anemoi.training.utils.custom_colormaps.MatplotlibColormap |
| name: viridis |
| error: |
| _target_: anemoi.training.utils.custom_colormaps.MatplotlibColormap |
| name: bwr |
| precip: |
| _target_: anemoi.training.utils.custom_colormaps.MatplotlibColormapClevels |
| clevels: |
| - '#ffffff' |
| - '#04e9e7' |
| - '#019ff4' |
| - '#0300f4' |
| - '#02fd02' |
| - '#01c501' |
| - '#008e00' |
| - '#fdf802' |
| - '#e5bc00' |
| - '#fd9500' |
| - '#fd0000' |
| - '#d40000' |
| - '#bc0000' |
| - '#f800fd' |
| variables: ${diagnostics.plot.precip_and_related_fields} |
| callbacks: [] |
| callbacks: [] |
| benchmark_profiler: |
| memory: |
| enabled: true |
| steps: 5 |
| warmup: 2 |
| extra_plots: false |
| trace_rank0_only: false |
| time: |
| enabled: true |
| verbose: false |
| speed: |
| enabled: true |
| system: |
| enabled: true |
| model_summary: |
| enabled: true |
| snapshot: |
| enabled: true |
| steps: 4 |
| warmup: 0 |
| debug: |
| anomaly_detection: false |
| profiler: false |
| enable_checkpointing: true |
| checkpoint: |
| every_n_minutes: |
| save_frequency: 30 |
| num_models_saved: 3 |
| every_n_epochs: |
| save_frequency: 1 |
| num_models_saved: -1 |
| every_n_train_steps: |
| save_frequency: null |
| num_models_saved: 0 |
| log: |
| wandb: |
| enabled: false |
| offline: false |
| log_model: false |
| project: Anemoi |
| entity: ??? |
| gradients: false |
| parameters: false |
| tensorboard: |
| enabled: false |
| mlflow: |
| enabled: false |
| offline: false |
| authentication: false |
| log_model: false |
| tracking_uri: ??? |
| experiment_name: ??? |
| project_name: ??? |
| system: true |
| terminal: true |
| run_name: null |
| on_resume_create_child: true |
| expand_hyperparams: |
| - config |
| http_max_retries: 35 |
| interval: 100 |
| enable_progress_bar: true |
| print_memory_summary: false |
| hardware: |
| paths: |
| data: ${oc.decode:${oc.env:DATASETS_PATH}} |
| output: ${oc.decode:${oc.env:OUTPUT_PATH}} |
| logs: |
| base: ${hardware.paths.output}logs/ |
| wandb: ${hardware.paths.logs.base} |
| mlflow: ${hardware.paths.logs.base}mlflow/ |
| tensorboard: ${hardware.paths.logs.base}tensorboard/ |
| checkpoints: ${hardware.paths.output}checkpoint/ |
| plots: ${hardware.paths.output}plots/ |
| profiler: ${hardware.paths.output}profiler/ |
| graph: ${hardware.paths.output}graphs/ |
| files: |
| dataset: aifs-ea-an-oper-0001-mars-${data.resolution}-1979-2024-6h-v1-aifs-single-v1.zarr |
| graph: graph_enc_proc_dec_${data.resolution}.pt |
| checkpoint: |
| every_n_epochs: aifs-by_epoch-epoch_{epoch:03d}-val_wmse_{val_wmse:.3e} |
| every_n_train_steps: aifs-by_step-epoch_{epoch:03d}-step_{step:06d} |
| every_n_minutes: aifs-by_time-epoch_{epoch:03d}-step_{step:06d} |
| warm_start: null |
| accelerator: auto |
| num_gpus_per_node: 4 |
| num_nodes: 16 |
| num_gpus_per_model: 4 |
| graph: |
| overwrite: true |
| data: data |
| hidden: hidden |
| nodes: |
| data: |
| node_builder: |
| _target_: anemoi.graphs.nodes.ZarrDatasetNodes |
| dataset: ${dataloader.dataset} |
| attributes: ${graph.attributes.nodes} |
| hidden: |
| node_builder: |
| _target_: anemoi.graphs.nodes.ReducedGaussianGridNodes |
| grid: o96 |
| edges: |
| - source_name: ${graph.data} |
| target_name: ${graph.hidden} |
| edge_builders: |
| - _target_: anemoi.graphs.edges.CutOffEdges |
| cutoff_factor: 0.6 |
| source_mask_attr_name: null |
| target_mask_attr_name: null |
| attributes: ${graph.attributes.edges} |
| - source_name: ${graph.hidden} |
| target_name: ${graph.data} |
| edge_builders: |
| - _target_: anemoi.graphs.edges.KNNEdges |
| num_nearest_neighbours: 3 |
| source_mask_attr_name: null |
| target_mask_attr_name: null |
| attributes: ${graph.attributes.edges} |
| attributes: |
| nodes: |
| area_weight: |
| _target_: anemoi.graphs.nodes.attributes.SphericalAreaWeights |
| norm: unit-max |
| fill_value: 0 |
| edges: |
| edge_length: |
| _target_: anemoi.graphs.edges.attributes.EdgeLength |
| norm: unit-std |
| edge_dirs: |
| _target_: anemoi.graphs.edges.attributes.EdgeDirection |
| norm: unit-std |
| post_processors: [] |
| model: |
| activation: GELU |
| num_channels: 1024 |
| cpu_offload: false |
| output_mask: null |
| model: |
| _target_: anemoi.models.models.encoder_processor_decoder.AnemoiModelEncProcDec |
| layer_kernels: |
| processor: |
| LayerNorm: |
| _target_: torch.nn.LayerNorm |
| _partial_: true |
| Linear: |
| _target_: torch.nn.Linear |
| _partial_: true |
| QueryNorm: |
| _target_: anemoi.models.layers.normalization.AutocastLayerNorm |
| _partial_: true |
| bias: false |
| KeyNorm: |
| _target_: anemoi.models.layers.normalization.AutocastLayerNorm |
| _partial_: true |
| bias: false |
| encoder: |
| LayerNorm: |
| _target_: torch.nn.LayerNorm |
| _partial_: true |
| Linear: |
| _target_: torch.nn.Linear |
| _partial_: true |
| decoder: |
| LayerNorm: |
| _target_: torch.nn.LayerNorm |
| _partial_: true |
| Linear: |
| _target_: torch.nn.Linear |
| _partial_: true |
| processor: |
| _target_: anemoi.models.layers.processor.TransformerProcessor |
| activation: ${model.activation} |
| num_layers: 16 |
| num_chunks: 2 |
| mlp_hidden_ratio: 4 |
| num_heads: 16 |
| window_size: 1120 |
| dropout_p: 0.0 |
| attention_implementation: flash_attention |
| qk_norm: false |
| softcap: 0.0 |
| use_alibi_slopes: false |
| cpu_offload: ${model.cpu_offload} |
| encoder: |
| _target_: anemoi.models.layers.mapper.GraphTransformerForwardMapper |
| trainable_size: ${model.trainable_parameters.data2hidden} |
| sub_graph_edge_attributes: ${model.attributes.edges} |
| activation: ${model.activation} |
| num_chunks: 1 |
| mlp_hidden_ratio: 4 |
| num_heads: 16 |
| qk_norm: false |
| cpu_offload: ${model.cpu_offload} |
| decoder: |
| _target_: anemoi.models.layers.mapper.GraphTransformerBackwardMapper |
| trainable_size: ${model.trainable_parameters.hidden2data} |
| sub_graph_edge_attributes: ${model.attributes.edges} |
| activation: ${model.activation} |
| num_chunks: 1 |
| mlp_hidden_ratio: 4 |
| num_heads: 16 |
| initialise_data_extractor_zero: false |
| qk_norm: false |
| cpu_offload: ${model.cpu_offload} |
| trainable_parameters: |
| data: 8 |
| hidden: 8 |
| data2hidden: 8 |
| hidden2data: 8 |
| attributes: |
| edges: |
| - edge_length |
| - edge_dirs |
| nodes: [] |
| bounding: |
| - _target_: anemoi.models.layers.bounding.ReluBounding |
| variables: |
| - tp |
| - ro |
| - tcw |
| - ssrd |
| - q_50 |
| - q_100 |
| - q_150 |
| - q_200 |
| - q_250 |
| - q_300 |
| - q_400 |
| - q_500 |
| - q_600 |
| - q_700 |
| - q_850 |
| - q_925 |
| - q_1000 |
| - _target_: anemoi.models.layers.bounding.HardtanhBounding |
| variables: |
| - tcc |
| - swvl1 |
| - swvl2 |
| min_val: 0 |
| max_val: 1 |
| - _target_: anemoi.models.layers.bounding.FractionBounding |
| variables: |
| - cp |
| - sf |
| min_val: 0 |
| max_val: 1 |
| total_var: tp |
| - _target_: anemoi.models.layers.bounding.FractionBounding |
| variables: |
| - lcc |
| - mcc |
| - hcc |
| min_val: 0 |
| max_val: 1 |
| total_var: tcc |
| training: |
| run_id: null |
| fork_run_id: ${oc.decode:${oc.env:PRETRAINING_RUN_ID}} |
| transfer_learning: false |
| load_weights_only: true |
| deterministic: false |
| precision: 16-mixed |
| multistep_input: 2 |
| accum_grad_batches: 1 |
| num_sanity_val_steps: 6 |
| gradient_clip: |
| val: 32.0 |
| algorithm: value |
| swa: |
| enabled: false |
| lr: 0.0001 |
| optimizer: |
| zero: false |
| kwargs: |
| betas: |
| - 0.9 |
| - 0.95 |
| model_task: anemoi.training.train.forecaster.GraphForecaster |
| strategy: |
| _target_: anemoi.training.distributed.strategy.DDPGroupStrategy |
| num_gpus_per_model: ${hardware.num_gpus_per_model} |
| read_group_size: ${dataloader.read_group_size} |
| loss_gradient_scaling: false |
| training_loss: |
| _target_: anemoi.training.losses.mse.WeightedMSELoss |
| scalars: |
| - variable |
| - loss_weights_mask |
| ignore_nans: false |
| validation_metrics: |
| - _target_: anemoi.training.losses.mse.WeightedMSELoss |
| scalars: [] |
| ignore_nans: true |
| scale_validation_metrics: |
| scalars_to_apply: |
| - variable |
| metrics: |
| - all |
| rollout: |
| start: 1 |
| epoch_increment: 1 |
| max: 12 |
| max_epochs: 13 |
| max_steps: 150000 |
| lr: |
| warmup: 1000 |
| rate: 8.0e-07 |
| iterations: 7900 |
| min: 3.0e-07 |
| warmup_t: 100 |
| variable_loss_scaling: |
| default: 1 |
| pl: |
| q: 0.6 |
| t: 6 |
| u: 0.8 |
| v: 0.5 |
| w: 0.001 |
| z: 12 |
| sfc: |
| sp: 10 |
| 10u: 0.5 |
| 10v: 0.5 |
| 100u: 0.1 |
| 100v: 0.1 |
| 2d: 0.5 |
| tp: 0.025 |
| cp: 0.0025 |
| ro: 0.0025 |
| sf: 0.025 |
| tcc: 0.1 |
| mcc: 0.1 |
| lcc: 0.1 |
| hcc: 0.1 |
| swvl2: 2 |
| swvl1: 1 |
| stl2: 10 |
| stl1: 1 |
| ssrd: 0.05 |
| strd: 0.1 |
| metrics: |
| - z_500 |
| - t_850 |
| - u_850 |
| - v_850 |
| pressure_level_scaler: |
| _target_: anemoi.training.data.scaling.ReluPressureLevelScaler |
| minimum: 0.2 |
| slope: 0.001 |
| node_loss_weights: |
| _target_: anemoi.training.losses.nodeweights.GraphNodeAttribute |
| target_nodes: ${graph.data} |
| node_attribute: area_weight |
| submodules_to_freeze: [] |
|
|