---
# LILITH-Base Configuration
# ~150M parameters, balanced performance
#
# Three top-level sections: `model` (architecture), `training` (optimizer,
# schedule, curriculum) and `inference` (serving-time settings).

model:
  variant: base
  hidden_dim: 256
  num_heads: 8
  ffn_dim: 1024

  # Input/Output
  input_features: 7
  output_features: 3
  sequence_length: 30
  forecast_length: 90

  # Component depths
  gat_layers: 3
  temporal_layers: 6
  sfno_layers: 4

  # Grid configuration
  use_grid: true
  nlat: 64
  nlon: 128

  # Features
  use_climate_embed: true
  use_solar_position: true
  use_flash_attention: true
  use_rope: true

  # Ensemble
  ensemble_method: gaussian
  ensemble_members: 10

  # Regularization
  dropout: 0.1

  # Memory optimization
  gradient_checkpointing: true

training:
  # Written with an explicit mantissa dot: YAML 1.1 loaders such as PyYAML
  # resolve a bare `1e-4` as the string "1e-4" rather than a float.
  learning_rate: 1.0e-4
  weight_decay: 0.01
  max_grad_norm: 1.0
  warmup_steps: 1000
  max_steps: 100000
  batch_size: 8
  gradient_accumulation_steps: 4
  use_amp: true
  amp_dtype: float16

  # Five stages with four switch steps.
  # NOTE(review): presumably one switch step per transition between
  # consecutive stages - confirm against the trainer that consumes these.
  curriculum_enabled: true
  curriculum_stages: [7, 14, 30, 60, 90]
  curriculum_switch_steps: [10000, 30000, 60000, 80000]

inference:
  quantization: dynamic_int8
  # Separate from training.batch_size above.
  batch_size: 16
  max_stations: 200