---
# LILITH-Base Configuration
# ~150M parameters, balanced performance
#
# Layout: three top-level mappings.
#   model     - architecture and feature switches
#   training  - optimizer, schedule, mixed precision, curriculum
#   inference - serving-time settings
# Note that `batch_size` is defined separately under `training` and
# `inference`; the two values are independent.

model:
  variant: base
  hidden_dim: 256
  num_heads: 8
  ffn_dim: 1024

  # Input/Output
  input_features: 7
  output_features: 3
  sequence_length: 30
  forecast_length: 90

  # Component depths
  gat_layers: 3
  temporal_layers: 6
  sfno_layers: 4

  # Grid configuration
  use_grid: true
  nlat: 64
  nlon: 128

  # Features
  use_climate_embed: true
  use_solar_position: true
  use_flash_attention: true
  use_rope: true

  # Ensemble
  ensemble_method: gaussian
  ensemble_members: 10

  # Regularization
  dropout: 0.1

  # Memory optimization
  gradient_checkpointing: true

training:
  # Written with an explicit mantissa decimal point: a bare `1e-4` is loaded
  # as the *string* '1e-4' by YAML 1.1 resolvers (e.g. PyYAML), not a float.
  learning_rate: 1.0e-4
  weight_decay: 0.01
  max_grad_norm: 1.0
  warmup_steps: 1000
  max_steps: 100000

  # 8 x 4 = 32 samples per optimizer update, assuming the trainer multiplies
  # batch_size by gradient_accumulation_steps -- TODO confirm.
  batch_size: 8
  gradient_accumulation_steps: 4

  # Mixed precision
  use_amp: true
  amp_dtype: float16

  # Curriculum: 5 stages and 4 switch steps. Presumably each switch step is
  # the boundary between consecutive stages -- verify against the trainer.
  # The last stage (90) equals model.forecast_length.
  curriculum_enabled: true
  curriculum_stages: [7, 14, 30, 60, 90]
  curriculum_switch_steps: [10000, 30000, 60000, 80000]

inference:
  quantization: dynamic_int8
  batch_size: 16
  max_stations: 200