---
# LILITH-Tiny Configuration
# ~50M parameters, suitable for RTX 3060 inference
#
# Layout: three top-level sections -- `model` (architecture), `training`
# (optimizer / schedule) and `inference` (serving-time settings).

model:
  variant: tiny
  hidden_dim: 128
  num_heads: 4
  # ffn_dim is 4 x hidden_dim.
  ffn_dim: 512

  # Input/Output
  input_features: 7
  output_features: 3
  # NOTE(review): units of the two lengths below are not stated in this
  # file (presumably time steps) -- confirm against the data loader.
  sequence_length: 30
  forecast_length: 90

  # Component depths
  gat_layers: 2
  temporal_layers: 4
  sfno_layers: 2

  # Grid configuration
  use_grid: true
  nlat: 32
  nlon: 64

  # Features
  use_climate_embed: true
  use_solar_position: true
  use_flash_attention: true
  use_rope: true

  # Ensemble
  ensemble_method: gaussian
  ensemble_members: 10

  # Regularization
  dropout: 0.1

  # Memory optimization
  gradient_checkpointing: false

training:
  # Written with an explicit decimal point: YAML 1.1 loaders such as PyYAML
  # resolve a bare `2e-4` as the *string* "2e-4", not a float.
  learning_rate: 2.0e-4
  weight_decay: 0.01
  max_grad_norm: 1.0
  warmup_steps: 500
  max_steps: 50000
  batch_size: 16
  gradient_accumulation_steps: 2
  use_amp: true
  amp_dtype: float16
  curriculum_enabled: true
  # Five stages with four switch steps, i.e. one switch between each pair of
  # consecutive stages. The last stage (90) equals model.forecast_length.
  # NOTE(review): stages are presumably forecast horizons -- confirm against
  # the trainer.
  curriculum_stages: [7, 14, 30, 60, 90]
  curriculum_switch_steps: [5000, 15000, 30000, 40000]

inference:
  quantization: dynamic_int8
  # Separate from training.batch_size above.
  batch_size: 32
  max_stations: 100