yx21e committed
Commit d3bbb53 · verified · 1 Parent(s): d3bc17d

Remove manuscript TeX and table source artifacts

Files changed (41)
  1. README.md +6 -7
  2. artifacts/manifests/paper_outputs.sha256 +0 -20
  3. artifacts/manifests/paper_outputs.yml +1 -41
  4. artifacts/results/fireprone_contract_progression_table.generated.tex +0 -69
  5. artifacts/results/selection_regret_full_head_table.generated.tex +0 -2
  6. artifacts/results/selection_regret_main_table.generated.tex +0 -24
  7. artifacts/results/selection_regret_scope_sweep_20260505.generated.tex +0 -24
  8. artifacts/results/selection_regret_tolerance_family_table.generated.tex +0 -2
  9. docs/artifact_map.md +19 -44
  10. docs/huggingface_release_design.md +10 -4
  11. paper/main.tex +0 -141
  12. paper/manuscript_final.pdf +0 -3
  13. paper/references.bib +0 -465
  14. paper/sections/0_abstract.tex +0 -4
  15. paper/sections/1_intro.tex +0 -77
  16. paper/sections/2_backbone.tex +0 -39
  17. paper/sections/3_prelim.tex +0 -84
  18. paper/sections/4_experiments.tex +0 -435
  19. paper/sections/5_conclusion.tex +0 -31
  20. paper/sections/appendix.tex +0 -733
  21. paper_outputs/figures/fig_selection_regret_rq2.tikz +0 -120
  22. paper_outputs/tables/tab_app_analog_rank_depth.tex +0 -24
  23. paper_outputs/tables/tab_app_burned_area_median_acre.tex +0 -24
  24. paper_outputs/tables/tab_app_contract_params_full.tex +0 -22
  25. paper_outputs/tables/tab_app_head_architectures.tex +0 -36
  26. paper_outputs/tables/tab_app_heat_event_pr.tex +0 -24
  27. paper_outputs/tables/tab_app_matching_rule_params.tex +0 -17
  28. paper_outputs/tables/tab_app_occupancy_ppr_scope.tex +0 -27
  29. paper_outputs/tables/tab_app_scope_params.tex +0 -19
  30. paper_outputs/tables/tab_app_seed_robustness.tex +0 -36
  31. paper_outputs/tables/tab_app_smoke_high_event.tex +0 -24
  32. paper_outputs/tables/tab_app_spread_ap_by_scope.tex +0 -24
  33. paper_outputs/tables/tab_appendix_selection_regret_tolerance.tex +0 -2
  34. paper_outputs/tables/tab_fireprone_contract_progression.tex +0 -69
  35. paper_outputs/tables/tab_primary_results.tex +0 -62
  36. paper_outputs/tables/tab_selection_regret_full_head.tex +0 -2
  37. paper_outputs/tables/tab_selection_regret_scope.tex +0 -24
  38. paper_outputs/tables/tab_selection_regret_scope_sweep.tex +0 -24
  39. paper_outputs/tables/tab_supporting_results.tex +0 -120
  40. scripts/audit_release.py +7 -13
  41. scripts/reproduce_paper_outputs.py +4 -6
README.md CHANGED
@@ -17,7 +17,7 @@ pretty_name: WildFIRE-FM
 
 ![WildFIRE-FM summary](assets/wildfire_fm_model_card.svg)
 
-**WildFIRE-FM** is a wildfire-specialized regional reference backbone for 12-hour gridded wildfire occupancy prediction on a 5 km California grid. It is released with five seeded PyTorch checkpoints, model code, final-paper artifacts, and data-source notes. The raw data are **not** redistributed.
+**WildFIRE-FM** is a wildfire-specialized regional reference backbone for 12-hour gridded wildfire occupancy prediction on a 5 km California grid. It is released with five seeded PyTorch checkpoints, model code, final-paper figure previews, numeric summaries, and data-source notes. The raw data are **not** redistributed.
 
 The model is intended as a reproducible reference backbone for fixed-contract wildfire evaluation, not as a general global wildfire forecasting product. It was trained with regional weather, active-fire supervision, static fuel/canopy/exposure layers, and event-level wildfire resources used by supporting tasks in the paper.
 
@@ -29,7 +29,7 @@ The model is intended as a reproducible reference backbone for fixed-contract wi
 
 **Model code.** The compact U-Net definition is provided in `models/wildfire_fm/modeling_unet.py`, with a short loading example below.
 
-**Paper artifacts.** The final manuscript PDF and the final paper figures/tables are included under `paper/` and `paper_outputs/`. Compact CSV/JSON summaries are under `artifacts/results/`.
+**Evaluation artifacts.** Final-paper figure previews and sanitized compact CSV/JSON summaries are included under `assets/`, `paper_outputs/`, and `artifacts/results/`. Manuscript TeX/PDF files are intentionally not included in this model release.
 
 **Data notes.** Data sources and access entry points are documented in `data_sources/DATA_SOURCES.md`; users must obtain source data from the original providers.
 
@@ -80,7 +80,7 @@ The paper evaluates WildFIRE-FM and ten Earth-FM comparators under fixed task co
 - **Smoke PM2.5 RMSE:** `4.4646 ± 0.0060`, where lower is better.
 - **Extreme-heat RMSE-C:** `0.2179 ± 0.0043`, where lower is better.
 
-The full final-paper tables are included as TeX blocks under `paper_outputs/tables/`.
+The public release includes sanitized CSV/JSON summaries used to audit the displayed values. Manuscript table TeX is not included.
 
 ### Fixed-Contract Checks From The Final Paper
 
@@ -111,7 +111,7 @@ See `data_sources/DATA_SOURCES.md` for source roles and access links.
 
 ## Reproducing Released Paper Outputs
 
-The lightweight check verifies the released final-paper artifacts from compact summaries. It does not require raw data or GPUs.
+The lightweight check verifies the released sanitized artifacts from compact summaries. It does not require raw data or GPUs.
 
 ```bash
 python3 scripts/reproduce_paper_outputs.py
@@ -123,9 +123,8 @@ Full raw-data reruns require separately downloaded source data, local feature ca
 
 ```text
 models/wildfire_fm/    model code, manifests, and checkpoint metadata
-paper/                 final manuscript PDF and LaTeX source snapshot
-paper_outputs/         final paper figures and TeX table blocks
-artifacts/results/     compact CSV/JSON summaries for released outputs
+paper_outputs/         final-paper figure PDFs retained for reproducibility
+artifacts/results/     sanitized compact CSV/JSON summaries for released outputs
 experiments/           sanitized raw-rerun references and Slurm template
 data_sources/          source-data roles and access notes
 scripts/               artifact verification and figure/table rebuild helpers
artifacts/manifests/paper_outputs.sha256 CHANGED
@@ -4,31 +4,11 @@ ca11c75c03078a9be26421b527ab5a49f5fc43ce8e5edd7da14af120a247b67c assets/primary
 5552fb6cca6a0a683592e724b4bd562f923cf99c04e2abdb846546b1d67aecc4 assets/selection_regret_final.png
 34807e65ca71365a26a3b74cae70e6b40ae6f2151110e12c53e0efa9f8b726aa assets/supporting_rank_map_final.png
 024505248c8ba2bbb50d36d0b015d7fd7fbf5577b8b34faadda0efc972c6d3e8 assets/wildfire_fm_model_card.svg
-c342978b2f0f25cf6e430b860702895bbb3b512145c8c6e38aa2233b416d835e paper/manuscript_final.pdf
 b369d13e0419fa8272ccdc994b6642f3b141248a879c030218e387c583537eb2 paper_outputs/figures/fig_fireprone_contract_progression_compact.pdf
 e3110c70c3cf8ecb8671163a401a155920266e3f907f9c6baf08e27ec6e6c410 paper_outputs/figures/fig_primary_rank_change_map.pdf
 4e5b791ba4d136f722bd75a61097203836819ce9411def1caac4cc1e6d881275 paper_outputs/figures/fig_rank_heatmap1.pdf
-b2e56403e2774c457dd12c4685e2dc7492e22e32df46fcc5c37b3087110f2439 paper_outputs/figures/fig_selection_regret_rq2.tikz
 fabb8b55aac901199cc03773741a26685becffd074f52568c93bee517c2c42c0 paper_outputs/figures/fig_selection_regret_scatter.pdf
 bc4d35ad9cb4c1f9ba8f31c7c340d9684c9dd2d55f5a2e60604a2b58b90cbe40 paper_outputs/figures/fig_task_contract_tiles.pdf
 c382f5d69f25cc2f5db174601a33d0fd0928b44910a2a4b1c131954bd42113d9 paper_outputs/figures/fig_task_rank_map.pdf
 015ab951b0af5c130e4894092a5dd0bb0fd62e710467163a9df8246d8cf369f4 paper_outputs/figures/matching.pdf
 7dca6ae4a9b179693802f47d24dd66734c0f332b372a7976832a0d429333b755 paper_outputs/figures/overview_wildfire.pdf
-e8abbd2668517f5cae14933ed943fe103e74132886b0ff48ecd1685978549504 paper_outputs/tables/tab_app_analog_rank_depth.tex
-81db28aace3366625f1cfd5935892eb5af672d5ecd8327e6dcba00b7b04e2b3c paper_outputs/tables/tab_app_burned_area_median_acre.tex
-4a93401ef355c02eb0cc6b2e9a1506f9ed9d912301ec6829581247e40991bdfb paper_outputs/tables/tab_app_contract_params_full.tex
-3c5398c28e6243b1784b27d2e9eab1a5c60e6e6d2cfd14a79aa6fd1e0499b871 paper_outputs/tables/tab_app_head_architectures.tex
-f740b8f076490e852efa88fa8180ca08bb6b12901ff3ec3687c7e5c0b236da4e paper_outputs/tables/tab_app_heat_event_pr.tex
-86e97a394ceae8cc6eafd6d1021b44d13a117378ead87bfee662cc90a1e0e54b paper_outputs/tables/tab_app_matching_rule_params.tex
-0b1ad4587dd440fdabf771000b1c971daa9222e946a3404c9beae10dd7ea67c6 paper_outputs/tables/tab_app_occupancy_ppr_scope.tex
-4e79672c28a938cd9ba1bc0e423e7169eca389251a22357aff6fe84d3cbfa889 paper_outputs/tables/tab_app_scope_params.tex
-6850ee131e203f66392c79f17f59214672b362274f42285b252b83ac0ede1eb3 paper_outputs/tables/tab_app_seed_robustness.tex
-1ca91ca451f846e59cb62ea64a616780c698b9dee80918a05467bd6c40df2dd5 paper_outputs/tables/tab_app_smoke_high_event.tex
-cd65372622e8dd388adb1122a3e93b22d2090fba836405b08a078d5159b182de paper_outputs/tables/tab_app_spread_ap_by_scope.tex
-2b168c92af29ae40c324e9660d48177ea0c79e4559a3c2aa571d53043ee83b53 paper_outputs/tables/tab_appendix_selection_regret_tolerance.tex
-c822daa85e29dde4ac92b4be34f4d41040fa04da3a2674bdc4d0494dbaaceb69 paper_outputs/tables/tab_fireprone_contract_progression.tex
-6672c62a150d83a351f4fa23ac04537d9aaae01af6056f689437d9b7d8bcee40 paper_outputs/tables/tab_primary_results.tex
-d11d82273acb389b46c8fc1d15c1e37f1f90332ae9d1fb7b8eb5ff0f8847dc2d paper_outputs/tables/tab_selection_regret_full_head.tex
-11f230e0462ded2821f3d5d45421d8b8278b61695d76799246d2e8bf873e2789 paper_outputs/tables/tab_selection_regret_scope.tex
-3b1277700ececdbb4107667a5d4166a75224a84282810ec5d21bbf2ebc7fa163 paper_outputs/tables/tab_selection_regret_scope_sweep.tex
-717555b2584658c936aa8fc27b63f1068dc5f796a297bcef0576cf020b3ddaf8 paper_outputs/tables/tab_supporting_results.tex
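The manifest above pairs each released file with its SHA-256 digest in the usual `<hash> <path>` layout. As a minimal illustration of how such a manifest can be audited (a sketch, not the repository's own tooling — the commit's actual checks live in `scripts/reproduce_paper_outputs.py` and `scripts/audit_release.py`):

```python
import hashlib
from pathlib import Path


def check_manifest(manifest_path, root="."):
    """Return (path, expected, actual) mismatches for a
    '<sha256> <relative path>' manifest like paper_outputs.sha256."""
    mismatches = []
    for line in Path(manifest_path).read_text().splitlines():
        if not line.strip():
            continue
        expected, rel_path = line.split(maxsplit=1)
        actual = hashlib.sha256((Path(root) / rel_path).read_bytes()).hexdigest()
        if actual != expected:
            mismatches.append((rel_path, expected, actual))
    return mismatches
```

This is functionally the same check as running `sha256sum -c artifacts/manifests/paper_outputs.sha256` from the repository root.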
artifacts/manifests/paper_outputs.yml CHANGED
@@ -1,4 +1,4 @@
-# Auto-generated output manifest for the Hugging Face release.
+# Auto-generated public-output manifest for the Hugging Face release.
 outputs:
 - path: assets/overview_final.png
   sha256: 6db4f4aff90da8709edff97e4782aee9b4c5e8feefec7c7431a4ec8787cfe57c
@@ -12,16 +12,12 @@ outputs:
   sha256: 34807e65ca71365a26a3b74cae70e6b40ae6f2151110e12c53e0efa9f8b726aa
 - path: assets/wildfire_fm_model_card.svg
   sha256: 024505248c8ba2bbb50d36d0b015d7fd7fbf5577b8b34faadda0efc972c6d3e8
-- path: paper/manuscript_final.pdf
-  sha256: c342978b2f0f25cf6e430b860702895bbb3b512145c8c6e38aa2233b416d835e
 - path: paper_outputs/figures/fig_fireprone_contract_progression_compact.pdf
   sha256: b369d13e0419fa8272ccdc994b6642f3b141248a879c030218e387c583537eb2
 - path: paper_outputs/figures/fig_primary_rank_change_map.pdf
   sha256: e3110c70c3cf8ecb8671163a401a155920266e3f907f9c6baf08e27ec6e6c410
 - path: paper_outputs/figures/fig_rank_heatmap1.pdf
   sha256: 4e5b791ba4d136f722bd75a61097203836819ce9411def1caac4cc1e6d881275
-- path: paper_outputs/figures/fig_selection_regret_rq2.tikz
-  sha256: b2e56403e2774c457dd12c4685e2dc7492e22e32df46fcc5c37b3087110f2439
 - path: paper_outputs/figures/fig_selection_regret_scatter.pdf
   sha256: fabb8b55aac901199cc03773741a26685becffd074f52568c93bee517c2c42c0
 - path: paper_outputs/figures/fig_task_contract_tiles.pdf
@@ -32,39 +28,3 @@ outputs:
   sha256: 015ab951b0af5c130e4894092a5dd0bb0fd62e710467163a9df8246d8cf369f4
 - path: paper_outputs/figures/overview_wildfire.pdf
   sha256: 7dca6ae4a9b179693802f47d24dd66734c0f332b372a7976832a0d429333b755
-- path: paper_outputs/tables/tab_app_analog_rank_depth.tex
-  sha256: e8abbd2668517f5cae14933ed943fe103e74132886b0ff48ecd1685978549504
-- path: paper_outputs/tables/tab_app_burned_area_median_acre.tex
-  sha256: 81db28aace3366625f1cfd5935892eb5af672d5ecd8327e6dcba00b7b04e2b3c
-- path: paper_outputs/tables/tab_app_contract_params_full.tex
-  sha256: 4a93401ef355c02eb0cc6b2e9a1506f9ed9d912301ec6829581247e40991bdfb
-- path: paper_outputs/tables/tab_app_head_architectures.tex
-  sha256: 3c5398c28e6243b1784b27d2e9eab1a5c60e6e6d2cfd14a79aa6fd1e0499b871
-- path: paper_outputs/tables/tab_app_heat_event_pr.tex
-  sha256: f740b8f076490e852efa88fa8180ca08bb6b12901ff3ec3687c7e5c0b236da4e
-- path: paper_outputs/tables/tab_app_matching_rule_params.tex
-  sha256: 86e97a394ceae8cc6eafd6d1021b44d13a117378ead87bfee662cc90a1e0e54b
-- path: paper_outputs/tables/tab_app_occupancy_ppr_scope.tex
-  sha256: 0b1ad4587dd440fdabf771000b1c971daa9222e946a3404c9beae10dd7ea67c6
-- path: paper_outputs/tables/tab_app_scope_params.tex
-  sha256: 4e79672c28a938cd9ba1bc0e423e7169eca389251a22357aff6fe84d3cbfa889
-- path: paper_outputs/tables/tab_app_seed_robustness.tex
-  sha256: 6850ee131e203f66392c79f17f59214672b362274f42285b252b83ac0ede1eb3
-- path: paper_outputs/tables/tab_app_smoke_high_event.tex
-  sha256: 1ca91ca451f846e59cb62ea64a616780c698b9dee80918a05467bd6c40df2dd5
-- path: paper_outputs/tables/tab_app_spread_ap_by_scope.tex
-  sha256: cd65372622e8dd388adb1122a3e93b22d2090fba836405b08a078d5159b182de
-- path: paper_outputs/tables/tab_appendix_selection_regret_tolerance.tex
-  sha256: 2b168c92af29ae40c324e9660d48177ea0c79e4559a3c2aa571d53043ee83b53
-- path: paper_outputs/tables/tab_fireprone_contract_progression.tex
-  sha256: c822daa85e29dde4ac92b4be34f4d41040fa04da3a2674bdc4d0494dbaaceb69
-- path: paper_outputs/tables/tab_primary_results.tex
-  sha256: 6672c62a150d83a351f4fa23ac04537d9aaae01af6056f689437d9b7d8bcee40
-- path: paper_outputs/tables/tab_selection_regret_full_head.tex
-  sha256: d11d82273acb389b46c8fc1d15c1e37f1f90332ae9d1fb7b8eb5ff0f8847dc2d
-- path: paper_outputs/tables/tab_selection_regret_scope.tex
-  sha256: 11f230e0462ded2821f3d5d45421d8b8278b61695d76799246d2e8bf873e2789
-- path: paper_outputs/tables/tab_selection_regret_scope_sweep.tex
-  sha256: 3b1277700ececdbb4107667a5d4166a75224a84282810ec5d21bbf2ebc7fa163
-- path: paper_outputs/tables/tab_supporting_results.tex
-  sha256: 717555b2584658c936aa8fc27b63f1068dc5f796a297bcef0576cf020b3ddaf8
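The YAML manifest above uses a flat `- path:` / `sha256:` layout. As a sketch of how a consumer might read it into a dict without extra dependencies (an assumption-laden mini-parser for exactly this two-key shape; a real consumer would prefer a proper YAML library such as PyYAML):

```python
def parse_outputs_manifest(text):
    """Parse the simple 'outputs:' manifest into {path: sha256}.

    Assumes the flat '- path:' / 'sha256:' layout shown above;
    anything more nested needs a real YAML parser.
    """
    entries = {}
    current = None
    for raw in text.splitlines():
        line = raw.strip()
        if not line or line.startswith("#") or line == "outputs:":
            continue
        if line.startswith("- path:"):
            current = line.split(":", 1)[1].strip()
        elif line.startswith("sha256:") and current is not None:
            entries[current] = line.split(":", 1)[1].strip()
            current = None
    return entries
```

The resulting dict can be compared entry-by-entry against freshly computed digests, mirroring what the `.sha256` manifest check does.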
artifacts/results/fireprone_contract_progression_table.generated.tex DELETED
@@ -1,69 +0,0 @@
-\begin{table*}[t]
-\centering
-\scriptsize
-\setlength{\tabcolsep}{4pt}
-\caption{Occupancy scores across global and fire-prone scopes. Global uses the full validation/test domain; top-\(k\) rows use train-defined fire-prone masks from historical fire frequency. Values are \(F_1\) percentages from the same validation-selected strict threshold. Tolerance is spatial-only; union adds temporal and spatial matching. Difference is union minus strict. Rows report five-seed mean with small std. Values use four decimals.}
-\label{tab:fireprone_contract_progression}
-\begin{adjustbox}{max width=\textwidth}
-\begin{tabular}{@{}llcccc@{}}
-\toprule
-Backbone & Scope & Strict \(F_1\uparrow\) & Tolerance \(F_1\uparrow\) & Union \(F_1\uparrow\) & Difference \(\uparrow\) \\
-\midrule
-\textcolor{blue}{FireWx-FM ref.} & global & \ms{0.4550}{0.1410} & \ms{29.7480}{1.2870} & \ms{59.0660}{2.7370} & \ms{58.6110}{2.6950} \\
-& top 5\% & \ms{3.5600}{0.8810} & \ms{39.2620}{1.4010} & \ms{72.8280}{2.5780} & \ms{69.2680}{1.9960} \\
-& top 10\% & \ms{3.5580}{0.8800} & \ms{39.1660}{1.3910} & \ms{72.5200}{2.5670} & \ms{68.9630}{1.9890} \\
-& top 20\% & \ms{3.5300}{0.8700} & \ms{38.2850}{1.2950} & \ms{69.7230}{2.4660} & \ms{66.1930}{1.9270} \\
-\addlinespace[1pt]
-Prithvi-WxC & global & \ms{0.0550}{0.0040} & \ms{7.1600}{0.6600} & \ms{20.1900}{1.8300} & \ms{20.1300}{1.8300} \\
-& top 5\% & \ms{1.4100}{1.1600} & \ms{19.2600}{4.5000} & \ms{42.5800}{4.5500} & \ms{41.1700}{3.4800} \\
-& top 10\% & \ms{1.2400}{1.3200} & \ms{14.8800}{8.4400} & \ms{32.6900}{13.2100} & \ms{31.4500}{11.9100} \\
-& top 20\% & \ms{1.1500}{1.3800} & \ms{13.1500}{9.4600} & \ms{28.1300}{15.2900} & \ms{26.9800}{13.9200} \\
-\addlinespace[1pt]
-Aurora & global & \ms{0.0700}{0.0100} & \ms{8.5000}{1.9600} & \ms{23.1000}{4.9400} & \ms{23.0400}{4.9300} \\
-& top 5\% & \ms{0.9900}{0.9300} & \ms{15.1300}{6.0800} & \ms{35.4800}{11.0200} & \ms{34.5000}{10.3700} \\
-& top 10\% & \ms{0.7800}{1.0500} & \ms{12.7400}{6.5600} & \ms{30.5300}{10.8800} & \ms{29.7500}{9.8700} \\
-& top 20\% & \ms{0.6700}{1.1000} & \ms{10.5300}{7.4300} & \ms{24.9400}{12.5800} & \ms{24.2800}{11.4900} \\
-\addlinespace[1pt]
-ClimaX & global & \ms{0.3500}{0.0800} & \ms{29.7500}{3.6100} & \ms{60.1500}{7.5900} & \ms{59.8000}{7.5500} \\
-& top 5\% & \ms{1.2900}{0.1100} & \ms{34.5800}{2.3800} & \ms{69.2200}{5.7200} & \ms{67.9200}{5.7300} \\
-& top 10\% & \ms{1.2500}{0.1600} & \ms{34.3300}{2.2900} & \ms{68.5700}{5.5400} & \ms{67.3200}{5.5500} \\
-& top 20\% & \ms{1.0300}{0.2700} & \ms{30.2100}{4.2900} & \ms{60.0600}{7.5700} & \ms{59.0400}{7.5900} \\
-\addlinespace[1pt]
-StormCast & global & \ms{0.0560}{0.0110} & \ms{8.2000}{2.1900} & \ms{22.3800}{5.4300} & \ms{22.3200}{5.4200} \\
-& top 5\% & \ms{0.9600}{0.8000} & \ms{15.3200}{5.5300} & \ms{36.1900}{9.7300} & \ms{35.2300}{9.1800} \\
-& top 10\% & \ms{0.7300}{0.9300} & \ms{12.6700}{6.3300} & \ms{30.4700}{10.6500} & \ms{29.7500}{9.7500} \\
-& top 20\% & \ms{0.5800}{0.9100} & \ms{10.4200}{7.3400} & \ms{24.6600}{12.4000} & \ms{24.0800}{11.5000} \\
-\addlinespace[1pt]
-AlphaEarth & global & \ms{2.0600}{0.4400} & \ms{29.4500}{6.0100} & \ms{37.4300}{9.9500} & \ms{35.3700}{10.0300} \\
-& top 5\% & \ms{6.9100}{0.8500} & \ms{42.8800}{4.6100} & \ms{51.7400}{8.7300} & \ms{44.8300}{9.0800} \\
-& top 10\% & \ms{6.6400}{0.9900} & \ms{41.9000}{5.9500} & \ms{50.5700}{10.0100} & \ms{43.9300}{9.9200} \\
-& top 20\% & \ms{6.1900}{1.1300} & \ms{38.8300}{7.5000} & \ms{46.3800}{12.1700} & \ms{40.1900}{11.6800} \\
-\addlinespace[1pt]
-DLWP & global & \ms{0.1700}{0.0400} & \ms{14.9100}{3.2400} & \ms{28.1900}{6.9700} & \ms{28.0200}{6.9300} \\
-& top 5\% & \ms{1.8100}{0.4800} & \ms{31.7200}{3.2900} & \ms{55.4600}{5.2900} & \ms{53.6500}{5.4800} \\
-& top 10\% & \ms{1.6100}{0.6000} & \ms{27.6600}{5.9200} & \ms{47.1300}{8.0100} & \ms{45.5200}{7.7900} \\
-& top 20\% & \ms{1.5200}{0.9000} & \ms{20.9400}{4.8000} & \ms{34.9300}{7.8500} & \ms{33.4100}{7.8800} \\
-\addlinespace[1pt]
-FCN & global & \ms{0.2800}{0.0800} & \ms{19.5100}{3.3400} & \ms{40.0600}{9.3700} & \ms{39.7800}{9.3400} \\
-& top 5\% & \ms{1.6200}{0.5100} & \ms{29.3800}{2.7600} & \ms{54.3000}{7.4100} & \ms{52.6800}{7.4400} \\
-& top 10\% & \ms{1.1800}{0.5100} & \ms{22.4200}{3.9800} & \ms{43.4500}{9.2500} & \ms{42.2700}{9.0300} \\
-& top 20\% & \ms{1.0000}{0.4300} & \ms{16.9800}{3.9400} & \ms{34.0900}{8.2600} & \ms{33.0900}{7.9300} \\
-\addlinespace[1pt]
-FengWu & global & \ms{0.2600}{0.0800} & \ms{12.0000}{6.0200} & \ms{24.1000}{13.6300} & \ms{23.8400}{13.5700} \\
-& top 5\% & \ms{1.5700}{0.3600} & \ms{16.2800}{3.7000} & \ms{30.1100}{5.0100} & \ms{28.5400}{4.7700} \\
-& top 10\% & \ms{1.2400}{0.5300} & \ms{12.9500}{5.6100} & \ms{24.1900}{8.6900} & \ms{22.9400}{8.1900} \\
-& top 20\% & \ms{1.1200}{0.5000} & \ms{11.9500}{5.0700} & \ms{22.7900}{7.9100} & \ms{21.6700}{7.4400} \\
-\addlinespace[1pt]
-FuXi & global & \ms{0.3800}{0.1200} & \ms{21.0300}{4.8200} & \ms{37.2900}{9.4500} & \ms{36.9100}{9.4300} \\
-& top 5\% & \ms{2.0300}{0.6800} & \ms{31.8900}{4.7300} & \ms{53.9300}{8.3800} & \ms{51.9000}{8.6900} \\
-& top 10\% & \ms{1.6500}{0.7300} & \ms{24.0100}{5.7800} & \ms{40.2100}{9.9300} & \ms{38.5600}{9.7700} \\
-& top 20\% & \ms{1.3600}{0.6800} & \ms{21.9500}{5.8600} & \ms{36.7300}{10.0300} & \ms{35.3700}{9.9200} \\
-\addlinespace[1pt]
-Pangu-Weather & global & \ms{0.2800}{0.1100} & \ms{17.0900}{4.0500} & \ms{35.6400}{9.0300} & \ms{35.3600}{9.0800} \\
-& top 5\% & \ms{1.3700}{0.3100} & \ms{22.2200}{6.8600} & \ms{43.4200}{13.2400} & \ms{42.0600}{13.0600} \\
-& top 10\% & \ms{1.0900}{0.3500} & \ms{18.9300}{5.9300} & \ms{38.5300}{11.7200} & \ms{37.4400}{11.5300} \\
-& top 20\% & \ms{0.8800}{0.3600} & \ms{17.0200}{5.4900} & \ms{34.5700}{10.2900} & \ms{33.6800}{10.1300} \\
-\bottomrule
-\end{tabular}
-\end{adjustbox}
-\end{table*}
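The deleted table above typesets each cell with a custom `\ms{mean}{std}` macro that lives in the manuscript preamble, which is not part of this release. A plausible definition consistent with its usage (an assumption, not the manuscript's actual macro) would be:

```latex
% Hypothetical \ms{mean}{std} definition, consistent with how the
% deleted tables use it; the real preamble is not in this release.
\newcommand{\ms}[2]{#1\,{\scriptstyle\pm #2}}
```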
artifacts/results/selection_regret_full_head_table.generated.tex DELETED
@@ -1,2 +0,0 @@
-% Full per-head rows are kept in the supplementary CSV files.
-% The manuscript uses the all-backbone selection-regret summaries instead.
artifacts/results/selection_regret_main_table.generated.tex DELETED
@@ -1,24 +0,0 @@
-\begin{table*}[!t]
-\centering
-\small
-\setlength{\tabcolsep}{4pt}
-\caption{Fixed-feature selection-regret check across evaluation scopes. Values are percentage-point regret \(\delta = D(h_D)-D(h_R)\) under union-\(F_1\), where \(h_R\) is selected by PR-AUC and \(h_D\) by the decision metric. Top-\(k\) columns use train-defined fire-prone scopes. Rows report mean with small std over five seeds; \(0.0000\) means the two selectors give the same decision score for all seeds.}
-\label{tab:selection_regret_diagnostic}
-\begin{tabular}{lcccc}
-\toprule
-\textbf{Feature source} & \textbf{\(\Omega=\)global} & \textbf{\(\Omega=\)top 5\%} & \textbf{\(\Omega=\)top 10\%} & \textbf{\(\Omega=\)top 20\%} \\
-\midrule
-\textcolor{blue}{FireWx-FM ref.} & \ms{7.3831}{7.4536} & \ms{0.3664}{0.6812} & \ms{1.2275}{1.2665} & \ms{2.9385}{2.7513} \\
-Prithvi-WxC & 0.0000 & 0.0000 & 0.0000 & 0.0000 \\
-Aurora & \ms{4.9455}{10.6974} & \ms{15.4283}{34.4987} & \ms{13.9934}{31.2903} & \ms{14.3706}{32.1337} \\
-ClimaX & \ms{0.1296}{0.1775} & 0.0000 & 0.0000 & 0.0000 \\
-StormCast & 0.0000 & 0.0000 & 0.0000 & 0.0000 \\
-DLWP & 0.0000 & \ms{1.6716}{1.6079} & \ms{2.8465}{2.6938} & \ms{4.4634}{4.3561} \\
-FCN & 0.0000 & \ms{0.4510}{1.0071} & \ms{0.4200}{0.9390} & \ms{1.1680}{1.9872} \\
-FengWu & 0.0000 & \ms{0.8796}{0.5532} & \ms{0.4023}{0.5511} & \ms{0.5222}{0.6239} \\
-FuXi & 0.0000 & \ms{1.3545}{2.0970} & \ms{0.1656}{0.3703} & \ms{0.2833}{0.3681} \\
-Pangu-Weather & 0.0000 & \ms{0.7593}{0.8974} & \ms{0.3048}{0.5054} & \ms{0.1868}{0.3255} \\
-AlphaEarth & \ms{17.2217}{8.8492} & \ms{6.3846}{4.9653} & \ms{6.5738}{6.8970} & \ms{3.8804}{5.9483} \\
-\bottomrule
-\end{tabular}
-\end{table*}
artifacts/results/selection_regret_scope_sweep_20260505.generated.tex DELETED
@@ -1,24 +0,0 @@
-\begin{table*}[!t]
-\centering
-\small
-\setlength{\tabcolsep}{4pt}
-\caption{Fixed-feature selection-regret sweep across evaluation scopes. Values are percentage-point regret \(\delta = D(h_D)-D(h_R)\) under union-\(F_1\). Top-\(k\) scopes are train-defined fire-prone masks. Rows report mean with small std over five seeds.}
-\label{tab:selection_regret_scope_sweep}
-\begin{tabular}{lcccc}
-\toprule
-\textbf{Feature source} & \textbf{\(\Omega=\)global} & \textbf{\(\Omega=\)top 5\%} & \textbf{\(\Omega=\)top 10\%} & \textbf{\(\Omega=\)top 20\%} \\
-\midrule
-\textcolor{blue}{FireWx-FM ref.} & \ms{7.3831}{7.4536} & \ms{0.3664}{0.6812} & \ms{1.2275}{1.2665} & \ms{2.9385}{2.7513} \\
-Prithvi-WxC & 0.0000 & 0.0000 & 0.0000 & 0.0000 \\
-Aurora & \ms{4.9455}{10.6974} & \ms{15.4283}{34.4987} & \ms{13.9934}{31.2903} & \ms{14.3706}{32.1337} \\
-ClimaX & \ms{0.1296}{0.1775} & 0.0000 & 0.0000 & 0.0000 \\
-StormCast & 0.0000 & 0.0000 & 0.0000 & 0.0000 \\
-DLWP & 0.0000 & \ms{1.6716}{1.6079} & \ms{2.8465}{2.6938} & \ms{4.4634}{4.3561} \\
-FCN & 0.0000 & \ms{0.4510}{1.0071} & \ms{0.4200}{0.9390} & \ms{1.1680}{1.9872} \\
-FengWu & 0.0000 & \ms{0.8796}{0.5532} & \ms{0.4023}{0.5511} & \ms{0.5222}{0.6239} \\
-FuXi & 0.0000 & \ms{1.3545}{2.0970} & \ms{0.1656}{0.3703} & \ms{0.2833}{0.3681} \\
-Pangu-Weather & 0.0000 & \ms{0.7593}{0.8974} & \ms{0.3048}{0.5054} & \ms{0.1868}{0.3255} \\
-AlphaEarth & \ms{17.2217}{8.8492} & \ms{6.3846}{4.9653} & \ms{6.5738}{6.8970} & \ms{3.8804}{5.9483} \\
-\bottomrule
-\end{tabular}
-\end{table*}
artifacts/results/selection_regret_tolerance_family_table.generated.tex DELETED
@@ -1,2 +0,0 @@
-% Replaced by the all-backbone value table in sections/appendix.tex
-% (Table~\ref{tab:appendix_selection_regret_tolerance}).
docs/artifact_map.md CHANGED
@@ -1,56 +1,31 @@
1
- # Paper Artifact Map
2
 
3
- This map links every table and figure label in the current manuscript to the
4
- public release artifact and its provenance. Final output checksums are stored in
5
- `artifacts/manifests/paper_outputs.sha256`.
6
 
7
- ## Figures
8
 
9
- | Paper label | Release file | Provenance |
10
  |---|---|---|
11
- | `fig:toy_occupancy_contract` | `paper_outputs/figures/matching.pdf` | Static vector schematic used by the manuscript. |
12
- | `fig:task_contract_tiles` | `paper_outputs/figures/fig_task_contract_tiles.pdf` | Static contract-map figure used by the manuscript. |
13
- | `fig:selection_regret_diagnostic` | `paper_outputs/figures/fig_selection_regret_rq2.tikz` | Rebuilt by `scripts/build_selection_regret_rq2_figure.py` from `artifacts/results/selection_regret_scope_sweep_20260505.csv`. |
14
- | `fig:fireprone_contract_progression` | `paper_outputs/figures/fig_fireprone_contract_progression_compact.pdf` | Rebuilt by `scripts/build_fireprone_contract_progression_figure.py` from `artifacts/results/fireprone_contract_progression_summary.json`. |
15
- | `fig:task_comparator_normalized_map` | `paper_outputs/figures/fig_task_rank_map.pdf` | Rebuilt by `scripts/build_task_rank_map.py` from `tab_primary_results.tex` and `tab_supporting_results.tex`. |
-
- ## Main Tables
-
- | Paper label | Release file | Provenance |
- |---|---|---|
- | `tab:primary_results` | `paper_outputs/tables/tab_primary_results.tex` | Frozen paper-output TeX extracted from the current manuscript source and verified by checksum. Raw reruns require the task scripts and non-redistributed feature caches. |
- | `tab:supporting_results` | `paper_outputs/tables/tab_supporting_results.tex` | Frozen paper-output TeX extracted from the current manuscript source and verified by checksum. Raw reruns require the task scripts and non-redistributed feature caches. |
-
- ## Appendix Tables
-
- | Paper label | Release file | Provenance |
- |---|---|---|
- | `tab:app_matching_rule_params` | `paper_outputs/tables/tab_app_matching_rule_params.tex` | Contract parameter table from manuscript source, verified by checksum. |
- | `tab:app_contract_params_full` | `paper_outputs/tables/tab_app_contract_params_full.tex` | Contract parameter table from manuscript source, verified by checksum. |
- | `tab:app_scope_params` | `paper_outputs/tables/tab_app_scope_params.tex` | Scope parameter table from manuscript source, verified by checksum. |
- | `tab:fireprone_contract_progression` | `paper_outputs/tables/tab_fireprone_contract_progression.tex` | Values from `artifacts/results/fireprone_contract_progression_summary.json`. |
- | `tab:appendix_selection_regret_tolerance` | `paper_outputs/tables/tab_appendix_selection_regret_tolerance.tex` | Values from selection-regret summary artifacts. |
- | `tab:app_occupancy_ppr_scope` | `paper_outputs/tables/tab_app_occupancy_ppr_scope.tex` | Values from `artifacts/results/fireprone_contract_progression_summary.json`. |
- | `tab:app_spread_ap_by_scope` | `paper_outputs/tables/tab_app_spread_ap_by_scope.tex` | Frozen paper-output TeX extracted from current manuscript source, verified by checksum. |
- | `tab:app_burned_area_median_acre` | `paper_outputs/tables/tab_app_burned_area_median_acre.tex` | Frozen paper-output TeX extracted from current manuscript source, verified by checksum. |
- | `tab:app_analog_rank_depth` | `paper_outputs/tables/tab_app_analog_rank_depth.tex` | Frozen paper-output TeX extracted from current manuscript source, verified by checksum. |
- | `tab:app_smoke_high_event` | `paper_outputs/tables/tab_app_smoke_high_event.tex` | Frozen paper-output TeX extracted from current manuscript source, verified by checksum. |
- | `tab:app_heat_event_pr` | `paper_outputs/tables/tab_app_heat_event_pr.tex` | Frozen paper-output TeX extracted from current manuscript source, verified by checksum. |
- | `tab:app_seed_robustness` | `paper_outputs/tables/tab_app_seed_robustness.tex` | Seed summary table from manuscript source, verified by checksum. |
- | `tab:app_head_architectures` | `paper_outputs/tables/tab_app_head_architectures.tex` | Architecture description table from manuscript source, verified by checksum. |
-
- ## Reproduction Commands
-
  ```bash
  python3 scripts/reproduce_paper_outputs.py
  ```

- This command rebuilds the outputs that depend only on released summary files,
- checks all final paper-output hashes, and runs the release audit.
-
- ## Raw Rerun Boundary
-
- Some tables depend on raw gridded data, event data, or backbone feature caches
- that are not redistributed. For public release, we provide the compact summary
- artifacts used to reproduce the displayed paper values and document the raw data
- sources separately.
 
+ # Public Artifact Map
+
+ This map describes the public Hugging Face release boundary. Manuscript TeX,
+ BibTeX, table TeX, TikZ source, and paper PDFs are intentionally excluded.
+
+ ## Included Public Artifacts
+
+ | Area | Release files | Notes |
  |---|---|---|
+ | Model code | `models/wildfire_fm/modeling_unet.py` | Compact U-Net used by the released checkpoints. |
+ | Checkpoint metadata | `models/wildfire_fm/checkpoint_manifest.json` | Lists five seeded checkpoint paths, SHA-256 hashes, and byte sizes. |
+ | Figure previews | `assets/*.png`, `assets/*.svg` | Hub-page visuals and final-paper figure previews. |
+ | Figure PDFs | `paper_outputs/figures/*.pdf` | Selected final-paper figures retained for visual reproducibility. |
+ | Numeric summaries | `artifacts/results/*.csv`, `artifacts/results/*.json` | Sanitized compact summaries; local machine paths removed. |
+ | Data notes | `data_sources/DATA_SOURCES.md` | Source roles and access entry points; raw data are not redistributed. |
+ | Raw rerun references | `experiments/` | Sanitized scripts/templates requiring user-provided data and paths. |
+
+ ## Excluded Manuscript Artifacts
+
+ The release does not include `paper/`, `paper_outputs/tables/`, generated table
+ TeX, `.tikz`, `.bib`, or manuscript PDF files. The public arXiv paper should be
+ linked separately after finalization.
+
+ ## Verification
+
  ```bash
  python3 scripts/reproduce_paper_outputs.py
  ```

+ This command checks public artifact hashes and audits that manuscript/source
+ artifacts and local paths are absent.
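For readers without the repository, the manifest-based hash check described above can be sketched in a few lines. This is a minimal illustration, not the actual `scripts/reproduce_paper_outputs.py`; it assumes `checkpoint_manifest.json` maps relative file paths to objects with `sha256` and `size` fields, which is an assumed schema based on the table description.

```python
import hashlib
import json
from pathlib import Path


def sha256_of(path: Path, chunk_size: int = 1 << 20) -> str:
    """Stream a file through SHA-256 so large checkpoints never sit fully in memory."""
    digest = hashlib.sha256()
    with path.open("rb") as fh:
        for chunk in iter(lambda: fh.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


def verify_manifest(manifest_path: Path) -> list:
    """Return (relative_path, reason) pairs for manifest entries that fail verification."""
    entries = json.loads(manifest_path.read_text())
    root = manifest_path.parent
    problems = []
    for rel_path, meta in entries.items():
        target = root / rel_path
        if not target.is_file():
            problems.append((rel_path, "missing"))
        elif target.stat().st_size != meta["size"]:
            problems.append((rel_path, "size mismatch"))
        elif sha256_of(target) != meta["sha256"]:
            problems.append((rel_path, "sha256 mismatch"))
    return problems
```

Checking byte size before rehashing is a cheap first filter; only files that pass it pay the cost of a full SHA-256 pass.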
 
 
 
 
 
 
 
docs/huggingface_release_design.md CHANGED
@@ -12,11 +12,11 @@ reproducibility material rather than being the main organizing principle.
  checkpoint locations, quick loading code, data-source boundaries, limitations,
  and citation text.
  - `assets/` contains lightweight visuals for the Hub page plus PNG previews of
- final-paper figures.
  - `models/wildfire_fm/` contains model code, manifests, and checkpoint metadata.
- - `paper_outputs/` stores final TeX, TikZ, and PDF artifacts used by the
- manuscript.
- - `artifacts/results/` stores compact CSV/JSON summaries that can be public.
  - `data_sources/` documents external data resources without redistributing them.
  - `experiments/` contains raw-rerun reference scripts and Slurm templates.

@@ -25,3 +25,9 @@ reproducibility material rather than being the main organizing principle.
  The repository is a model release with reproducibility artifacts, not a raw-data
  mirror. Full raw-data reruns require separately obtained source data, local
  feature caches, and cluster-specific paths.
 
 
 
 
 
 
 
  checkpoint locations, quick loading code, data-source boundaries, limitations,
  and citation text.
  - `assets/` contains lightweight visuals for the Hub page plus PNG previews of
+ final-paper figures.
  - `models/wildfire_fm/` contains model code, manifests, and checkpoint metadata.
+ - `paper_outputs/` stores selected final-paper figure PDFs only. Manuscript
+ TeX, table TeX, TikZ source, BibTeX, and paper PDF files are not included.
+ - `artifacts/results/` stores sanitized compact CSV/JSON summaries that can be public.
  - `data_sources/` documents external data resources without redistributing them.
  - `experiments/` contains raw-rerun reference scripts and Slurm templates.

  The repository is a model release with reproducibility artifacts, not a raw-data
  mirror. Full raw-data reruns require separately obtained source data, local
  feature caches, and cluster-specific paths.
+
+ ## Manuscript Boundary
+
+ The Hub model release intentionally excludes manuscript TeX, BibTeX, table TeX,
+ TikZ source, and paper PDFs. The paper can be linked separately after the public
+ arXiv version is finalized.
paper/main.tex DELETED
@@ -1,141 +0,0 @@
- % !TeX root = main.tex
- % !TeX program = pdflatex
- \documentclass{article}
- \usepackage[preprint]{neurips_2026}
- \usepackage[utf8]{inputenc} % allow utf-8 input
- \usepackage[T1]{fontenc} % use 8-bit T1 fonts
- \usepackage{hyperref} % hyperlinks
- \usepackage{url} % simple URL typesetting
- \usepackage{booktabs} % professional-quality tables
- \usepackage{amsfonts} % blackboard math symbols
- \usepackage{nicefrac} % compact symbols for 1/2, etc.
- \usepackage{microtype} % microtypography
- \usepackage[table]{xcolor} % colors
- \usepackage{placeins}
- \usepackage[utf8]{inputenc}
- \usepackage[T1]{fontenc}
- \usepackage{hyperref}
- \setcitestyle{numbers,square}
- \definecolor{tocblue}{RGB}{31, 73, 125}
- \hypersetup{
- colorlinks=false,
- citebordercolor=green,
- linkbordercolor=green,
- urlbordercolor=blue,
- pdfauthor={Yangshuang Xu, Yuyang Dai, Liling Chang, Qi Wang, Yushun Dong},
- pdftitle={Does Your Wildfire Prediction Model Actually Work, or Just Score Well?},
- pdfsubject={},
- pdfkeywords={}
- }
-
- \usepackage{url}
- \usepackage{booktabs}
- \usepackage{amsfonts}
- \usepackage{nicefrac}
- \usepackage{microtype}
- \usepackage{amsmath}
- \usepackage{amssymb}
- \usepackage{graphicx}
- \usepackage{tabularx}
- \usepackage{longtable}
- \usepackage{multirow}
- \usepackage{array}
- \usepackage{float}
- \usepackage{adjustbox}
- \usepackage{placeins}
- \usepackage{enumitem}
- \usepackage{siunitx}
- \usepackage{tikz}
- \usepackage{subcaption}
- \usepackage{wrapfig}
- \usepackage[normalem]{ulem}
- \usepackage{pifont}
- \usepackage{hyperref}
- \usepackage{xcolor}
- \usepackage{tabularx}
- \usepackage{xspace}
-
-
- \sisetup{detect-all}
-
- \definecolor{wfblue}{RGB}{42,111,151}
- \definecolor{wforange}{RGB}{231,111,81}
- \definecolor{wfgreen}{RGB}{42,157,143}
- \definecolor{wfgold}{RGB}{233,196,106}
- \definecolor{wfslate}{RGB}{38,70,83}
- \definecolor{wfgray}{RGB}{108,117,125}
- \definecolor{wfpurple}{RGB}{116,81,164}
- \definecolor{wfindigo}{RGB}{77,100,166}
- \definecolor{wfrose}{RGB}{188,80,144}
- \definecolor{wfolive}{RGB}{120,143,64}
- \definecolor{primarybg}{RGB}{219,234,254}
- \definecolor{primaryrule}{RGB}{147,197,253}
- \definecolor{headerbg}{RGB}{30,58,138}
- \definecolor{regbg}{RGB}{240,240,240}
- \definecolor{retrbg}{RGB}{232,232,232}
- \definecolor{refbg}{RGB}{255,243,205} % amber – reference row
- \definecolor{alphabg}{RGB}{220,237,220} % green – AlphaEarth row
- \definecolor{subheadbg}{RGB}{241,245,249} % near-white – column subheader
- \definecolor{bestval}{RGB}{0,100,0} % dark green – best frozen value
- \definecolor{warnval}{RGB}{180,0,0} % dark red – anomalous value
-
- \newcolumntype{L}[1]{>{\raggedright\arraybackslash}p{#1}}
- \newcolumntype{Y}{>{\raggedright\arraybackslash}X}
- \newcommand{\ms}[2]
- {\ensuremath{#1{\mkern1mu}_{\scriptscriptstyle \pm #2}}}
- \newcommand{\msb}[2]{\ensuremath{\mathbf{#1 \pm #2}}}
-
- \newcommand{\best}[1]{\textbf{#1}}
- \newcommand{\ourfm}{\textsc{Wild}{\textbf{FIRE}}\textsc{-FM}\xspace}
- % \title{Ranking Is Not Decision Quality: Evaluation Contracts for Wildfire-Centric Transfer}
- \title{Does Your Wildfire Prediction Model Actually Work,\\ or Just Score Well?}
-
-
- \author{%
- Yangshuang Xu\thanks{Equal contribution.} \\
- Florida State University \\
- \texttt{yx21e@fsu.edu} \\
- \And
- Yuyang Dai\footnotemark[1] \\
- Florida State University \\
- \texttt{yd26@fsu.edu} \\
- \And
- Liling Chang \\
- Florida State University \\
- \texttt{liling.chang@fsu.edu} \\
- \And
- Qi Wang \\
- Northeastern University \\
- \texttt{wangqi@vt.edu} \\
- \And
- Yushun Dong\thanks{Corresponding author.} \\
- Florida State University \\
- \texttt{yd24f@fsu.edu} \\
- }
-
- \begin{document}
-
- \maketitle
-
- \input{sections/0_abstract}
- \input{sections/1_intro}
- \input{sections/2_backbone}
- % \input{sections/background}
- \input{sections/3_prelim}
- % \input{sections/methodology}
- \input{sections/4_experiments}
- \input{sections/5_conclusion}
-
- % \bibliographystyle{abbrvnat}
- \bibliographystyle{plain}
- \bibliography{references}
-
- \newpage
- \input{sections/appendix}
-
- \clearpage
- \input{checklist_filled}
- \clearpage
-
-
- \end{document}

paper/manuscript_final.pdf DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:c342978b2f0f25cf6e430b860702895bbb3b512145c8c6e38aa2233b416d835e
- size 297362
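The deleted PDF was tracked with Git LFS, so the repository stored only the small key/value text pointer shown above rather than the PDF bytes. A minimal sketch of parsing that pointer format (illustrative only; real workflows should use the `git lfs` client):

```python
def parse_lfs_pointer(text: str) -> dict:
    """Split a Git LFS pointer file into its space-separated key/value fields."""
    fields = {}
    for line in text.strip().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields


# The exact pointer content from the deleted file above.
pointer = """version https://git-lfs.github.com/spec/v1
oid sha256:c342978b2f0f25cf6e430b860702895bbb3b512145c8c6e38aa2233b416d835e
size 297362"""

fields = parse_lfs_pointer(pointer)
# The oid field is "<algorithm>:<hex digest>".
algorithm, _, digest = fields["oid"].partition(":")
```

Given the real blob, recomputing `hashlib.sha256(blob).hexdigest()` and comparing it to `digest` confirms the pointer and the 297,362-byte PDF match.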
 
 
 
 
paper/references.bib DELETED
@@ -1,465 +0,0 @@
paper/sections/0_abstract.tex DELETED
@@ -1,4 +0,0 @@
- \begin{abstract}
- Wildfire prediction is important for early warning and resource allocation, yet existing Earth foundation models (Earth FMs) are pretrained for general atmospheric and geophysical objectives rather than wildfire forecasting. To address this gap, we introduce \ourfm, the first foundation model pretrained specifically for wildfire prediction using weather, active-fire observations, topography, vegetation, and static environmental data. However, introducing a domain-specific backbone alone does not solve the evaluation problem: wildfire events are sparse in space and time, making transfer conclusions highly sensitive to matching rules and evaluation settings.
- To address this problem, we introduce a fixed-contract evaluation framework with two controlled checks: a fixed-output check for matching-rule effects and a fixed-feature check for head-selection effects. Under matched contracts, we compare \ourfm\ with ten Earth-FM baselines across occupancy, spread, retrieval, and regression tasks. Our results show that wildfire transfer conclusions depend strongly on evaluation design and task formulation. We hope this framework and \ourfm\ provide a foundation for future wildfire-specific Earth-FM research and benchmarking. Our code is available at https://anonymous.4open.science/r/Wildfire-fm-evaluation-contracts-5AE9/.
- \end{abstract}
 
paper/sections/1_intro.tex DELETED
@@ -1,77 +0,0 @@
- \section{Introduction}
-
- Wildfire prediction is critical for early warning and resource allocation in disaster response~\cite{goldammer1999early, farahmand2020fdeo}. As extreme fire events grow more frequent and severe, accurate forecasting of wildfire occurrence and spread is becoming increasingly important~\cite{pickell2017early, kotroni2020disarm}. Recent Earth foundation models (Earth FMs), pretrained on large-scale atmospheric and geophysical data~\cite{bodnar2025aurora, schmude2024prithviwxc, nguyen2023climax}, provide transferable representations for Earth-system dynamics and have shown strong performance across weather and remote-sensing tasks. However, wildfire dynamics depend on complex interactions among weather, vegetation, topography, fuel conditions, and active-fire behavior, which are not explicitly modeled during pretraining in existing general-purpose Earth FMs. This mismatch raises a natural question: can representations learned for general atmospheric or geophysical objectives transfer reliably to wildfire forecasting, and how should we measure that transfer?
-
- Answering this question requires solving two intertwined problems. The first is that existing Earth FMs are not pretrained specifically for wildfire dynamics, but instead adapted to wildfire tasks after general-purpose pretraining. To address this limitation, we introduce \textbf{\ourfm}, the first foundation model pretrained specifically for wildfire prediction using fire-relevant multimodal data, including regional weather dynamics, active-fire observations, topography, vegetation, and static environmental context. By incorporating wildfire-specific signals directly during pretraining, \ourfm\ learns representations aligned with the physical processes underlying fire behavior rather than relying on transfer from general atmospheric or geophysical objectives.
-
- The second problem is evaluation: even with a domain-specific model, reliably comparing \ourfm\ against transferred general-purpose Earth FMs remains difficult. Wildfire events are sparse in space and time~\cite{ebert2009neighborhood, gilleland2009intercomparison}, making transfer conclusions highly sensitive to three sources of evaluation variability.
- \textit{First,} matching rules determine what counts as a correct prediction. Early warning systems tolerate spatial offsets that post-fire damage assessment cannot, so different matching rules can produce substantially different F1 scores from the same model outputs~\cite{ebert2009neighborhood, gilleland2009intercomparison}.
- \textit{Second,} head-selection metrics determine which lightweight adapter is chosen on top of a frozen representation. Ranking metrics such as PR-AUC and decision metrics such as F1 can favor different heads from the same frozen features~\cite{mcdermott2024aurocauprc, traub2024selectiveclassification}.
- \textit{Third,} wildfire task forms operate over different prediction units and metric families. Occupancy prediction, spread forecasting, burned-area regression, and analog retrieval therefore produce scores that are not directly comparable even under the same backbone~\cite{schaeffer2023mirage, gerard2023wildfirespreadts}.
- Related protocol sensitivity has also been observed in active learning~\cite{luth2023activelearning} and selective classification~\cite{traub2024selectiveclassification}. We show that these effects become particularly severe in wildfire transfer evaluation, where sparse events and heterogeneous task forms amplify evaluation instability~\cite{marsocci2024pangaea}.
-
- \begin{figure*}
- \centering
- \includegraphics[width=0.96\linewidth]{figures/overview_wildfire.pdf}
- \caption{Overview of \textbf{\ourfm} and \textbf{Evaluation Protocol} in this paper.}
- \vspace{-8mm}
- \label{fig:overview}
- \end{figure*}
-
-
- This evaluation instability makes reliable comparison between
- \ourfm\ and existing Earth FMs fundamentally difficult. Standard
- geospatial benchmarks such as GEO-Bench~\cite{lacoste2023geobench},
- WeatherBench2~\cite{rasp2024weatherbench2}, WILDS~\cite{koh2021wilds},
- SustainBench~\cite{yeh2021sustainbench}, and
- TorchGeo~\cite{torchgeo2022} standardize datasets, splits, and
- metrics, but do so for tasks with dense, balanced labels where
- matching-rule sensitivity is not a primary concern. Wildfire
- studies such as FireCast~\cite{radke2019firecast} and Next Day
- Wildfire Spread~\cite{huot2022nextday} apply the same
- report-and-compare paradigm directly to sparse fire events
- without controlling for matching-rule choice, head-selection
- metric, or task-form comparability, the three sources of
- instability identified above. As a result, scores reported under
- different implicit protocol choices are not directly comparable,
- even when the underlying predictions are identical.
-
- Based on the \textit{limitations of prior work}, our contributions are as follows (see Figure~\ref{fig:overview}).
- \begin{itemize}
-
- \item \textbf{Wildfire-specific foundation model.}
- We introduce \ourfm, the first foundation model pretrained
- specifically for wildfire prediction using multimodal wildfire
- data spanning weather, active-fire observations, topography,
- vegetation, and static environmental context. Unlike general
- Earth FMs adapted after pretraining, \ourfm\ learns wildfire
- representations directly from fire-relevant processes during
- pretraining.
-
- \item \textbf{Fixed-contract evaluation framework.}
- We formulate wildfire Earth-FM transfer as a fixed-contract
- evaluation problem, defining a contract
- $\mathcal{C} = (\mathcal{T}, M, \Lambda, \Omega, \mathcal{A})$
- that specifies the task, metric, matching rule, evaluation
- scope, and lightweight-head family before comparison. We
- introduce two controlled checks: a \emph{fixed-output check}
- for matching-rule effects and a \emph{fixed-feature check}
- for head-selection effects, enabling evaluation artifacts to
- be separated from representation quality.
-
- \item \textbf{Systematic wildfire transfer benchmark.}
- Under fixed contracts, we compare \ourfm\ against ten
- general-purpose Earth FMs across six wildfire task forms.
- Our results show that wildfire transfer conclusions are
- highly sensitive to evaluation design and strongly
- task-dependent across occupancy, spread, retrieval, and
- regression settings.
-
- \end{itemize}
-
- % \begin{figure*}
- % \centering
- % \includegraphics[width=\linewidth]{figures/overview_wildfire.pdf}
- % \caption{Overview of \textbf{\ourfm} and \textbf{Evaluation Protocol} in this paper.}
- % \label{fig:overview}
- % \end{figure*}
 
paper/sections/2_backbone.tex DELETED
@@ -1,39 +0,0 @@
- \section{\ourfm\ Reference Backbone}
- \label{Reference_backbone}
-
- \ourfm\ is a wildfire-specialized regional backbone trained on fire-relevant multimodal data for wildfire prediction. Existing general-purpose Earth FMs are pretrained for atmospheric and geophysical objectives~\cite{lam2023graphcast}, or for remote-sensing objectives~\cite{reed2023scalemae}, so wildfire-relevant information enters only indirectly through those objectives. In contrast, \ourfm\ is trained with weather, active-fire observations, topography, vegetation, and static environmental context, so its representation is learned from inputs tied directly to wildfire behavior. This design makes \ourfm\ a strong wildfire-specific backbone whose features are shaped by signals directly relevant to fire occurrence and spread.
- It provides a task-aligned regional model trained directly for wildfire prediction.
- It also serves as an empirical anchor for interpreting how transferred Earth FMs behave under matched evaluation contracts. This section describes the data resources and training strategy used to build \ourfm\ as an in-domain reference backbone. The fixed-contract protocol used to compare it with transferred Earth FMs is defined separately in Section~\ref{sec:eval}.
-
- \subsection{Data Resources}
- We group the resources by their role in the study: dynamic weather inputs, occupancy supervision, static context, and event-level resources for supporting tasks. Source and terms-of-use notes for the external data and model assets used in this study are summarized in Appendix Table~\ref{tab:external_assets_licenses}.
-
-
- \noindent\textbf{Dynamic weather inputs.}
- The weather inputs come from a California regional dataset built from NOAA High-Resolution Rapid Refresh (HRRR) fields~\cite{noaa_hrrr_ncei,noaa_hrrr_emc}. The data are placed on a projected 5 km grid in EPSG:5070. Each time map uses weather fields every 6 hours and predicts wildfire occupancy at a 12-hour lead. The variables include near-surface temperature and dew point, wind, CAPE, surface pressure, boundary-layer height, visibility, precipitation rate, and accumulated precipitation.
-
- \noindent\textbf{Occupancy supervision.}
- Wildfire supervision comes from NASA FIRMS active-fire detections~\cite{nasa_firms}. The detections are mapped to the same grid as the weather fields. \ourfm\ is trained on gridded occupancy labels derived from these detections. This defines the occupancy target used by the reference backbone throughout the primary experiments.
-
- \noindent\textbf{Static context.}
- Static context describes landscape and exposure factors that do not change at the weather time step. These variables are LANDFIRE fire-behavior fuel model~\cite{landfire_fbfm40}, LANDFIRE canopy cover~\cite{landfire_canopy_cover}, Wildfire Risk to Communities housing-unit density~\cite{usfs_wrc_housing_density}, and LandScan population~\cite{ornl_landscan_2024}. Together with validity masks for the weather and static fields, the occupancy input has 16 channels: 10 weather fields, two validity masks, and four static layers for regional fire prediction.
-
- \noindent\textbf{Event-level resources.}
- Event-level resources are used for supporting burned-area and analog tasks, not as occupancy labels for \ourfm. These resources include WFIGS incident and perimeter attributes~\cite{nifc_wfigs_perimeters} and MTBS burned-area and burn-severity records~\cite{mtbs_usgs_2025}. They provide event-scale outcomes and incident metadata for supporting tasks in the experiments and appendix analyses.
-
-
-
- \subsection{Training Strategy}
-
- \noindent\textbf{Model and data split.}
- \ourfm\ uses a compact U-Net~\cite{ronneberger2015unet} that maps gridded weather and static inputs to wildfire predictions.
- Its primary output is fire occupancy on the common spatial grid.
- Data are split by time: June--August 2024 for training, September 2024 for validation, and October 2024 for testing.
- This yields 368 training time maps, 120 validation time maps, and 120 test time maps.
- Temporal splitting keeps later fire outcomes out of earlier training periods.
-
- \noindent\textbf{Fire-aware tile training.}
- Training is performed on 32$\times$32 tiles sampled from the time maps. The tiles include fire-centered regions and non-fire context, so the model sees both sparse fire labels and surrounding background conditions. This sampling reduces the dominance of empty cells without removing non-fire examples from the training distribution. Class-weighted binary cross-entropy is used for the primary occupancy target to further balance sparse positives.
-
- \noindent\textbf{Spatial-support training objective.}
- Wildfire labels can shift by a few grid cells because detections, weather fields, and static layers are aligned on a common grid. To reduce sensitivity to these small displacements during training, the occupancy target is dilated by two grid cells. An auxiliary spatial-support output is trained for the same neighborhood alongside the primary occupancy output. At test time, \ourfm\ is scored under the same task-specific evaluation contracts as the transferred Earth-FM backbones in Section~\ref{sec:eval}, ensuring matched comparison conditions.
 
paper/sections/3_prelim.tex DELETED
@@ -1,84 +0,0 @@
- \section{Evaluation Design}
- \label{sec:eval}
-
- % Section~\ref{Reference_backbone} establishes \ourfm\ as a wildfire-specialized backbone.
- % This section formalizes an evaluation contract and introduces two controlled checks to isolate evaluation effects.
-
- \subsection{Wildfire Output Records and Fire Sets}
-
- \paragraph{Output record.}
- A wildfire prediction model produces scores over spatial units and forecast times, which are compared against observed fire activity to compute performance. We formalize this comparison as a \emph{wildfire output record} $\mathcal{O} = (S, Y)$, where the score field $S = \{s_{i,t}\}$ contains model scores over spatial units $i$ and times $t$, and the label field $Y = \{y_{i,t}\}$ contains the corresponding observations. For occupancy tasks, $y_{i,t} \in \{0,1\}$ indicates whether fire is observed at $(i,t)$.
- \vspace{-0.5em}
- \paragraph{Predicted and observed fire sets.}
- To evaluate $\mathcal{O}$, the score field is thresholded at $\tau$ to produce a predicted fire set $\hat{P}_\tau = \{(i,t) : s_{i,t} \geq \tau\}$, while the observed fire set is $P = \{(i,t) : y_{i,t} = 1\}$. The pair $(\hat{P}_\tau, P)$ is evaluated under a matching rule. Given a matching rule, true positives (TP), false positives (FP), and false negatives (FN) are computed from matched and unmatched elements, and the decision F1 score is $\text{F1} = 2\text{TP}/(2\text{TP} + \text{FP} + \text{FN})$. The same $(\hat{P}_\tau, P)$ can yield different TP, FP, and FN counts under different matching rules without changing model outputs, motivating the fixed-output check in Section~\ref{sec:checks}.
- \vspace{-0.5em}
- \paragraph{Matching rules.}
- A matching rule specifies when a predicted unit-time pair in $\hat{P}_\tau$ is considered a match to an observed pair in $P$~\cite{ebert2009neighborhood, gilleland2009intercomparison}. Because wildfire applications tolerate different levels of spatial and temporal error, we define three matching rules for occupancy outputs. \textit{(1) Exact matching}: requires agreement in both spatial unit and forecast time. \textit{(2) Tolerated matching}: accepts predictions within a fixed spatial or temporal neighborhood defined by the evaluation contract $\mathcal{C}$. \textit{(3) Union matching}: accepts predictions satisfying either exact or tolerated matching.
- % \begin{itemize}
- % \item \textbf{Exact matching}: requires agreement in both spatial unit and forecast time.
- % \item \textbf{Tolerated matching}: accepts predictions within a fixed spatial or temporal neighborhood defined by the evaluation contract $\mathcal{C}$.
- % \item \textbf{Union matching}: accepts predictions satisfying either exact or tolerated matching.
- % \end{itemize}
- % \vspace{-0.5em}
- %
- Figure~\ref{fig:toy_occupancy_contract} illustrates these rules for a fixed output. Because the output record is held constant, any score difference is attributed solely to the matching rule.
-
- \begin{figure}
- \centering
- \vspace{-2mm}
- \includegraphics[width=0.8\linewidth]{figures/matching.pdf}
- \vspace{-2mm}
- \caption{Matching rules for one fixed occupancy output.
- (a) Exact matching counts only same-time, same-cell overlap.
- (b) Tolerated matching accepts bounded spatial or temporal offsets.
- (c) The union reading counts matches accepted by either rule.}
- \vspace{-5mm}
- \label{fig:toy_occupancy_contract}
- \end{figure}
-
- \subsection{Evaluation Contract}
-
- A wildfire transfer score depends not only on the model, but also on the evaluation choices used to compute it~\cite{luth2023activelearning}. Changing the matching rule $\Lambda$, metric $M$, or evaluation scope $\Omega$ changes what the score measures even when model outputs are fixed.
-
- We define an \emph{evaluation contract} as the tuple
- $\mathcal{C} = (\mathcal{T}, M, \Lambda, \Omega, \mathcal{A})$,
- where $\mathcal{T}$ denotes the task, $M$ the metric,
- $\Lambda$ the matching rule, $\Omega$ the evaluation scope,
- and $\mathcal{A}$ the allowed lightweight-head family.
- Two transfer scores are comparable only when all five
- components are identical. The evaluation scope $\Omega$ is particularly important in wildfire settings. A global scope evaluates the full spatial domain, including many fire-inactive regions that can mask differences between models. A fire-prone scope restricts evaluation to regions with higher historical fire activity. We report both scopes separately rather than averaging across them. Fixed matching-rule, task-form, and scope parameters are reported in Appendix Tables~\ref{tab:app_matching_rule_params}, \ref{tab:app_contract_params_full}, and~\ref{tab:app_scope_params}.
-
-
- \subsection{Task-Form Contracts}
- \label{sec:taskforms}
- Contract components depend on task form. We distinguish \emph{primary} and \emph{supporting} tasks based on whether they directly evaluate wildfire decisions. Occupancy and fire spread are primary tasks because they evaluate spatial fire outputs under matching or overlap rules.
- Retrieval, burned-area regression, smoke PM$_{2.5}$, and extreme heat are supporting tasks because they use different prediction units and metric families. Their results provide complementary evidence rather than direct substitutes for occupancy and spread evaluation~\cite{schaeffer2023mirage}.
-
- For primary tasks, multiple metrics are reported for the same output under different contracts. For occupancy, exact F1 requires same-cell same-time agreement, tolerated F1 accepts predictions within a spatial or temporal neighborhood, and union F1 accepts predictions satisfying either rule. For fire spread, exact F1 evaluates raster-cell agreement, spatial F1 evaluates region overlap between $\hat{B}$ and $B$~\cite{gilleland2009intercomparison}, and AP summarizes ranking quality across thresholds. These metrics are reported separately rather than aggregated because they measure different aspects of the same prediction task. Figure~\ref{fig:task_contract_tiles} summarizes the contract map across all six task forms.
-
- \subsection{Controlled Checks}
- \label{sec:checks}
- \begin{wrapfigure}[19]{r}{0.52\textwidth}
- \vspace{-2em}
- \centering
- \includegraphics[width=\linewidth]{figures/fig_task_contract_tiles.pdf}
- \vspace{-1.5em}
- \caption{
- Evaluation contract map for the six fixed-contract tasks.
- Yellow boxes denote \textcolor[RGB]{255,193,7}{\textbf{primary}} decision tasks; purple boxes denote \textcolor[RGB]{148,103,189}{\textbf{supporting}} tasks.
- }
- \label{fig:task_contract_tiles}
- \vspace{-0.8em}
- \end{wrapfigure}
- We isolate the two instability sources with two checks.
- Each check fixes all contract components except one, so any difference is attributed solely to that component.
-
- \paragraph{Fixed-output check.}
- The fixed-output check isolates matching-rule effects by holding the output record $\mathcal{O} = (S, Y)$ and all other contract components fixed while varying only $\Lambda$. For the same occupancy record, we compute F1 under exact, tolerated, and union matching. Any score difference is therefore attributed solely to the matching rule. If matching rules alone shift F1 by tens of percentage points on the same output, then comparing models under different $\Lambda$ conflates model quality with evaluation design.
-
- \paragraph{Fixed-feature check and selection regret.}
- The fixed-feature check isolates head-selection effects by holding the frozen feature source, $\mathcal{T}$, $\Omega$, $\Lambda$, and candidate head family $\mathcal{H} \subseteq \mathcal{A}$ fixed while varying only the selection metric. Let $R(h)$ denote the ranking score of head $h$ and $D(h)$ its decision score. Ranking-based selection chooses $h_R = \arg\max_{h \in \mathcal{H}} R(h)$, while decision-based selection chooses $h_D = \arg\max_{h \in \mathcal{H}} D(h)$. We define \emph{selection regret} as the decision-score gap incurred by using a ranking metric as a proxy for a decision metric during head selection: $\delta = D(h_D) - D(h_R) \geq 0$~\cite{mcdermott2024aurocauprc, traub2024selectiveclassification}. When $\delta > 0$, the ranking metric selects a head with lower decision performance under the same frozen representation, indicating that the observed gap arises from metric misalignment rather than from representation quality. The head family used in fixed-feature comparisons is summarized in Appendix Table~\ref{tab:app_head_architectures}.
-
-
- \paragraph{Fixed-contract transfer comparison.}
- After the controlled checks establish that matching-rule and selection-metric effects are non-trivial, Earth-FM backbones are evaluated under a shared contract $\mathcal{C}$. Entries are compared only when they satisfy the same $(\mathcal{T}, M, \Lambda, \Omega, \mathcal{A})$ tuple. Supporting tasks test whether occupancy and spread patterns generalize across task forms and provide additional evidence when transfer orderings are preserved.
 
paper/sections/4_experiments.tex DELETED
@@ -1,435 +0,0 @@
- \section{Experiments}
- \label{sec:experiments}
-
- We address three research questions under the fixed-contract framework defined in Section~\ref{sec:eval}. \textbf{RQ1:} Under fixed outputs, does the matching rule determine whether a wildfire model appears usable?
- \textbf{RQ2:} Under fixed features, does ranking-based head selection lose decision performance?
- \textbf{RQ3:} Under fixed task contracts, do model comparisons remain consistent across task forms?
- \vspace{-0.5em}
- \subsection{Experimental Setup}
- \paragraph{Task instances.}
- We instantiate the six task-form contracts defined in Section~\ref{sec:taskforms}.
- Occupancy and fire spread serve as primary tasks because they evaluate spatial fire outputs under matching or overlap rules and align with the decision structure of early warning systems~\cite{goldammer1999early, farahmand2020fdeo}.
- The four supporting tasks, \textit{final burned area, analog retrieval, smoke PM$_{2.5}$, and extreme heat}, use different prediction units and metric families; their results bound rather than replace primary decision evidence.
-
- \paragraph{Compared backbones.}
- The frozen Earth-FM comparator set includes Prithvi-WxC~\cite{schmude2024prithviwxc}, Aurora~\cite{bodnar2025aurora}, ClimaX~\cite{nguyen2023climax}, StormCast~\cite{pathak2024stormcast}, DLWP~\cite{weyn2020dlwp}, FCN~\cite{pathak2022fourcastnet}, FengWu~\cite{chen2023fengwu}, FuXi~\cite{chen2023fuxi}, Pangu-Weather~\cite{bi2023panguweather}, and AlphaEarth~\cite{brown2025alphaearth}.
- \ourfm\ serves as the wildfire-specialized reference backbone.
-
- \paragraph{Protocol.}
- For each comparison, the contract $\mathcal{C} = (\mathcal{T}, M, \Lambda, \Omega, \mathcal{A})$ is fixed before reporting test scores.
- Thresholds and morphology parameters are selected on validation data and held fixed at test time.
- Stochastic components are evaluated over five seeds and reported as mean $\pm$ standard deviation; deterministic fixed-output checks have zero seed variance by construction.
- Entries outside a fixed contract are omitted from main tables and documented in the appendix.
- For error metrics lower is better ($\downarrow$); for F1, AP, nDCG, and correlation metrics higher is better ($\uparrow$).
- Appendix Table~\ref{tab:app_seed_robustness} summarizes the seed-level checks behind the reported mean-with-std convention.
-
- \subsection{Matching-Rule Sensitivity Under Fixed Output (RQ1)}
- \label{sec:rq1}
-
- To answer RQ1, we conduct a fixed-output check on occupancy and fire spread tasks, holding the score field $S$, label field $Y$, threshold, and all other operating choices fixed while varying only the matching rule $\Lambda$ across exact, tolerated, and union settings. Occupancy results are reported in Figure~\ref{fig:fireprone_contract_progression} under both global and fire-prone scopes. The same progression is applied to fire spread outputs. Complete occupancy sweeps and predicted-positive rates are reported in Appendix Tables~\ref{tab:fireprone_contract_progression} and~\ref{tab:app_occupancy_ppr_scope}.
-
- \begin{wrapfigure}[21]{r}{0.50\textwidth}
- \centering
- \vspace{-3mm}
- \includegraphics[width=\linewidth]{figures/fig_primary_rank_change_map.pdf}
- \caption{\textbf{Primary-task rank changes (RQ1).}
- Cells show rank before\(\rightarrow\)after. Green/red/gray mark moving up/down/no change; darker green or red marks a larger move. Following Section~\ref{sec:taskforms}, Ex/Tol/Un are occupancy exact, tolerated, and union matching; Sp is spread spatial-overlap $F_1$.}
- \label{fig:primary_ranking}
- \vspace{-0.8em}
- \end{wrapfigure}
- Because both tasks involve spatially sparse targets, fire-active cells for occupancy, burned raster patches for spread, the operational assumptions encoded in $\Lambda$ directly govern what the model is being asked to get right, making matching-rule choice a substantive experimental setting rather than a post hoc evaluation detail.
- The fixed-output results reveal a pattern that goes beyond score differences: matching-rule choice determines whether a model appears viable for wildfire decision tasks at all. Under exact matching, which requires same-cell same-time agreement, the majority of frozen Earth-FM backbones produce F1 scores that are effectively near zero, rendering them indistinguishable from an uninformative baseline and suggesting they have no practical utility for the task. As the matching rule relaxes to tolerated and then union matching, both of which reflect operationally realistic assumptions for early warning systems, where a prediction displaced by a few grid cells still triggers the correct response, the same frozen representations recover substantial decision performance, with several backbones crossing from near-zero to practically meaningful F1 levels. This transition is not a marginal score improvement: it is a qualitative change in whether a model can be considered usable. The same pattern holds for fire spread under region-level matching relaxation, where strict raster-cell agreement again suppresses performance for most backbones while spatial tolerance restores it. The implications for prior wildfire transfer claims are significant: papers that report model performance under a single implicit matching rule, which is common practice given that sparse decision targets almost always require some form of tolerance~\cite{ebert2009neighborhood, gilleland2009intercomparison}, may be drawing viability conclusions that are entirely dependent on an undisclosed protocol choice. A model claimed to perform well under one tolerance assumption may be completely unusable under a stricter one, and vice versa. Matching rule cannot be treated as an evaluation detail; it is an experimental setting that must be fixed, reported, and justified as part of any wildfire transfer claim. Additional spread AP values under fixed scopes are reported in Appendix Table~\ref{tab:app_spread_ap_by_scope}.
-
-
- \begin{table}[t]
- \centering
- \small
- \setlength{\tabcolsep}{4pt}
- \renewcommand{\arraystretch}{1.20}
- \caption{%
- \textbf{Primary fixed-contract transfer results (RQ1).}
- Occupancy metrics: exact, tolerated, union $F_1$ (\%).
- Fire spread metrics: exact $F_1$ and spatial $F_1$ (\%).
- Each block fixes $\mathcal{T}$, $\Lambda$, $\Omega$, and $\mathcal{A}$.
- Upward arrows indicate that larger values are better.
- \textbf{Bold} marks the best value per metric. \textbf{Tol.} = Tolerated.
- }
- \label{tab:primary_results}
- \setlength{\arrayrulewidth}{0.4pt}
- \resizebox{\textwidth}{!}{%
- \begin{tabular}{lccccc}
- \toprule
- & \multicolumn{3}{c}{\textbf{Occupancy}}
- & \multicolumn{2}{c}{\textbf{Fire spread}} \\
- \cmidrule(lr){2-4}\cmidrule(lr){5-6}
- \textbf{Comparator}
- & \textbf{Exact $F_1\uparrow$} & \textbf{Tol.\ $F_1\uparrow$} & \textbf{Union $F_1\uparrow$}
- & \textbf{Exact $F_1\uparrow$} & \textbf{Spatial $F_1\uparrow$} \\
- \midrule
- \ourfm\
- & \ms{0.4546}{0.1412}
- & \ms{29.7484}{1.2868}
- & \ms{59.0656}{2.7372}
- & \ensuremath{\mathbf{37.6700}{\mkern1mu}_{\scriptscriptstyle \boldsymbol{\pm}\mathbf{0.9800}}}
- & \ensuremath{\mathbf{80.9700}{\mkern1mu}_{\scriptscriptstyle \boldsymbol{\pm}\mathbf{2.0200}}} \\
- \midrule
- Prithvi-WxC
- & \ms{0.0552}{0.0039} & \ms{7.1649}{0.6557} & \ms{20.1853}{1.8299}
- & \ms{22.3500}{3.4500} & \ms{65.2600}{1.0700} \\
- Aurora
- & \ms{0.0656}{0.0094} & \ms{8.5009}{1.9594} & \ms{23.1037}{4.9418}
- & \ms{30.8757}{0.1343} & \ms{71.7329}{0.0141} \\
- ClimaX
- & \ms{0.3480}{0.0754}
- & \ensuremath{\mathbf{29.7535}{\mkern1mu}_{\scriptscriptstyle \boldsymbol{\pm}\mathbf{3.6073}}}
- & \ensuremath{\mathbf{60.1506}{\mkern1mu}_{\scriptscriptstyle \boldsymbol{\pm}\mathbf{7.5865}}}
- & \ms{27.9853}{2.0532} & \ms{69.0634}{2.3832} \\
- StormCast
- & \ms{0.0626}{0.0119} & \ms{8.1951}{2.1895} & \ms{22.3817}{5.4294}
- & \ms{14.8387}{7.5791} & \ms{55.7568}{21.3003} \\
- DLWP
- & \ms{0.1693}{0.0419} & \ms{14.9148}{3.2446} & \ms{28.1901}{6.9658}
- & \ms{5.9335}{10.0712} & \ms{22.8587}{22.3750} \\
- FCN
- & \ms{0.2829}{0.0839} & \ms{19.5061}{3.3412} & \ms{40.0604}{9.3701}
- & \ms{3.1798}{2.6598} & \ms{15.6203}{12.4531} \\
- FengWu
- & \ms{0.2613}{0.0757} & \ms{12.0050}{6.0239} & \ms{24.1022}{13.6293}
- & \ms{5.5189}{9.0883} & \ms{18.4774}{22.4703} \\
- FuXi
- & \ms{0.3774}{0.1212} & \ms{21.0323}{4.8211} & \ms{37.2888}{9.4470}
- & \ms{19.9909}{2.1364} & \ms{56.1826}{3.0412} \\
- Pangu-Weather
- & \ms{0.2755}{0.1089} & \ms{17.0909}{4.0477} & \ms{35.6386}{9.0327}
- & \ms{11.2583}{11.0719} & \ms{32.5081}{25.4969} \\
- AlphaEarth
- & \ensuremath{\mathbf{2.0606}{\mkern1mu}_{\scriptscriptstyle \boldsymbol{\pm}\mathbf{0.4404}}}
- & \ms{29.4476}{6.0064} & \ms{37.4286}{9.9458}
- & \ms{11.0995}{3.6088} & \ms{32.8316}{7.4634} \\
- \bottomrule
- \end{tabular}
- }
- \end{table}
-
-
- \begin{wrapfigure}[14]{r}{0.50\textwidth}
- \centering
- \vspace{-1em} \includegraphics[width=0.50\textwidth]{figures/fig_selection_regret_scatter.pdf}
- \caption{\textbf{Head-selection regret under fixed features (RQ2).}
- Each point is one backbone; selection regret \(\delta\) follows Section~\ref{sec:checks} under global-scope union-\(F_1\).}
- \label{fig:selection_regret_diagnostic}
- \vspace{-1.2em}
- \end{wrapfigure}
-
- \subsection{Head-Selection Sensitivity Under Fixed Features (RQ2)}
- \label{sec:rq2}
-
- To answer RQ2, we conduct a fixed-feature check on occupancy and fire spread tasks, holding the frozen feature source, $\mathcal{T}$, $\Omega$, $\Lambda$, and candidate head family $\mathcal{H} \subseteq \mathcal{A}$ fixed while varying only the selection metric between PR-AUC-based and decision-F1-based selection. The resulting selection regret $\delta = D(h_D) - D(h_R)$ measures the decision-score loss induced by metric misalignment. Occupancy results are reported in Figure~\ref{fig:selection_regret_diagnostic} under both global and fire-prone scopes. Full per-seed and per-head details are reported in Appendix~\ref{sec:app_seeded_audits}, and the exact, tolerated, and union regret breakdown is provided in Appendix Table~\ref{tab:appendix_selection_regret_tolerance}.
-
- The fixed-feature results show that head-selection metrics introduce substantial backbone-dependent variation that is not explained by representation quality alone. Some backbones exhibit near-zero regret, indicating agreement between PR-AUC and decision-F1 selection, while others show large regret concentrated in specific scope-matching settings. Regret is generally larger under the global scope, where severe fire imbalance amplifies misalignment between ranking and decision metrics~\cite{mcdermott2024aurocauprc}. Restricting evaluation to fire-prone scopes typically reduces regret by concentrating evaluation on fire-relevant regions. A similar pattern appears for fire spread, where ranking and decision metrics can favor different heads under the same frozen representation. These results show that selection metrics must be aligned with the evaluation objective as part of the evaluation contract~\cite{traub2024selectiveclassification}.
-
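The selection-regret arithmetic in this subsection can be sketched in a few lines. This is an illustrative reconstruction rather than code from the deleted sources: the head names and score values below are hypothetical, and the decision metric stands in for the union-$F_1$ used in the text.

```python
# Hypothetical sketch of the fixed-feature selection-regret check (RQ2).
# Head names and score values are illustrative, not from the paper.

def selection_regret(heads, rank_metric, decision_metric):
    """delta = D(h_D) - D(h_R): the decision score of the head chosen by the
    decision metric minus that of the head chosen by the ranking metric."""
    h_r = max(heads, key=lambda h: rank_metric[h])      # PR-AUC-selected head
    h_d = max(heads, key=lambda h: decision_metric[h])  # decision-F1-selected head
    return decision_metric[h_d] - decision_metric[h_r]

heads = ["linear", "mlp", "conv"]
pr_auc = {"linear": 0.41, "mlp": 0.44, "conv": 0.39}    # ranking metric
union_f1 = {"linear": 58.2, "mlp": 52.7, "conv": 55.0}  # decision metric (%)

delta = selection_regret(heads, pr_auc, union_f1)
print(round(delta, 1))  # 5.5: PR-AUC picks "mlp", costing 5.5 F1 points
```

When the two metrics agree on the best head, the regret is zero; in the paper's protocol, selection happens on validation and regret is scored on test, so it can deviate from this toy single-split case.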
- \subsection{Supporting Task Checks (RQ3)}
- \label{sec:rq3}
-
- To answer RQ3, we evaluate all backbones across the four supporting task contracts, \textit{burned area}, \textit{analog retrieval}, \textit{smoke PM$_{2.5}$}, and \textit{extreme heat}, and examine whether the reference-versus-frozen ordering established under primary tasks generalizes across task forms. A rank overview across all six contracts is provided in Figure~\ref{fig:task_comparator_normalized_map}, which maps backbone-by-task rank positions and makes cross-task ordering shifts directly visible. Native metric values are reported in Table~\ref{tab:supporting_results}. Additional supporting-task diagnostics are reported in Appendix Tables~\ref{tab:app_burned_area_median_acre}, \ref{tab:app_analog_rank_depth}, \ref{tab:app_smoke_high_event}, and~\ref{tab:app_heat_event_pr}.
-
- The supporting task results produce three qualitatively distinct patterns relative to the primary findings. Burned area largely preserves the reference-versus-frozen ordering seen under occupancy and spread: \ourfm\ leads frozen entries on log-RMSE and Spearman $\rho$, suggesting that the representational advantage of wildfire-specific pretraining generalizes to event-scale regression under a different metric family, providing convergent evidence for the primary claim. Analog retrieval and smoke PM$_{2.5}$ show a different pattern, with AlphaEarth matching \ourfm\ closely on both tasks while atmospheric FMs show near-zero correlation on smoke PM$_{2.5}$, indicating that retrieval and air-quality signals are captured comparably by a general remote-sensing backbone, and that the primary occupancy advantage does not extend uniformly to these task forms. Extreme heat exhibits the largest variance across the comparator set, with atmospheric FMs ranging from near-reference performance to near-complete failure depending on backbone pretraining domain, while AlphaEarth again matches \ourfm\ closely. The scale of this variance is itself informative: aggregating scores across task forms without respecting contract boundaries would produce rankings dominated by scale artifacts in the extreme heat block rather than by transfer quality. Taken together, these results establish that supporting tasks bound rather than extend the primary claim: they provide useful evidence about where backbone families generalize and where they do not, but they cannot substitute for primary decision-task evaluation, and their results must be interpreted within their own task-form contracts.
-
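The per-contract ranking that underlies the rank map can be sketched directly. This is a hypothetical illustration, with made-up backbone labels and scores; it assumes higher-is-better metrics, so error metrics such as RMSE-C would be negated before ranking.

```python
# Illustrative per-contract ranking (RQ3 rank map). Scores are hypothetical.

def rank_within_contract(scores):
    """Rank backbones separately inside each contract so that metrics on very
    different scales (e.g. RMSE-C spanning roughly 0.2 to 18) are never
    averaged across contracts. Assumes higher is better."""
    ranks = {}
    for contract, by_backbone in scores.items():
        ordered = sorted(by_backbone, key=by_backbone.get, reverse=True)
        ranks[contract] = {b: r + 1 for r, b in enumerate(ordered)}
    return ranks

scores = {
    "burned_area_spearman": {"A": 0.63, "B": 0.18, "C": -0.34},
    "heat_exceedance_f1":   {"A": 0.954, "B": 0.869, "C": 0.905},
}
ranks = rank_within_contract(scores)
print(ranks["burned_area_spearman"])  # {'A': 1, 'B': 2, 'C': 3}
```

Ranking inside each contract is what keeps the extreme-heat scale artifacts described above from dominating a cross-task aggregate.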
- \begin{figure}[t]
- \centering
- \vspace{-5mm}
-
- \includegraphics[width=\textwidth]{figures/fig_rank_heatmap1.pdf}
- \vspace{-2mm}
- \caption{\textbf{Rank map for supporting task comparison (RQ3).} Each row fixes one task contract $\mathcal{C}$ and ranks the eligible backbones within that contract. The figure shows rank changes across task forms; native metric values are reported in Table~\ref{tab:supporting_results}.}
- \vspace{-6mm}
- \label{fig:task_comparator_normalized_map}
- \end{figure}
-
- \begin{table}[t]
- \centering
- \small
- \setlength{\tabcolsep}{3.5pt}
- \renewcommand{\arraystretch}{1.18}
- \caption{%
- \textbf{Supporting task-metric matrix (RQ3).}
- Top: final burned area and analog retrieval.
- Bottom: smoke PM$_{2.5}$ and extreme heat.
- Each block fixes $\mathcal{T}$, $\Lambda$, and $\Omega$; backbone
- column is shared across paired tasks. \ourfm\ row is
- separated by a rule as the empirical anchor. \textbf{Bold} marks
- the best value per metric. For error metrics
- lower is better ($\downarrow$); for $F_1$, nDCG, and $r$ higher
- is better ($\uparrow$).
- }
- \label{tab:supporting_results}
- \resizebox{\textwidth}{!}{%
- \begin{tabular}{lcccccc}
- \toprule
- & \multicolumn{3}{c}{\textbf{Burned area}}
- & \multicolumn{3}{c}{\textbf{Analog retrieval}} \\
- \cmidrule(lr){2-4}\cmidrule(lr){5-7}
- \textbf{Backbone}
- & \textbf{log-RMSE$\downarrow$} & \textbf{log-MAE$\downarrow$}
- & \textbf{Spearman$\uparrow$}
- & \textbf{nDCG@10$\uparrow$} & \textbf{log-RMSE$\downarrow$}
- & \textbf{log-MAE$\downarrow$} \\
- \midrule
- \ourfm\
- & \ensuremath{\mathbf{1.1657}{\mkern1mu}_{\scriptscriptstyle \boldsymbol{\pm}\mathbf{0.0126}}}
- & \ensuremath{\mathbf{1.0423}{\mkern1mu}_{\scriptscriptstyle \boldsymbol{\pm}\mathbf{0.0081}}}
- & \ensuremath{\mathbf{0.6298}{\mkern1mu}_{\scriptscriptstyle \boldsymbol{\pm}\mathbf{0.0338}}}
- & \ensuremath{\mathbf{0.5099}{\mkern1mu}_{\scriptscriptstyle \boldsymbol{\pm}\mathbf{0.0336}}}
- & \ensuremath{\mathbf{1.1977}{\mkern1mu}_{\scriptscriptstyle \boldsymbol{\pm}\mathbf{0.1029}}}
- & \ensuremath{\mathbf{1.0043}{\mkern1mu}_{\scriptscriptstyle \boldsymbol{\pm}\mathbf{0.0759}}} \\
- \midrule
- Prithvi-WxC
- & \ms{1.3630}{0.0681} & \ms{1.2435}{0.0668} & \ms{0.1799}{0.3002}
- & \ms{0.3857}{0.0189} & \ms{1.3908}{0.0938} & \ms{1.2585}{0.0865} \\
- Aurora
- & \ms{1.8658}{0.2009} & \ms{1.6717}{0.1245} & \ms{-0.1156}{0.2982}
- & \ms{0.4046}{0.0144} & \ms{1.3659}{0.0792} & \ms{1.2596}{0.0968} \\
- ClimaX
- & \ms{2.0300}{0.2103} & \ms{1.8443}{0.1528} & \ms{-0.2515}{0.2688}
- & \ms{0.4143}{0.0191} & \ms{1.4526}{0.0926} & \ms{1.2441}{0.1446} \\
- StormCast
- & \ms{1.6679}{0.1438} & \ms{1.4745}{0.1134} & \ms{0.1830}{0.1969}
- & \ms{0.4076}{0.0094} & \ms{1.3663}{0.0781} & \ms{1.2371}{0.1078} \\
- DLWP
- & \ms{1.3070}{0.0980} & \ms{1.1769}{0.0834} & \ms{0.4888}{0.1368}
- & \ms{0.3972}{0.0146} & \ms{1.5351}{0.0802} & \ms{1.3196}{0.0781} \\
- FCN
- & \ms{1.3693}{0.0885} & \ms{1.2599}{0.0723} & \ms{0.3484}{0.1662}
- & \ms{0.4316}{0.0134} & \ms{1.4604}{0.1035} & \ms{1.2351}{0.0586} \\
- FengWu
- & \ms{1.3715}{0.1011} & \ms{1.2604}{0.0820} & \ms{0.3221}{0.2004}
- & \ms{0.4246}{0.0237} & \ms{1.4179}{0.0986} & \ms{1.2233}{0.0915} \\
- FuXi
- & \ms{1.4068}{0.1011} & \ms{1.3023}{0.0789} & \ms{0.2663}{0.2561}
- & \ms{0.4279}{0.0212} & \ms{1.4290}{0.0929} & \ms{1.2236}{0.0961} \\
- Pangu-Weather
- & \ms{1.3280}{0.0735} & \ms{1.2081}{0.0607} & \ms{0.4141}{0.1573}
- & \ms{0.4017}{0.0245} & \ms{1.4235}{0.0731} & \ms{1.2225}{0.0847} \\
- AlphaEarth
- & \ms{2.4068}{0.2841} & \ms{2.0822}{0.2371} & \ms{-0.3428}{0.1716}
- & \ms{0.5086}{0.0440} & \ms{1.2158}{0.1310} & \ms{1.0350}{0.1018} \\
- \bottomrule
- \end{tabular}
- }
-
- \vspace{4pt}
-
- \resizebox{\textwidth}{!}{%
- \begin{tabular}{lcccccc}
- \toprule
- & \multicolumn{3}{c}{\textbf{Smoke PM$_{2.5}$}}
- & \multicolumn{3}{c}{\textbf{Extreme heat}} \\
- \cmidrule(lr){2-4}\cmidrule(lr){5-7}
- \textbf{Backbone}
- & \textbf{RMSE$\downarrow$} & \textbf{MAE$\downarrow$}
- & \textbf{Pearson $r\uparrow$}
- & \textbf{RMSE-C$\downarrow$} & \textbf{MAE-C$\downarrow$}
- & \textbf{Exceed.\ $F_1\uparrow$} \\
- \midrule
- \ourfm\
- & \ms{4.4646}{0.0060}
- & \ms{2.4108}{0.0016}
- & \ensuremath{\mathbf{0.6368}{\mkern1mu}_{\scriptscriptstyle \boldsymbol{\pm}\mathbf{0.0013}}}
- & \ensuremath{\mathbf{0.2179}{\mkern1mu}_{\scriptscriptstyle \boldsymbol{\pm}\mathbf{0.0043}}}
- & \ensuremath{\mathbf{0.1787}{\mkern1mu}_{\scriptscriptstyle \boldsymbol{\pm}\mathbf{0.0018}}}
- & \ms{0.9541}{0.0164} \\
- \midrule
- Prithvi-WxC
- & \ms{6.0382}{0.0828} & \ms{3.7301}{0.0055} & \ms{0.0243}{0.0045}
- & \ms{4.6225}{0.0192} & \ms{2.6315}{0.0128} & \ms{0.8693}{0.0023} \\
- Aurora
- & \ms{6.0384}{0.0828} & \ms{3.7265}{0.0055} & \ms{0.0193}{0.0043}
- & \ms{18.0474}{0.0708} & \ms{15.3747}{0.0594} & \ms{0.0951}{0.0038} \\
- ClimaX
- & \ms{6.0402}{0.0828} & \ms{3.7290}{0.0055} & \ms{0.0004}{0.0029}
- & \ms{17.6492}{0.0347} & \ms{14.4938}{0.0319} & \ms{0.7684}{0.0068} \\
- StormCast
- & \ms{6.1230}{0.0830} & \ms{3.8182}{0.0073} & \ms{0.0183}{0.0041}
- & \ms{1.7671}{0.2145} & \ms{1.3507}{0.1576} & \ms{0.9073}{0.0189} \\
- DLWP
- & \ms{5.9289}{0.1031} & \ms{3.7331}{0.0088} & \ms{0.0303}{0.0060}
- & \ms{2.2662}{0.1106} & \ms{1.7153}{0.0748} & \ms{0.9156}{0.0112} \\
- FCN
- & \ms{5.9277}{0.1033} & \ms{3.7345}{0.0088} & \ms{0.0312}{0.0062}
- & \ms{2.1657}{0.1800} & \ms{1.6033}{0.1039} & \ms{0.9257}{0.0096} \\
- FengWu
- & \ms{5.9297}{0.1032} & \ms{3.7395}{0.0088} & \ms{0.0304}{0.0063}
- & \ms{2.1266}{0.1589} & \ms{1.5801}{0.1004} & \ms{0.0481}{0.0459} \\
- FuXi
- & \ms{5.9319}{0.1029} & \ms{3.7398}{0.0088} & \ms{0.0299}{0.0061}
- & \ms{2.1282}{0.0969} & \ms{1.5759}{0.0719} & \ms{0.2268}{0.0623} \\
- Pangu-Weather
- & \ms{5.9270}{0.1036} & \ms{3.7320}{0.0088} & \ms{0.0301}{0.0060}
- & \ms{2.2045}{0.1483} & \ms{1.6307}{0.0889} & \ms{0.0199}{0.0062} \\
- AlphaEarth
- & \ensuremath{\mathbf{4.4403}{\mkern1mu}_{\scriptscriptstyle \boldsymbol{\pm}\mathbf{0.0488}}}
- & \ensuremath{\mathbf{2.3992}{\mkern1mu}_{\scriptscriptstyle \boldsymbol{\pm}\mathbf{0.0056}}}
- & \ms{0.6347}{0.0066}
- & \ms{0.2194}{0.0039}
- & \ms{0.1800}{0.0014}
- & \ensuremath{\mathbf{0.9542}{\mkern1mu}_{\scriptscriptstyle \boldsymbol{\pm}\mathbf{0.0107}}} \\
- \bottomrule
- \end{tabular}
- }
- \end{table}
-
-
- \paragraph{Pattern 1: primary pattern preserved (burned area).}
- \ourfm\ leads all frozen entries on log-RMSE and Spearman $\rho$. The ordering observed under occupancy and spread is preserved under burned-area regression despite the different prediction unit and metric family.
-
- \paragraph{Pattern 2: primary pattern bounded (analog retrieval and smoke PM$_{2.5}$).}
- For analog retrieval, AlphaEarth matches \ourfm\ (nDCG@10 $= 0.51 \pm 0.04$ vs.\ $0.51 \pm 0.03$). For smoke PM$_{2.5}$, AlphaEarth also matches \ourfm\ on MAE and Pearson $r$, while atmospheric Earth FMs show near-zero correlation. These results show that the occupancy-and-spread ordering does not fully extend to all supporting tasks once AlphaEarth is included.
-
- \paragraph{Pattern 3: primary pattern bounded with large variance (extreme heat).} AlphaEarth matches \ourfm\ on RMSE-C and remains close on exceedance $F_1$, while atmospheric FMs range from RMSE-C $= 1.77$ (StormCast) to $18.05$ (Aurora). This large spread indicates that aggregated scores across task forms would be dominated by scale artifacts rather than transfer quality, reinforcing the need for per-contract reporting established in Section~\ref{sec:eval}.
-
- \textit{Answer to RQ3:} Figure~\ref{fig:task_comparator_normalized_map} and Table~\ref{tab:supporting_results} show that burned area preserves the primary reference-versus-frozen pattern under a different metric family. Analog retrieval, smoke PM$_{2.5}$, and extreme heat bound this pattern: AlphaEarth matches or approaches \ourfm\ on these tasks, indicating that the primary occupancy and spread claims do not extend uniformly across all task forms.
 
paper/sections/5_conclusion.tex DELETED
@@ -1,31 +0,0 @@
- \vspace{-4em}
- \section{Conclusion}
-
- We introduced \ourfm, the first foundation model pretrained
- specifically for wildfire prediction using fire-relevant
- multimodal data. Our results show that wildfire forecasting
- requires representations aligned with wildfire dynamics rather
- than transfer from general atmospheric or geophysical
- pretraining alone.
- At the same time, our study shows that reliable wildfire
- transfer evaluation is substantially more difficult than
- standard benchmark settings suggest. Wildfire transfer
- conclusions depend strongly on matching rules, head-selection
- metrics, and task form, and scores computed under different
- evaluation settings are not directly comparable. These effects
- become particularly pronounced in sparse spatiotemporal
- prediction settings such as wildfire forecasting.
- We therefore introduced a fixed-contract evaluation framework
- for wildfire Earth-FM transfer. By explicitly specifying the
- task, metric, matching rule, evaluation scope, and head family
- before comparison, fixed-contract evaluation enables more
- controlled and interpretable comparison across wildfire tasks
- and models.
- We hope \ourfm\ and the fixed-contract framework provide a
- foundation for future wildfire-specific Earth FMs, transfer
- benchmarks, and decision-oriented evaluation protocols.
- More broadly, our research provides a reliable system to guide real-world intervention and resource allocation at the intersection of AI and environmental decision-making.
-
- \paragraph{Limitations.} The conclusions apply to the task forms, scopes, evaluation rules, and comparator eligibility decisions used in this study.
- The evaluation covers selected wildfire decision tasks and supporting retrieval and regression task forms.
- These results provide task-form evidence rather than a single score across all wildfire-related prediction tasks.
 
 
paper/sections/appendix.tex DELETED
@@ -1,733 +0,0 @@
1
- % ============================================================
2
- % APPENDIX
3
- % ============================================================
4
- \appendix
5
-
6
- % Copy-paste safety: these definitions are no-ops when main.tex already defines them.
7
- \providecommand{\ms}[2]{\ensuremath{#1{\mkern1mu}_{\scriptscriptstyle \pm #2}}}
8
-
9
-
10
- % ────────────────────────────────────────────────────────────
11
- % TABLE OF CONTENTS (appendix only)
12
- % ────────────────────────────────────────────────────────────
13
- \section*{Appendix Contents}
14
- \addcontentsline{toc}{section}{Appendix Contents}
15
-
16
-
17
- \begin{center}
18
- \begin{tabular}{@{}p{0.82\textwidth}r@{}}
19
- \textbf{A\quad Evaluation Contract Specifications} & \pageref{sec:app_contract} \\
20
- \quad A.1\enspace Matching Rule Definitions & \pageref{sec:app_contract_matching} \\
21
- \quad A.2\enspace Task-Form Contract Parameters & \pageref{sec:app_contract_params} \\
22
- \quad A.3\enspace Evaluation Scope Definitions & \pageref{sec:app_contract_scope} \\[4pt]
23
- \textbf{B\quad Controlled Check Details} & \pageref{sec:app_checks} \\
24
- \quad B.1\enspace Fixed-Output Check: Full Sweep & \pageref{sec:app_checks_output} \\
25
- \quad B.2\enspace Fixed-Feature Check: Selection Summary & \pageref{sec:app_checks_feature} \\
26
- \quad B.3\enspace Selection Regret Under Matching Rules & \pageref{sec:app_checks_regret} \\
27
- \quad B.4\enspace Additional Value Tables & \pageref{sec:app_checks_values} \\[4pt]
28
- \textbf{C\quad Comparator Eligibility Notes} & \pageref{sec:comparator_audit} \\[4pt]
29
- \textbf{D\quad Seeded Audits} & \pageref{sec:app_seeded_audits} \\
30
- \quad D.1\enspace Seed Robustness Summary & \pageref{sec:app_seed_robustness} \\[4pt]
31
- \textbf{E\quad Lightweight Head and Adaptation Details} & \pageref{sec:app_heads} \\[4pt]
32
- \textbf{F\quad Limitations} & \pageref{sec:limitations} \\[4pt]
\textbf{G\quad Reproducibility and Evaluation Artifacts} & \pageref{sec:repro_compute_impact} \\
\end{tabular}
\end{center}

\noindent\textit{Retention rule.}
Appendix tables are retained when they add contract parameters, controlled-check arithmetic,
task-specific non-main metrics, seed summaries, eligibility checks, or protocol details.
Full task matrices and reference-summary tables that duplicate the main result tables are omitted here.

\clearpage

% ============================================================
% A EVALUATION CONTRACT SPECIFICATIONS
% ============================================================
\section{Evaluation Contract Specifications}
\label{sec:app_contract}

% ────────────────────────────────────────────────────────────
\subsection{Matching Rule Definitions}
\label{sec:app_contract_matching}

The three matching rules used across occupancy task forms are defined as follows.

\noindent\textbf{Exact matching.}
A predicted unit-time pair $(i,t) \in \widehat{P}_\tau$ is counted as a true positive if and only if the same pair appears in the observed fire set $P = \{(i,t): y_{i,t}=1\}$.
This is the strictest rule and yields the lowest $F_1$ for any fixed output.

\noindent\textbf{Tolerated matching.}
A predicted pair $(i,t)$ is counted as correct if there exists an observed pair $(i',t') \in P$ such that $\|i - i'\|_\infty \le k$ and $|t - t'| \le \Delta t$, where $k$ is the spatial tolerance in grid cells and $\Delta t$ is the temporal tolerance in forecast steps.
Both parameters are fixed as part of the evaluation contract $\mathcal{C}$ before scoring.

\noindent\textbf{Union matching.}
A predicted pair is counted as a true positive if it satisfies either exact or tolerated matching.
The resulting union-$F_1$ provides an upper bound on decision performance under the chosen tolerance.

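The matching rules above can be sketched as follows. This is a minimal illustration rather than the released scoring code: the pair layout (row, col, step) and the helper names are assumptions, and the exact rule is recovered as the zero-tolerance special case.

```python
import numpy as np

def tolerated_tp(pred_pairs, obs_pairs, k=8, dt=3):
    """Count predicted (row, col, step) pairs with an observed pair
    within Chebyshev distance k in space and dt in time; k = dt = 0
    recovers exact matching."""
    obs = np.asarray(obs_pairs, dtype=float)
    tp = 0
    for r, c, t in pred_pairs:
        d_space = np.maximum(np.abs(obs[:, 0] - r), np.abs(obs[:, 1] - c))
        d_time = np.abs(obs[:, 2] - t)
        tp += bool(np.any((d_space <= k) & (d_time <= dt)))
    return tp

def f1_score(tp, n_pred, n_obs):
    """F1 from matched counts; defined as zero when nothing matches."""
    if tp == 0:
        return 0.0
    prec, rec = tp / n_pred, tp / n_obs
    return 2 * prec * rec / (prec + rec)
```

Under this sketch, a prediction five cells away from an observed fire counts under the occupancy tolerance ($k=8$) but not under the exact rule.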
\noindent\textbf{Fixed parameter values.}
For occupancy, the spatial tolerance is $k=8$ grid cells.
The temporal tolerance is $\Delta t=3$ forecast steps for union matching and $\Delta t=0$ for spatial-only tolerance.
The threshold $\tau$ is selected on validation strict-$F_1$ before test scoring.
For fire spread, the spatial tolerance is $k=4$ grid cells, $\Delta t=0$, and the threshold is selected on validation spatial $F_1$.

\noindent Table~\ref{tab:app_matching_rule_params} records the fixed matching-rule parameters.

\begin{table}[h]
\centering
\small
\setlength{\tabcolsep}{10pt}
\renewcommand{\arraystretch}{1.2}
\caption{Matching-rule values used in the evaluation contracts.}
\label{tab:app_matching_rule_params}
\begin{tabular}{lll}
\toprule
\textbf{Parameter} & \textbf{Occupancy} & \textbf{Fire spread} \\
\midrule
\(k\) & 8 cells & 4 cells \\
\(\Delta t\) & 3 for union; 0 spatial-only & 0 \\
\(\tau\) & val.\ strict \(F_1\) & val.\ spatial \(F_1\) \\
\bottomrule
\end{tabular}
\end{table}

% ────────────────────────────────────────────────────────────
\subsection{Task-Form Contract Parameters}
\label{sec:app_contract_params}

Table~\ref{tab:app_contract_params_full} lists fixed scoring values not shown in the main contract map.

\begin{table}[h]
\centering
\scriptsize
\setlength{\tabcolsep}{3.5pt}
\renewcommand{\arraystretch}{1.2}
\caption{Fixed scoring values used by each task-form contract.}
\label{tab:app_contract_params_full}
\begin{adjustbox}{max width=\textwidth}
\begin{tabular}{llll}
\toprule
\textbf{\(\mathcal{T}\)} & \textbf{Scoring} & \textbf{Validation} & \textbf{\(\Omega\)} \\
\midrule
Occupancy & \(k=8,\Delta t=3\); exact/tol./union \(F_1\) & val.\ strict \(F_1\) & global; top-5/10/20\% fire-prone \\
Fire spread & \(k=4,\Delta t=0\); exact/spatial \(F_1\), AP & val.\ spatial \(F_1\) & spread-region cells \\
Final burned area & log-RMSE, log-MAE, Spearman \(\rho\) & val.\ log-RMSE & test events \\
Analog retrieval & nDCG@10; retrieved-event log error & val.\ nDCG@10 & test events \\
Smoke PM\(_{2.5}\) & RMSE, MAE, Pearson \(r\); exceedance 35 & val.\ RMSE & test stations \\
Extreme heat & RMSE-C, MAE-C, exceedance \(F_1\) & val.\ threshold 27/30/33\(^{\circ}\)C & heat-region stations \\
\bottomrule
\end{tabular}
\end{adjustbox}
\end{table}

% ────────────────────────────────────────────────────────────
\subsection{Evaluation Scope Definitions}
\label{sec:app_contract_scope}

\noindent\textbf{Global scope.}
Evaluation covers all spatial units in the domain, including fire-inactive regions.
This scope can mask model differences on fire-relevant locations because inactive cells inflate true-negative counts.

\noindent\textbf{Fire-prone scope.}
Evaluation is restricted to grid cells in the top-$k$\% of historical fire activity.
We report results for top-5\%, top-10\%, and top-20\% cutoffs.
The cutoff thresholds are derived from the training period and held fixed at test time.

\noindent\textbf{Spread region scope.}
For fire spread tasks, evaluation is restricted to the predicted and observed burned raster patches.
Only cells within the union of $\widehat{B}$ and $B$ contribute to metric computation.

\noindent\textbf{Fixed scope sizes.}
The global scope contains 8,085,000 test cells.
The fire-prone top-5\%, top-10\%, and top-20\% scopes contain 404,280, 808,560, and 1,617,000 test cells, respectively.
The spread-region scope is event-specific and uses the union of $\widehat{B}$ and $B$.
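The train-frozen fire-prone cutoff can be sketched as follows; this is a schematic, with a hypothetical per-cell frequency array standing in for the historical fire record, and ties at the percentile threshold can leave the mask slightly larger than an exact top-$k$\% count.

```python
import numpy as np

def fire_prone_mask(train_fire_freq, top_pct=5.0):
    """Mark cells at or above the (100 - top_pct)th percentile of
    training-period fire frequency.  The threshold is computed once on
    the training period and reused unchanged at test time."""
    freq = np.asarray(train_fire_freq, dtype=float)
    thresh = np.percentile(freq, 100.0 - top_pct)
    return freq >= thresh
```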

\begin{table}[h]
\centering
\small
\setlength{\tabcolsep}{8pt}
\renewcommand{\arraystretch}{1.2}
\caption{Scope values used in the evaluation contracts.}
\label{tab:app_scope_params}
\begin{tabular}{lcc}
\toprule
\textbf{\(\Omega\)} & \textbf{Definition} & \textbf{Size} \\
\midrule
Global & full domain & 8,085,000 test cells \\
Fire-prone top-5\% & top 5\% by training-period fire frequency & 404,280 test cells \\
Fire-prone top-10\% & top 10\% by training-period fire frequency & 808,560 test cells \\
Fire-prone top-20\% & top 20\% by training-period fire frequency & 1,617,000 test cells \\
Spread region & union of \(\widehat{B}\) and \(B\) & event-specific cells \\
\bottomrule
\end{tabular}
\end{table}

\clearpage

% ============================================================
% B CONTROLLED CHECK DETAILS
% ============================================================
\section{Controlled Check Details}
\label{sec:app_checks}

\begin{figure}[t]
\centering
\includegraphics[width=\textwidth]{figures/fig_fireprone_contract_progression_compact.pdf}
\caption{
\textbf{Matching-rule sensitivity in fire-prone occupancy (RQ1).}
Each row holds the score field \(S\), label field \(Y\), threshold, and \(\Omega\) fixed, and changes only \(\Lambda\).
Legend: \textcolor[HTML]{17375E}{$\blacksquare$} strict \(F_1\),
\textcolor[HTML]{4F8DCC}{$\blacksquare$} added \(F_1\) from spatial tolerance,
\textcolor[HTML]{BFD7F0}{$\blacksquare$} added \(F_1\) from union matching;
a red outline marks \ourfm, and the dashed line separates the original weather FMs from the added baselines.
The horizontal axis is \(F_1\) in percent.
}
\label{fig:fireprone_contract_progression}
\end{figure}

% ────────────────────────────────────────────────────────────
\subsection{Fixed-Output Check: Full Sweep}
\label{sec:app_checks_output}

The fixed-output check holds the score field $S$ and label field $Y$ fixed and varies only $\Lambda$.
Table~\ref{tab:fireprone_contract_progression} reports the full global and fire-prone sweep for all retained backbones.
The same table is the numeric counterpart to Figure~\ref{fig:fireprone_contract_progression}.

\begin{table*}[t]
\centering
\scriptsize
\setlength{\tabcolsep}{4pt}
\caption{Occupancy \(F_1\) scores across global and fire-prone scopes. Global uses the full validation/test domain; top-\(k\) rows use train-defined fire-prone masks from historical fire frequency. Values are percentages from the same validation-selected strict threshold. Tolerance is spatial-only; union adds temporal and spatial matching. \(\Delta\) is union minus strict. Cells report five-seed mean with std in small type.}
\label{tab:fireprone_contract_progression}
\begin{tabular}{@{}llcccc@{}}
\toprule
Backbone & \(\Omega\) & Strict \(F_1\uparrow\) & Tol.\ \(F_1\uparrow\) & Union \(F_1\uparrow\) & \(\Delta\) \(\uparrow\) \\
\midrule
\ourfm & global & \ms{0.4546}{0.1412} & \ms{29.7484}{1.2868} & \ms{59.0656}{2.7372} & \ms{58.6109}{2.6945} \\
 & top 5\% & \ms{3.5604}{0.8809} & \ms{39.2617}{1.4011} & \ms{72.8280}{2.5784} & \ms{69.2676}{1.9960} \\
 & top 10\% & \ms{3.5575}{0.8799} & \ms{39.1665}{1.3906} & \ms{72.5204}{2.5670} & \ms{68.9629}{1.9888} \\
 & top 20\% & \ms{3.5300}{0.8700} & \ms{38.2849}{1.2952} & \ms{69.7228}{2.4664} & \ms{66.1928}{1.9273} \\
\addlinespace[1pt]
Prithvi-WxC & global & \ms{0.0552}{0.0039} & \ms{7.1649}{0.6557} & \ms{20.1853}{1.8299} & \ms{20.1301}{1.8297} \\
 & top 5\% & \ms{1.4119}{1.1635} & \ms{19.2636}{4.5019} & \ms{42.5793}{4.5495} & \ms{41.1674}{3.4846} \\
 & top 10\% & \ms{1.2376}{1.3201} & \ms{14.8780}{8.4429} & \ms{32.6913}{13.2085} & \ms{31.4536}{11.9053} \\
 & top 20\% & \ms{1.1520}{1.3770} & \ms{13.1512}{9.4556} & \ms{28.1319}{15.2866} & \ms{26.9800}{13.9224} \\
\addlinespace[1pt]
Aurora & global & \ms{0.0656}{0.0094} & \ms{8.5009}{1.9594} & \ms{23.1037}{4.9418} & \ms{23.0382}{4.9325} \\
 & top 5\% & \ms{0.9859}{0.9299} & \ms{15.1337}{6.0821} & \ms{35.4834}{11.0192} & \ms{34.4975}{10.3728} \\
 & top 10\% & \ms{0.7790}{1.0453} & \ms{12.7381}{6.5558} & \ms{30.5305}{10.8842} & \ms{29.7515}{9.8656} \\
 & top 20\% & \ms{0.6655}{1.1043} & \ms{10.5304}{7.4309} & \ms{24.9444}{12.5844} & \ms{24.2790}{11.4943} \\
\addlinespace[1pt]
ClimaX & global & \ms{0.3480}{0.0754} & \ms{29.7535}{3.6073} & \ms{60.1506}{7.5865} & \ms{59.8026}{7.5454} \\
 & top 5\% & \ms{1.2937}{0.1086} & \ms{34.5791}{2.3772} & \ms{69.2186}{5.7215} & \ms{67.9249}{5.7263} \\
 & top 10\% & \ms{1.2522}{0.1602} & \ms{34.3341}{2.2852} & \ms{68.5713}{5.5377} & \ms{67.3191}{5.5538} \\
 & top 20\% & \ms{1.0287}{0.2686} & \ms{30.2140}{4.2857} & \ms{60.0650}{7.5674} & \ms{59.0363}{7.5891} \\
\addlinespace[1pt]
StormCast & global & \ms{0.0626}{0.0119} & \ms{8.1951}{2.1895} & \ms{22.3817}{5.4294} & \ms{22.3191}{5.4178} \\
 & top 5\% & \ms{0.9573}{0.8011} & \ms{15.3219}{5.5337} & \ms{36.1857}{9.7331} & \ms{35.2284}{9.1816} \\
 & top 10\% & \ms{0.7284}{0.9280} & \ms{12.6669}{6.3290} & \ms{30.4748}{10.6527} & \ms{29.7464}{9.7494} \\
 & top 20\% & \ms{0.5795}{0.9104} & \ms{10.4157}{7.3437} & \ms{24.6598}{12.3973} & \ms{24.0803}{11.4988} \\
\addlinespace[1pt]
DLWP & global & \ms{0.1693}{0.0419} & \ms{14.9148}{3.2446} & \ms{28.1901}{6.9658} & \ms{28.0208}{6.9257} \\
 & top 5\% & \ms{1.8054}{0.4835} & \ms{31.7231}{3.2923} & \ms{55.4596}{5.2920} & \ms{53.6542}{5.4752} \\
 & top 10\% & \ms{1.6110}{0.5999} & \ms{27.6581}{5.9216} & \ms{47.1269}{8.0111} & \ms{45.5158}{7.7927} \\
 & top 20\% & \ms{1.5248}{0.8987} & \ms{20.9403}{4.7971} & \ms{34.9301}{7.8471} & \ms{33.4054}{7.8760} \\
\addlinespace[1pt]
FCN & global & \ms{0.2829}{0.0839} & \ms{19.5061}{3.3412} & \ms{40.0604}{9.3701} & \ms{39.7775}{9.3423} \\
 & top 5\% & \ms{1.6231}{0.5064} & \ms{29.3769}{2.7626} & \ms{54.3033}{7.4089} & \ms{52.6801}{7.4389} \\
 & top 10\% & \ms{1.1777}{0.5118} & \ms{22.4217}{3.9803} & \ms{43.4510}{9.2513} & \ms{42.2734}{9.0251} \\
 & top 20\% & \ms{0.9962}{0.4315} & \ms{16.9792}{3.9371} & \ms{34.0859}{8.2616} & \ms{33.0897}{7.9275} \\
\addlinespace[1pt]
FengWu & global & \ms{0.2613}{0.0757} & \ms{12.0050}{6.0239} & \ms{24.1022}{13.6293} & \ms{23.8410}{13.5736} \\
 & top 5\% & \ms{1.5695}{0.3592} & \ms{16.2763}{3.7024} & \ms{30.1055}{5.0103} & \ms{28.5360}{4.7696} \\
 & top 10\% & \ms{1.2427}{0.5333} & \ms{12.9503}{5.6052} & \ms{24.1854}{8.6854} & \ms{22.9427}{8.1863} \\
 & top 20\% & \ms{1.1192}{0.5023} & \ms{11.9508}{5.0745} & \ms{22.7860}{7.9115} & \ms{21.6668}{7.4438} \\
\addlinespace[1pt]
FuXi & global & \ms{0.3774}{0.1212} & \ms{21.0323}{4.8211} & \ms{37.2888}{9.4470} & \ms{36.9114}{9.4327} \\
 & top 5\% & \ms{2.0307}{0.6800} & \ms{31.8944}{4.7331} & \ms{53.9308}{8.3822} & \ms{51.9001}{8.6878} \\
 & top 10\% & \ms{1.6542}{0.7316} & \ms{24.0128}{5.7784} & \ms{40.2140}{9.9307} & \ms{38.5597}{9.7744} \\
 & top 20\% & \ms{1.3646}{0.6773} & \ms{21.9548}{5.8601} & \ms{36.7314}{10.0289} & \ms{35.3668}{9.9223} \\
\addlinespace[1pt]
Pangu-Weather & global & \ms{0.2755}{0.1089} & \ms{17.0909}{4.0477} & \ms{35.6386}{9.0327} & \ms{35.3630}{9.0774} \\
 & top 5\% & \ms{1.3656}{0.3064} & \ms{22.2222}{6.8613} & \ms{43.4234}{13.2383} & \ms{42.0578}{13.0599} \\
 & top 10\% & \ms{1.0931}{0.3535} & \ms{18.9337}{5.9329} & \ms{38.5325}{11.7221} & \ms{37.4394}{11.5261} \\
 & top 20\% & \ms{0.8844}{0.3601} & \ms{17.0172}{5.4859} & \ms{34.5688}{10.2932} & \ms{33.6844}{10.1334} \\
\addlinespace[1pt]
AlphaEarth & global & \ms{2.0606}{0.4404} & \ms{29.4476}{6.0064} & \ms{37.4286}{9.9458} & \ms{35.3679}{10.0271} \\
 & top 5\% & \ms{6.9133}{0.8450} & \ms{42.8790}{4.6087} & \ms{51.7449}{8.7321} & \ms{44.8315}{9.0763} \\
 & top 10\% & \ms{6.6366}{0.9901} & \ms{41.8981}{5.9454} & \ms{50.5712}{10.0057} & \ms{43.9346}{9.9156} \\
 & top 20\% & \ms{6.1908}{1.1330} & \ms{38.8325}{7.4966} & \ms{46.3833}{12.1697} & \ms{40.1925}{11.6788} \\
\bottomrule
\end{tabular}
\end{table*}

% ────────────────────────────────────────────────────────────
\subsection{Fixed-Feature Check: Selection Summary}
\label{sec:app_checks_feature}

The paper appendix keeps the fixed-feature result at the selection-summary level.
The full per-head rows are retained in the supplementary CSV files and are not repeated as a manuscript table because many degenerate heads produce identical zero decision scores.
The supplementary selection rows report the decision-score loss after changing only the head-selection metric.

% ────────────────────────────────────────────────────────────
\subsection{Selection Regret Under Matching Rules}
\label{sec:app_checks_regret}

The fixed-feature check trains the same head family $\mathcal{H}$ on a fixed feature source and changes only the selection metric.
Table~\ref{tab:appendix_selection_regret_tolerance} reports the same selection comparison under exact, tolerated, and union matching.
Here, \(h_R\) is selected by PR-AUC and \(h_D\) is selected by the decision metric.
The reported regret is \(D(h_D)-D(h_R)\).
Exact zero entries mean the two selectors give the same decision score for all five seeds.

\begin{table*}[!t]
\centering
\scriptsize
\setlength{\tabcolsep}{4pt}
\caption{Selection-regret values under exact, tolerated, and union matching. Values are percentage-point regret from selecting \(h_R\) by PR-AUC instead of \(h_D\) by the decision metric. Rows report mean with small std over five seeds; \(0.0000\) denotes exact zero regret.}
\label{tab:appendix_selection_regret_tolerance}
\begin{adjustbox}{max width=\textwidth}
\begin{tabular}{llccc}
\toprule
\textbf{Feature} & \textbf{\(\Omega\)} & \textbf{Exact regret} & \textbf{Tolerated regret} & \textbf{Union regret} \\
\midrule
\ourfm & global & 0.0000 & \ms{8.7830}{9.6705} & \ms{8.7830}{9.6705} \\
\ourfm & fire-prone & 0.0000 & \ms{3.4027}{3.2045} & \ms{3.4027}{3.2045} \\
Prithvi-WxC & global & 0.0000 & 0.0000 & 0.0000 \\
Prithvi-WxC & fire-prone & 0.0000 & 0.0000 & 0.0000 \\
Aurora & global & \ms{0.0200}{0.0267} & \ms{9.8520}{12.9878} & \ms{9.8520}{12.9878} \\
Aurora & fire-prone & \ms{0.8203}{1.8341} & \ms{14.3919}{32.1219} & \ms{14.3919}{32.1219} \\
ClimaX & global & \ms{0.0003}{0.0004} & \ms{0.1296}{0.1775} & \ms{0.1296}{0.1775} \\
ClimaX & fire-prone & 0.0000 & 0.0000 & 0.0000 \\
StormCast & global & 0.0000 & 0.0000 & 0.0000 \\
StormCast & fire-prone & 0.0000 & 0.0000 & 0.0000 \\
DLWP & global & 0.0000 & 0.0000 & 0.0000 \\
DLWP & fire-prone & \ms{0.0770}{0.1100} & \ms{4.3266}{4.3323} & \ms{4.3266}{4.3323} \\
FCN & global & 0.0000 & 0.0000 & 0.0000 \\
FCN & fire-prone & \ms{0.0006}{0.0013} & \ms{1.1680}{1.9872} & \ms{1.1680}{1.9872} \\
FengWu & global & 0.0000 & 0.0000 & 0.0000 \\
FengWu & fire-prone & \ms{0.0691}{0.1191} & \ms{0.5222}{0.6239} & \ms{0.5222}{0.6239} \\
FuXi & global & 0.0000 & 0.0000 & 0.0000 \\
FuXi & fire-prone & 0.0000 & \ms{0.1084}{0.1729} & \ms{0.1084}{0.1729} \\
Pangu-Weather & global & 0.0000 & 0.0000 & 0.0000 \\
Pangu-Weather & fire-prone & \ms{0.0728}{0.1179} & \ms{0.1849}{0.3263} & \ms{0.1849}{0.3263} \\
AlphaEarth & global & 0.0000 & \ms{17.2217}{8.8492} & \ms{17.2217}{8.8492} \\
AlphaEarth & fire-prone & 0.0000 & \ms{3.8804}{5.9483} & \ms{3.8804}{5.9483} \\
\bottomrule
\end{tabular}
\end{adjustbox}
\end{table*}

% ────────────────────────────────────────────────────────────
\subsection{Additional Value Tables}
\label{sec:app_checks_values}

Table~\ref{tab:app_occupancy_ppr_scope}
reports the predicted-positive rate behind the occupancy \(F_1\) sweep.

\begin{table*}[t]
\centering
\small
\setlength{\tabcolsep}{4pt}
\renewcommand{\arraystretch}{1.18}
\caption{For fixed occupancy \(\mathcal{T}\), this table reports predicted-positive rate.
Values are percentages under the same validation-selected strict threshold.
Scopes \(\Omega\) are fixed before test scoring; cells report five-seed mean with std in small type.}
\label{tab:app_occupancy_ppr_scope}
\begin{tabular}{lcccc}
\toprule
\textbf{Backbone} & \textbf{\(\Omega=\)global} & \textbf{\(\Omega=\)top 5\%} & \textbf{\(\Omega=\)top 10\%} & \textbf{\(\Omega=\)top 20\%} \\
\midrule
\ourfm & \ms{1.6808}{0.3684} & \ms{3.0619}{1.0925} & \ms{1.5310}{0.5463} & \ms{0.7655}{0.2732} \\
Prithvi-WxC & \ms{61.9711}{30.9101} & \ms{57.4117}{47.8987} & \ms{58.4565}{51.0897} & \ms{58.9788}{52.6991} \\
Aurora & \ms{55.5849}{19.7524} & \ms{57.2238}{35.3400} & \ms{68.7942}{37.6958} & \ms{67.2891}{38.3991} \\
ClimaX & \ms{5.6763}{3.9261} & \ms{24.0091}{9.2816} & \ms{11.8450}{4.5067} & \ms{5.7442}{4.1341} \\
StormCast & \ms{60.6507}{17.4895} & \ms{57.6017}{35.2921} & \ms{68.0766}{37.3899} & \ms{67.8397}{39.2410} \\
DLWP & \ms{4.3221}{1.5619} & \ms{9.4001}{5.0807} & \ms{4.9700}{3.6849} & \ms{1.9198}{1.4678} \\
FCN & \ms{1.5202}{1.3446} & \ms{4.7856}{2.9409} & \ms{2.7257}{1.6353} & \ms{0.8368}{0.2358} \\
FengWu & \ms{0.4277}{0.4830} & \ms{0.6004}{0.3041} & \ms{0.2609}{0.1935} & \ms{0.1501}{0.1206} \\
FuXi & \ms{0.4505}{0.2773} & \ms{2.9315}{2.6392} & \ms{0.5197}{0.6074} & \ms{0.3621}{0.4346} \\
Pangu-Weather & \ms{1.0801}{1.1308} & \ms{2.0549}{2.1893} & \ms{1.4029}{1.4739} & \ms{1.0103}{1.1084} \\
AlphaEarth & \ms{0.0691}{0.0499} & \ms{0.2826}{0.1497} & \ms{0.1524}{0.0770} & \ms{0.0656}{0.0414} \\
\bottomrule
\end{tabular}
\end{table*}

Tables~\ref{tab:app_spread_ap_by_scope}--\ref{tab:app_heat_event_pr}
report additional values that are not repeated in the main tables.
Each table fixes the task \(\mathcal{T}\) and reports a different \(\Omega\), metric, or event subset.

\begin{table*}[t]
\centering
\scriptsize
\setlength{\tabcolsep}{3pt}
\caption{For fixed spread \(\mathcal{T}\) and strict \(\Lambda\), this table reports AP under three \(\Omega\) scopes: full test, top-5\% train-fire area, and top-10\% train-fire area. Values are percentages; cells report mean with small std.}
\label{tab:app_spread_ap_by_scope}
\begin{tabular}{lccc}
\toprule
Backbone & full \(\Omega\) AP & top-5\% \(\Omega\) AP & top-10\% \(\Omega\) AP \\
\midrule
\ourfm & \ms{30.0197}{1.5651} & \ms{40.7452}{2.0542} & \ms{37.4096}{1.8731} \\
Prithvi-WxC & \ms{4.8319}{0.1731} & \ms{12.6086}{0.4468} & \ms{8.7051}{0.1889} \\
Aurora & \ms{17.7723}{0.4293} & \ms{30.3106}{0.9404} & \ms{26.4732}{0.6932} \\
ClimaX & \ms{11.1726}{0.2337} & \ms{25.7871}{1.2896} & \ms{19.9977}{1.2217} \\
StormCast & \ms{8.1147}{1.1569} & \ms{18.5461}{1.1727} & \ms{14.1286}{1.2956} \\
DLWP & \ms{9.2142}{2.6587} & \ms{19.3346}{2.3922} & \ms{14.9788}{2.6696} \\
FCN & \ms{6.6774}{1.3001} & \ms{16.7396}{3.2955} & \ms{11.9308}{2.3881} \\
FengWu & \ms{11.0046}{2.7092} & \ms{21.1506}{1.2163} & \ms{17.0113}{1.5778} \\
FuXi & \ms{13.5507}{0.3840} & \ms{22.5434}{0.4100} & \ms{19.1964}{0.3943} \\
Pangu-Weather & \ms{10.6250}{1.4643} & \ms{19.8294}{1.3044} & \ms{15.8013}{1.1602} \\
AlphaEarth & \ms{12.2847}{1.3562} & \ms{22.8692}{0.4915} & \ms{18.2992}{1.2110} \\
\bottomrule
\end{tabular}
\end{table*}

\begin{table*}[t]
\centering
\scriptsize
\setlength{\tabcolsep}{3pt}
\caption{For fixed final-area \(\mathcal{T}\) and \(\Omega\), this table reports median log error and acre-scale errors in addition to the main log-RMSE/log-MAE/Spearman metrics. Cells report mean with small std.}
\label{tab:app_burned_area_median_acre}
\begin{tabular}{lccc}
\toprule
Backbone & log median AE & acre median AE & acre MAPE \\
\midrule
\ourfm & \ms{1.0235}{0.0982} & \ms{4504.0692}{459.0483} & \ms{1.4525}{0.0254} \\
Prithvi-WxC & \ms{1.2184}{0.2107} & \ms{5375.8770}{788.7906} & \ms{1.9517}{0.2875} \\
Aurora & \ms{1.4547}{0.0301} & \ms{9904.9483}{457.4260} & \ms{6.8728}{3.0026} \\
ClimaX & \ms{1.6841}{0.1818} & \ms{18130.4820}{3248.3873} & \ms{8.2373}{2.8540} \\
StormCast & \ms{1.4522}{0.1519} & \ms{11155.7881}{2020.8656} & \ms{4.6142}{1.1500} \\
DLWP & \ms{1.0952}{0.1306} & \ms{4406.9315}{303.0944} & \ms{1.7357}{0.3625} \\
FCN & \ms{1.1688}{0.1139} & \ms{5166.9993}{213.0333} & \ms{2.0800}{0.4004} \\
FengWu & \ms{1.1589}{0.1772} & \ms{5137.2822}{628.7543} & \ms{2.0944}{0.4545} \\
FuXi & \ms{1.1855}{0.0612} & \ms{5697.7117}{796.8785} & \ms{2.4411}{0.5567} \\
Pangu-Weather & \ms{1.1221}{0.1470} & \ms{5092.3621}{483.8243} & \ms{1.9571}{0.3113} \\
AlphaEarth & \ms{1.7459}{0.6057} & \ms{15110.7573}{7106.3417} & \ms{9.7398}{2.7425} \\
\bottomrule
\end{tabular}
\end{table*}

\begin{table*}[t]
\centering
\scriptsize
\setlength{\tabcolsep}{3pt}
\caption{For fixed retrieval \(\mathcal{T}\) and \(\Omega\), this table reports nDCG@5, best log gap, and rank \(\rho\) in addition to the main nDCG@10/log-error metrics. Cells report mean with small std.}
\label{tab:app_analog_rank_depth}
\begin{tabular}{lccc}
\toprule
Backbone & nDCG@5 & best log gap & rank $\rho$ \\
\midrule
\ourfm & \ms{0.5175}{0.0445} & \ms{0.1868}{0.0285} & \ms{0.6019}{0.1460} \\
Prithvi-WxC & \ms{0.3591}{0.0107} & \ms{0.2151}{0.0594} & \ms{0.1514}{0.1489} \\
Aurora & \ms{0.4423}{0.0210} & \ms{0.1551}{0.0437} & \ms{0.2162}{0.1856} \\
ClimaX & \ms{0.4151}{0.0293} & \ms{0.2129}{0.0653} & \ms{0.1587}{0.2831} \\
StormCast & \ms{0.3960}{0.0240} & \ms{0.1714}{0.0310} & \ms{0.1258}{0.1625} \\
DLWP & \ms{0.3795}{0.0274} & \ms{0.1944}{0.0807} & \ms{-0.3865}{0.2802} \\
FCN & \ms{0.4250}{0.0112} & \ms{0.1856}{0.0846} & \ms{-0.1357}{0.2571} \\
FengWu & \ms{0.4228}{0.0310} & \ms{0.1870}{0.0858} & \ms{-0.1926}{0.2194} \\
FuXi & \ms{0.4544}{0.0356} & \ms{0.2171}{0.0806} & \ms{-0.1367}{0.2885} \\
Pangu-Weather & \ms{0.3988}{0.0506} & \ms{0.1901}{0.0838} & \ms{-0.1970}{0.2216} \\
AlphaEarth & \ms{0.5276}{0.0531} & \ms{0.1782}{0.0454} & \ms{0.4639}{0.2802} \\
\bottomrule
\end{tabular}
\end{table*}

\begin{table*}[t]
\centering
\scriptsize
\setlength{\tabcolsep}{3pt}
\caption{For fixed smoke \(\mathcal{T}\) and station \(\Omega\), this table reports RMSE, MAE, and 90th-percentile absolute error on test rows with observed PM$_{2.5}\ge35$; std uses a row bootstrap over those rows. Cells report mean with small std.}
\label{tab:app_smoke_high_event}
\begin{tabular}{lccc}
\toprule
Backbone & high-smoke RMSE & high-smoke MAE & high-smoke 90th AE \\
\midrule
\ourfm & \ms{47.4870}{0.6346} & \ms{34.3954}{0.7654} & \ms{65.6213}{3.8778} \\
Prithvi-WxC & \ms{57.2224}{1.7268} & \ms{47.3871}{0.3153} & \ms{74.9666}{3.2381} \\
Aurora & \ms{57.2752}{1.7248} & \ms{47.4368}{0.3149} & \ms{75.0755}{3.1074} \\
ClimaX & \ms{57.2828}{1.7239} & \ms{47.4407}{0.3140} & \ms{75.1012}{3.0777} \\
StormCast & \ms{56.6512}{1.7517} & \ms{46.7914}{0.3281} & \ms{74.0794}{3.4707} \\
DLWP & \ms{57.0075}{1.7359} & \ms{47.1971}{0.3198} & \ms{74.4936}{3.3826} \\
FCN & \ms{57.0582}{1.7339} & \ms{47.2401}{0.3187} & \ms{74.6431}{3.1982} \\
FengWu & \ms{57.0158}{1.7357} & \ms{47.1957}{0.3194} & \ms{74.5652}{3.2871} \\
FuXi & \ms{56.9622}{1.7371} & \ms{47.1508}{0.3201} & \ms{74.3278}{3.4435} \\
Pangu-Weather & \ms{57.1282}{1.7307} & \ms{47.3050}{0.3170} & \ms{74.6830}{3.2375} \\
AlphaEarth & \ms{48.0665}{0.7904} & \ms{35.6088}{0.7341} & \ms{66.7613}{3.9235} \\
\bottomrule
\end{tabular}
\end{table*}

\begin{table*}[t]
\centering
\scriptsize
\setlength{\tabcolsep}{3pt}
\caption{For fixed heat \(\mathcal{T}\) and heat-region \(\Omega\), this table reports precision and recall for the exceedance label used by the main \(F_1\). Cells report mean with small std.}
\label{tab:app_heat_event_pr}
\begin{tabular}{lcc}
\toprule
Backbone & precision & recall \\
\midrule
\ourfm & \ms{0.9767}{0.0117} & \ms{0.9330}{0.0299} \\
Prithvi-WxC & \ms{0.8260}{0.0030} & \ms{0.9173}{0.0033} \\
Aurora & \ms{0.5920}{0.0347} & \ms{0.0517}{0.0020} \\
ClimaX & \ms{0.7397}{0.0099} & \ms{0.7994}{0.0051} \\
StormCast & \ms{0.8840}{0.0237} & \ms{0.9320}{0.0165} \\
DLWP & \ms{0.9429}{0.0085} & \ms{0.8899}{0.0167} \\
FCN & \ms{0.9408}{0.0097} & \ms{0.9111}{0.0127} \\
FengWu & \ms{0.3808}{0.2719} & \ms{0.0266}{0.0267} \\
FuXi & \ms{0.3262}{0.1262} & \ms{0.1810}{0.0481} \\
Pangu-Weather & \ms{0.1159}{0.0743} & \ms{0.0112}{0.0032} \\
AlphaEarth & \ms{0.9824}{0.0040} & \ms{0.9278}{0.0178} \\
\bottomrule
\end{tabular}
\end{table*}

\clearpage

% ============================================================
% C COMPARATOR ELIGIBILITY NOTES
% ============================================================
\section{Comparator Eligibility Notes}
\label{sec:comparator_audit}

All numeric comparator rows in Tables~\ref{tab:primary_results} and~\ref{tab:supporting_results}
are included only after the task form, metric, matching rule, scope, and head family are fixed.
The appendix does not repeat those full matrices.
The key eligibility rule is simple: a row is reported only when it satisfies the same contract as the row block in which it appears, and a row is excluded only when its representation or output form cannot satisfy that contract.

\noindent\textbf{Reading rule.}
Exact-only, tolerated, union, ranking, retrieval, and regression scores answer different questions.
The fixed-contract reading is therefore to compare entries only within one row block and not to average across task forms.

\clearpage

% ============================================================
% D SEEDED AUDITS
% ============================================================
\section{Seeded Audits}
\label{sec:app_seeded_audits}

\subsection{Seed Robustness Summary}
\label{sec:app_seed_robustness}

Table~\ref{tab:app_seed_robustness} summarizes stochastic checks used to support the reported mean-with-std convention.
It is not a replacement for the main fixed-contract result tables.
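The mean-with-std cells in these seed summaries can be reproduced with a small helper; treating the std as the sample (ddof=1) estimator is an assumption, since the manuscript does not state which estimator backs the reported values.

```python
import numpy as np

def seed_summary(scores, ddof=1):
    """Five-seed mean-with-std summary; ddof=1 (sample std) is an
    assumed convention, rounded to the tables' four decimals."""
    a = np.asarray(scores, dtype=float)
    return round(float(a.mean()), 4), round(float(a.std(ddof=ddof)), 4)
```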

\begin{table}[h]
\centering
\small
\setlength{\tabcolsep}{5pt}
\renewcommand{\arraystretch}{1.2}
\caption{Seed summaries for stochastic checks. Values report mean with small std over completed seeds.}
\label{tab:app_seed_robustness}
\begin{adjustbox}{max width=\textwidth}
\begin{tabular}{p{0.28\textwidth}cllp{0.18\textwidth}}
\toprule
\textbf{\(\mathcal{T}\) check} & \textbf{Seeds} & \textbf{Primary value} & \textbf{Other value(s)} & \textbf{Reading} \\
\midrule
Final burned area &
5 & log-RMSE \ms{1.1657}{0.0126} &
log-MAE \ms{1.0423}{0.0081}; Spear.\ \ms{0.6298}{0.0338} &
stable across seeds \\
Smoke PM\(_{2.5}\) &
5 & RMSE \ms{4.4646}{0.0060} &
MAE \ms{2.4108}{0.0016}; \(r\) \ms{0.6368}{0.0013} &
stable at table precision \\
Extreme heat &
5 & RMSE-C \ms{0.2179}{0.0043} &
MAE-C \ms{0.1787}{0.0018}; exceed.\ \(F_1\) \ms{0.9541}{0.0164} &
stable across seeds \\
Fire spread &
5 & exact \(F_1\) \ms{37.6700}{0.9800} &
spatial \(F_1\) \ms{80.9700}{2.0200}; AP \ms{30.0900}{1.2500} &
stable across seeds \\
Aurora paired-head check &
5 & fire-prone score diff.\ \ms{6.3500}{13.2800} &
PR-AUC and union choices differ in 2/5 seeds &
variable across seeds \\
\bottomrule
\end{tabular}
\end{adjustbox}
\end{table}

\clearpage

% ============================================================
% E LIGHTWEIGHT HEAD AND ADAPTATION DETAILS
% ============================================================
\section{Lightweight Head and Adaptation Details}
\label{sec:app_heads}

All frozen-transfer comparisons use the same five lightweight head architectures applied
on top of the frozen backbone representations.
Table~\ref{tab:app_head_architectures} summarizes each head family, its architecture,
approximate parameter count, and the adaptation procedure used.

\begin{table}[h]
\centering
\small
\setlength{\tabcolsep}{5pt}
\renewcommand{\arraystretch}{1.3}
\caption{Lightweight head architectures used in the fixed-contract transfer comparisons.
All heads are trained from random initialization on the frozen backbone features.
Parameter counts are approximate and depend on the feature dimensionality of each backbone.}
\label{tab:app_head_architectures}
\begin{tabular}{p{0.15\textwidth}p{0.30\textwidth}p{0.12\textwidth}p{0.33\textwidth}}
\toprule
\textbf{$\mathcal{A}$ head} & \textbf{Architecture} & \textbf{Approx.\ params} & \textbf{Notes} \\
\midrule
Constant prior &
Outputs a fixed bias vector, ignoring input features. &
Output dimension only &
Provides a degenerate baseline; selected when backbone features carry no useful signal. \\
Linear probe &
Single linear layer mapping backbone features to the output. No nonlinearity. &
$d\times c + c$ &
Standard frozen-representation baseline. \\
Pixel MLP &
Two-layer MLP applied independently per spatial unit. &
$d\times h + h\times c$ &
Captures per-pixel nonlinearity; ignores spatial context. \\
Shallow adapter &
Two-layer MLP with a spatial context window; uses a $3\times3$ convolution before the linear output. &
$9dh + hc$ &
Balances local spatial context with parameter efficiency. \\
Wide adapter &
Shallow adapter with a wider hidden dimension. &
$9dH + Hc$ &
Higher-capacity variant; can overfit on small fire-event sets. \\
\bottomrule
\end{tabular}
\end{table}

\noindent\textbf{Training protocol.}
Each occupancy head-control run uses seeds $\{1,7,42,99,123\}$, the five heads listed above, and the fixed variants identity, erode-r1, and close-r1.
The spread U-Net reference is trained for 4 epochs.
The threshold $\tau$ is selected on the validation split by maximizing union-$F_1$ (for occupancy) or spatial $F_1$ (for spread) and held fixed at test time.
Morphology parameters (spatial tolerance $k$, temporal tolerance $\Delta t$) are fixed as part of the evaluation contract and are not tuned after validation.
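The threshold-selection step can be sketched as follows; the candidate grid and the validation-metric callback are illustrative stand-ins for the contract's union or spatial $F_1$, not the released code.

```python
import numpy as np

def select_threshold(val_scores, val_f1, taus=None):
    """Sweep candidate thresholds on the validation split, keep the one
    maximizing the contract's F1, and freeze it for test scoring.
    `val_f1(scores, tau)` is a hypothetical stand-in for the contract
    metric; the grid of candidates is likewise illustrative."""
    if taus is None:
        taus = np.linspace(0.05, 0.95, 19)
    return float(max(taus, key=lambda t: val_f1(val_scores, t)))
```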

\noindent\textbf{Head selection procedure.}
For each (feature source, scope, seed) tuple, all five heads are trained independently.
The PR-AUC-based selector picks $h_R = \arg\max_{h \in \mathcal{H}} R(h)$ on the validation set;
the decision-based selector picks $h_D = \arg\max_{h \in \mathcal{H}} D(h)$ on the same set.
The selection regret $\delta = D(h_D) - D(h_R) \ge 0$ is computed on the held-out test set.
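The selection comparison can be sketched as follows, with the per-head metric dictionaries as hypothetical stand-ins for the validation and test scores.

```python
def selection_regret(heads, val_prauc, val_decision, test_decision):
    """h_R maximizes validation PR-AUC, h_D maximizes the validation
    decision metric; the regret is their test-set decision-score gap."""
    h_r = max(heads, key=val_prauc.get)
    h_d = max(heads, key=val_decision.get)
    return test_decision[h_d] - test_decision[h_r]
```

When both selectors pick the same head, the regret is exactly zero, which is how the zero entries in the regret tables arise.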
615
-
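The selection-regret arithmetic in the deleted paragraph above reduces to two argmax choices and one subtraction. The sketch below uses hypothetical per-head scores; the dictionary layout and function name are assumptions, with `R` standing for validation PR-AUC and `D` for the decision score.

```python
# Minimal sketch of the selection-regret arithmetic: h_R and h_D are chosen
# on the validation set, and the regret is read off the test decision scores.
def selection_regret(val_prauc, val_decision, test_decision):
    """delta = D_test(h_D) - D_test(h_R) for validation-chosen heads."""
    h_r = max(val_prauc, key=val_prauc.get)        # PR-AUC-based selector
    h_d = max(val_decision, key=val_decision.get)  # decision-based selector
    return test_decision[h_d] - test_decision[h_r]
```

When both selectors agree on the same head, the regret is exactly zero; the deleted text reports it as non-negative under its selection protocol.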
- \clearpage
-
- % ============================================================
- % F LIMITATIONS
- % ============================================================
- \section{Limitations}
- \label{sec:limitations}
-
- The conclusions apply to the task forms, scopes, evaluation rules, and comparator eligibility decisions used in this study.
- The evaluation covers selected wildfire decision tasks and supporting retrieval and regression task forms.
- Comparator eligibility is fixed before metric values are interpreted.
- This eligibility rule keeps each comparison within one task-form contract.
- It also leaves some model and task pairs outside the evaluated comparison set by design.
-
- The transfer comparison uses frozen backbones with lightweight heads.
- The results therefore describe frozen-backbone transfer under the allowed head families in each contract.
- Full fine-tuning, alternative adaptation procedures, and broader head families are outside the evaluated scope.
- The task-specific reference baselines serve as empirical anchors for same-contract comparison.
- \ourfm is a regional wildfire reference for the reported California fixed-contract experiments.
-
-
- The supporting retrieval and regression checks bound the primary spatial decision claim.
- They provide task-form evidence rather than a single score across all wildfire-related prediction tasks.
- The analysis focuses on the reported metric families, matching rules, and fixed comparison choices.
- Operational response rules, intervention costs, and deployment policies are part of wildfire early-warning use contexts~\cite{goldammer1999early,pickell2017early,farahmand2020fdeo}.
- They are outside the scope of this evaluation study and are not inferred from the reported scores.
-
- \clearpage
-
- % ============================================================
- % G REPRODUCIBILITY AND EVALUATION ARTIFACTS
- % ============================================================
- \section{Reproducibility and Evaluation Artifacts}
- \label{sec:repro_compute_impact}
- \subsection{External Assets and Terms of Use}
- \label{sec:external_assets_terms}
-
- We use external datasets and model assets only for research evaluation.
- Access to each asset follows the original provider's portal, license, or terms of use; this submission does not imply that all assets are openly redistributable.
- We do not redistribute raw external datasets, provider-hosted embeddings, or third-party model weights.
- Table~\ref{tab:external_assets_licenses} records the source and terms-of-use status used to interpret reproducibility.
-
- \begin{table}[h]
- \centering
- \small
- \setlength{\tabcolsep}{4pt}
- \renewcommand{\arraystretch}{1.18}
- \caption{External assets used by the study and their source or terms-of-use status.}
- \label{tab:external_assets_licenses}
- \begin{tabular}{p{0.25\textwidth}p{0.34\textwidth}p{0.34\textwidth}}
- \toprule
- \textbf{Asset family} & \textbf{Use in this study} & \textbf{Source and terms-of-use note} \\
- \midrule
- NOAA HRRR fields~\cite{noaa_hrrr_ncei,noaa_hrrr_emc}
- & Dynamic weather inputs for \ourfm and transfer tasks.
- & NOAA provider terms and citation requirements apply. \\
- NASA FIRMS~\cite{nasa_firms}
- & Active-fire occupancy supervision.
- & NASA Earthdata/FIRMS access terms and citation requirements apply. \\
- LANDFIRE and WRC layers~\cite{landfire_fbfm40,landfire_canopy_cover,usfs_wrc_housing_density}
- & Static fuel, canopy, and exposure context.
- & Original geospatial-product provider terms and citations apply. \\
- LandScan~\cite{ornl_landscan_2024}
- & Static population context.
- & ORNL/LandScan source-specific access terms apply; raw data are not redistributed. \\
- WFIGS and MTBS~\cite{nifc_wfigs_perimeters,mtbs_usgs_2025}
- & Event-level resources for burned-area and analog tasks.
- & Original incident/perimeter-product provider terms and citations apply. \\
- External Earth-FM baselines~\cite{schmude2024prithviwxc,bodnar2025aurora,nguyen2023climax,pathak2024stormcast,weyn2020dlwp,pathak2022fourcastnet,chen2023fengwu,chen2023fuxi,bi2023panguweather,brown2025alphaearth}
- & Frozen comparator representations or task-model baselines.
- & Original model-provider licenses and access terms apply; third-party weights are not redistributed. \\
- \bottomrule
- \end{tabular}
- \end{table}
-
- This note supports the NeurIPS checklist and identifies the files that support the reported claims.
- This file statement does not imply full raw-data release.
- The main claims can be checked from the manuscript contracts, metric
- definitions, and per-head result files, even if full raw-data release is
- delayed or limited. Sections~3 and~4 specify the contract components used by
- the main claims: task definition, split logic, label space, tolerance
- parameters, scope definitions, threshold or operating-point rules, and
- lightweight-head set.
-
- The supplementary source includes the check scripts, per-head and per-seed
- CSV result files, and \LaTeX{} result tables for the expanded check and matching-rule support.
- These files expose exact \(F_1\),
- tolerated \(F_1\), union-\(F_1\), PR-AUC, per-head selection,
- top-1 agreement, and selection-regret arithmetic. The manuscript also includes
- full figure and table reproduction values in result tables and appendix tables.
- These files provide a runnable check of the
- selection-regret arithmetic and the table-construction logic from fixed
- per-head rows. The seeded occupancy check uses seeds
- $\{1,7,42,99,123\}$, and the spread task-specific U-Net check uses repeated seeds; reported error bars are standard deviations over the completed
- seeded runs. Full raw wildfire inputs and large feature arrays are not
- released at submission because redistribution and storage constraints require a
- separate review.
-
- For stochastic results, the paper reports mean with standard deviation over repeated seeds.
- For fixed-output or fixed-feature controls, the table uses one fixed output or feature set; the changed item is the matching rule or selection metric.
-
- The reported experiments use two resource classes on a shared Slurm-managed
- cluster. Tabular retrieval/regression checks and same-feature head controls run
- on CPU workers with 4 to 8 cores, 24 to 64~GB host memory, and 2 to 4~hour wall-clock
- limits. Spread U-Net training and threshold calibration run on single-GPU jobs
- with one B200 GPU, 8 CPU cores, 96~GB host memory, and a 4~hour wall-clock
- limit. The seed/check waves reported in the appendix correspond to roughly
- 78 CPU job-hours and 12 GPU job-hours of scheduled wall-clock budget;
- exploratory runs are not included in the reported compute accounting.
-
- The raw-data limitation is separate from the selection-regret files.
- The supplementary source is sufficient to inspect the selection-regret arithmetic and reproduce the reported tables.
- Full end-to-end recomputation from raw wildfire inputs is not included at submission because redistribution review is still required.
- The broader impact is evaluation-facing rather than operational.
- Better reading of wildfire transfer evidence can reduce overconfident benchmark claims, while misread transfer results could still encourage inappropriate reliance on models with low decision scores.
- For that reason, the paper keeps its claims wildfire-centered, decision-task
- specific, and explicitly separate from any predictive deployment
- recommendation.

paper_outputs/figures/fig_selection_regret_rq2.tikz DELETED
@@ -1,120 +0,0 @@
- % Auto-generated by scripts/build_selection_regret_rq2_figure.py.
- \begin{tikzpicture}[x=1cm,y=1cm]
- \footnotesize
- \draw[black!12, line width=0.35pt] (2.450,-0.350) -- (2.450,4.530);
- \node[anchor=north, font=\scriptsize, text=black!70] at (2.450,-0.410) {-20};
- \draw[black!12, line width=0.35pt] (3.243,-0.350) -- (3.243,4.530);
- \node[anchor=north, font=\scriptsize, text=black!70] at (3.243,-0.410) {-10};
- \draw[wfgray, line width=0.55pt] (4.036,-0.350) -- (4.036,4.530);
- \node[anchor=north, font=\scriptsize, text=black!70] at (4.036,-0.410) {0};
- \draw[black!12, line width=0.35pt] (4.829,-0.350) -- (4.829,4.530);
- \node[anchor=north, font=\scriptsize, text=black!70] at (4.829,-0.410) {10};
- \draw[black!12, line width=0.35pt] (5.621,-0.350) -- (5.621,4.530);
- \node[anchor=north, font=\scriptsize, text=black!70] at (5.621,-0.410) {20};
- \draw[black!12, line width=0.35pt] (6.414,-0.350) -- (6.414,4.530);
- \node[anchor=north, font=\scriptsize, text=black!70] at (6.414,-0.410) {30};
- \draw[black!12, line width=0.35pt] (7.207,-0.350) -- (7.207,4.530);
- \node[anchor=north, font=\scriptsize, text=black!70] at (7.207,-0.410) {40};
- \draw[black!12, line width=0.35pt] (8.000,-0.350) -- (8.000,4.530);
- \node[anchor=north, font=\scriptsize, text=black!70] at (8.000,-0.410) {50};
- \draw[black!45, line width=0.4pt] (2.450,-0.350) -- (8.000,-0.350);
- \node[anchor=east, font=\scriptsize, text=black!82] at (2.320,4.350) {\textcolor{wfblue}{\textbf{FireWx-FM ref.}}};
- \draw[wfslate, line width=0.72pt] (4.030,4.220) -- (5.212,4.220);
- \draw[wfslate, line width=0.72pt] (4.030,4.185) -- (4.030,4.255);
- \draw[wfslate, line width=0.72pt] (5.212,4.185) -- (5.212,4.255);
- \filldraw[wfslate] (4.621,4.220) circle[radius=0.045];
- \draw[wforange, line width=0.72pt] (4.051,4.480) -- (4.487,4.480);
- \draw[wforange, line width=0.72pt] (4.051,4.445) -- (4.051,4.515);
- \draw[wforange, line width=0.72pt] (4.487,4.445) -- (4.487,4.515);
- \filldraw[wforange] (4.224,4.435) rectangle (4.314,4.525);
- \node[anchor=east, font=\scriptsize, text=black!82] at (2.320,3.940) {Prithvi-WxC};
- \draw[wfslate, line width=0.72pt] (4.036,3.810) -- (4.036,3.810);
- \draw[wfslate, line width=0.72pt] (4.036,3.775) -- (4.036,3.845);
- \draw[wfslate, line width=0.72pt] (4.036,3.775) -- (4.036,3.845);
- \filldraw[wfslate] (4.036,3.810) circle[radius=0.045];
- \draw[wforange, line width=0.72pt] (4.036,4.070) -- (4.036,4.070);
- \draw[wforange, line width=0.72pt] (4.036,4.035) -- (4.036,4.105);
- \draw[wforange, line width=0.72pt] (4.036,4.035) -- (4.036,4.105);
- \filldraw[wforange] (3.991,4.025) rectangle (4.081,4.115);
- \node[anchor=east, font=\scriptsize, text=black!82] at (2.320,3.530) {Aurora};
- \draw[wfslate, line width=0.72pt] (3.580,3.400) -- (5.276,3.400);
- \draw[wfslate, line width=0.72pt] (3.580,3.365) -- (3.580,3.435);
- \draw[wfslate, line width=0.72pt] (5.276,3.365) -- (5.276,3.435);
- \filldraw[wfslate] (4.428,3.400) circle[radius=0.045];
- \draw[wforange, line width=0.72pt] (2.627,3.660) -- (7.723,3.660);
- \draw[wforange, line width=0.72pt] (2.627,3.625) -- (2.627,3.695);
- \draw[wforange, line width=0.72pt] (7.723,3.625) -- (7.723,3.695);
- \filldraw[wforange] (5.130,3.615) rectangle (5.220,3.705);
- \node[anchor=east, font=\scriptsize, text=black!82] at (2.320,3.120) {ClimaX};
- \draw[wfslate, line width=0.72pt] (4.032,2.990) -- (4.060,2.990);
- \draw[wfslate, line width=0.72pt] (4.032,2.955) -- (4.032,3.025);
- \draw[wfslate, line width=0.72pt] (4.060,2.955) -- (4.060,3.025);
- \filldraw[wfslate] (4.046,2.990) circle[radius=0.045];
- \draw[wforange, line width=0.72pt] (4.036,3.250) -- (4.036,3.250);
- \draw[wforange, line width=0.72pt] (4.036,3.215) -- (4.036,3.285);
- \draw[wforange, line width=0.72pt] (4.036,3.215) -- (4.036,3.285);
- \filldraw[wforange] (3.991,3.205) rectangle (4.081,3.295);
- \node[anchor=east, font=\scriptsize, text=black!82] at (2.320,2.710) {StormCast};
- \draw[wfslate, line width=0.72pt] (4.036,2.580) -- (4.036,2.580);
- \draw[wfslate, line width=0.72pt] (4.036,2.545) -- (4.036,2.615);
- \draw[wfslate, line width=0.72pt] (4.036,2.545) -- (4.036,2.615);
- \filldraw[wfslate] (4.036,2.580) circle[radius=0.045];
- \draw[wforange, line width=0.72pt] (4.036,2.840) -- (4.036,2.840);
- \draw[wforange, line width=0.72pt] (4.036,2.805) -- (4.036,2.875);
- \draw[wforange, line width=0.72pt] (4.036,2.805) -- (4.036,2.875);
- \filldraw[wforange] (3.991,2.795) rectangle (4.081,2.885);
- \node[anchor=east, font=\scriptsize, text=black!82] at (2.320,2.300) {DLWP};
- \draw[wfslate, line width=0.72pt] (4.036,2.170) -- (4.036,2.170);
- \draw[wfslate, line width=0.72pt] (4.036,2.135) -- (4.036,2.205);
- \draw[wfslate, line width=0.72pt] (4.036,2.135) -- (4.036,2.205);
- \filldraw[wfslate] (4.036,2.170) circle[radius=0.045];
- \draw[wforange, line width=0.72pt] (4.044,2.430) -- (4.735,2.430);
- \draw[wforange, line width=0.72pt] (4.044,2.395) -- (4.044,2.465);
- \draw[wforange, line width=0.72pt] (4.735,2.395) -- (4.735,2.465);
- \filldraw[wforange] (4.345,2.385) rectangle (4.435,2.475);
- \node[anchor=east, font=\scriptsize, text=black!82] at (2.320,1.890) {FCN};
- \draw[wfslate, line width=0.72pt] (4.036,1.760) -- (4.036,1.760);
- \draw[wfslate, line width=0.72pt] (4.036,1.725) -- (4.036,1.795);
- \draw[wfslate, line width=0.72pt] (4.036,1.725) -- (4.036,1.795);
- \filldraw[wfslate] (4.036,1.760) circle[radius=0.045];
- \draw[wforange, line width=0.72pt] (3.971,2.020) -- (4.286,2.020);
- \draw[wforange, line width=0.72pt] (3.971,1.985) -- (3.971,2.055);
- \draw[wforange, line width=0.72pt] (4.286,1.985) -- (4.286,2.055);
- \filldraw[wforange] (4.083,1.975) rectangle (4.173,2.065);
- \node[anchor=east, font=\scriptsize, text=black!82] at (2.320,1.480) {FengWu};
- \draw[wfslate, line width=0.72pt] (4.036,1.350) -- (4.036,1.350);
- \draw[wfslate, line width=0.72pt] (4.036,1.315) -- (4.036,1.385);
- \draw[wfslate, line width=0.72pt] (4.036,1.315) -- (4.036,1.385);
- \filldraw[wfslate] (4.036,1.350) circle[radius=0.045];
- \draw[wforange, line width=0.72pt] (4.028,1.610) -- (4.127,1.610);
- \draw[wforange, line width=0.72pt] (4.028,1.575) -- (4.028,1.645);
- \draw[wforange, line width=0.72pt] (4.127,1.575) -- (4.127,1.645);
- \filldraw[wforange] (4.032,1.565) rectangle (4.122,1.655);
- \node[anchor=east, font=\scriptsize, text=black!82] at (2.320,1.070) {FuXi};
- \draw[wfslate, line width=0.72pt] (4.036,0.940) -- (4.036,0.940);
- \draw[wfslate, line width=0.72pt] (4.036,0.905) -- (4.036,0.975);
- \draw[wfslate, line width=0.72pt] (4.036,0.905) -- (4.036,0.975);
- \filldraw[wfslate] (4.036,0.940) circle[radius=0.045];
- \draw[wforange, line width=0.72pt] (4.029,1.200) -- (4.087,1.200);
- \draw[wforange, line width=0.72pt] (4.029,1.165) -- (4.029,1.235);
- \draw[wforange, line width=0.72pt] (4.087,1.165) -- (4.087,1.235);
- \filldraw[wforange] (4.013,1.155) rectangle (4.103,1.245);
- \node[anchor=east, font=\scriptsize, text=black!82] at (2.320,0.660) {Pangu-Weather};
- \draw[wfslate, line width=0.72pt] (4.036,0.530) -- (4.036,0.530);
- \draw[wfslate, line width=0.72pt] (4.036,0.495) -- (4.036,0.565);
- \draw[wfslate, line width=0.72pt] (4.036,0.495) -- (4.036,0.565);
- \filldraw[wfslate] (4.036,0.530) circle[radius=0.045];
- \draw[wforange, line width=0.72pt] (4.025,0.790) -- (4.076,0.790);
- \draw[wforange, line width=0.72pt] (4.025,0.755) -- (4.025,0.825);
- \draw[wforange, line width=0.72pt] (4.076,0.755) -- (4.076,0.825);
- \filldraw[wforange] (4.006,0.745) rectangle (4.096,0.835);
- \node[anchor=east, font=\scriptsize, text=black!82] at (2.320,0.250) {AlphaEarth};
- \draw[wfslate, line width=0.72pt] (4.700,0.120) -- (6.103,0.120);
- \draw[wfslate, line width=0.72pt] (4.700,0.085) -- (4.700,0.155);
- \draw[wfslate, line width=0.72pt] (6.103,0.085) -- (6.103,0.155);
- \filldraw[wfslate] (5.401,0.120) circle[radius=0.045];
- \draw[wforange, line width=0.72pt] (3.872,0.380) -- (4.815,0.380);
- \draw[wforange, line width=0.72pt] (3.872,0.345) -- (3.872,0.415);
- \draw[wforange, line width=0.72pt] (4.815,0.345) -- (4.815,0.415);
- \filldraw[wforange] (4.298,0.335) rectangle (4.388,0.425);
- \end{tikzpicture}

paper_outputs/tables/tab_app_analog_rank_depth.tex DELETED
@@ -1,24 +0,0 @@
- \begin{table*}[t]
- \centering
- \scriptsize
- \setlength{\tabcolsep}{3pt}
- \caption{For fixed retrieval \(\mathcal{T}\) and \(\Omega\), this table reports nDCG@5, best log gap, and rank \(\rho\) in addition to the main nDCG@10/log-error metrics. Cells report mean with small std.}
- \label{tab:app_analog_rank_depth}
- \begin{tabular}{lccc}
- \toprule
- Backbone & nDCG@5 & best log gap & rank $\rho$ \\
- \midrule
- FireWx-FM ref. & \ms{0.5175}{0.0445} & \ms{0.1868}{0.0285} & \ms{0.6019}{0.1460} \\
- Prithvi-WxC & \ms{0.3591}{0.0107} & \ms{0.2151}{0.0594} & \ms{0.1514}{0.1489} \\
- Aurora & \ms{0.4423}{0.0210} & \ms{0.1551}{0.0437} & \ms{0.2162}{0.1856} \\
- ClimaX & \ms{0.4151}{0.0293} & \ms{0.2129}{0.0653} & \ms{0.1587}{0.2831} \\
- StormCast & \ms{0.3960}{0.0240} & \ms{0.1714}{0.0310} & \ms{0.1258}{0.1625} \\
- DLWP & \ms{0.3795}{0.0274} & \ms{0.1944}{0.0807} & \ms{-0.3865}{0.2802} \\
- FCN & \ms{0.4250}{0.0112} & \ms{0.1856}{0.0846} & \ms{-0.1357}{0.2571} \\
- FengWu & \ms{0.4228}{0.0310} & \ms{0.1870}{0.0858} & \ms{-0.1926}{0.2194} \\
- FuXi & \ms{0.4544}{0.0356} & \ms{0.2171}{0.0806} & \ms{-0.1367}{0.2885} \\
- Pangu-Weather & \ms{0.3988}{0.0506} & \ms{0.1901}{0.0838} & \ms{-0.1970}{0.2216} \\
- AlphaEarth & \ms{0.5276}{0.0531} & \ms{0.1782}{0.0454} & \ms{0.4639}{0.2802} \\
- \bottomrule
- \end{tabular}
- \end{table*}

paper_outputs/tables/tab_app_burned_area_median_acre.tex DELETED
@@ -1,24 +0,0 @@
- \begin{table*}[t]
- \centering
- \scriptsize
- \setlength{\tabcolsep}{3pt}
- \caption{For fixed final-area \(\mathcal{T}\) and \(\Omega\), this table reports median log error and acre-scale errors in addition to the main log-RMSE/log-MAE/Spearman metrics. Cells report mean with small std.}
- \label{tab:app_burned_area_median_acre}
- \begin{tabular}{lccc}
- \toprule
- Backbone & log median AE & acre median AE & acre MAPE \\
- \midrule
- FireWx-FM ref. & \ms{1.0235}{0.0982} & \ms{4504.0692}{459.0483} & \ms{1.4525}{0.0254} \\
- Prithvi-WxC & \ms{1.2184}{0.2107} & \ms{5375.8770}{788.7906} & \ms{1.9517}{0.2875} \\
- Aurora & \ms{1.4547}{0.0301} & \ms{9904.9483}{457.4260} & \ms{6.8728}{3.0026} \\
- ClimaX & \ms{1.6841}{0.1818} & \ms{18130.4820}{3248.3873} & \ms{8.2373}{2.8540} \\
- StormCast & \ms{1.4522}{0.1519} & \ms{11155.7881}{2020.8656} & \ms{4.6142}{1.1500} \\
- DLWP & \ms{1.0952}{0.1306} & \ms{4406.9315}{303.0944} & \ms{1.7357}{0.3625} \\
- FCN & \ms{1.1688}{0.1139} & \ms{5166.9993}{213.0333} & \ms{2.0800}{0.4004} \\
- FengWu & \ms{1.1589}{0.1772} & \ms{5137.2822}{628.7543} & \ms{2.0944}{0.4545} \\
- FuXi & \ms{1.1855}{0.0612} & \ms{5697.7117}{796.8785} & \ms{2.4411}{0.5567} \\
- Pangu-Weather & \ms{1.1221}{0.1470} & \ms{5092.3621}{483.8243} & \ms{1.9571}{0.3113} \\
- AlphaEarth & \ms{1.7459}{0.6057} & \ms{15110.7573}{7106.3417} & \ms{9.7398}{2.7425} \\
- \bottomrule
- \end{tabular}
- \end{table*}

paper_outputs/tables/tab_app_contract_params_full.tex DELETED
@@ -1,22 +0,0 @@
- \begin{table}[h]
- \centering
- \scriptsize
- \setlength{\tabcolsep}{3.5pt}
- \renewcommand{\arraystretch}{1.2}
- \caption{Fixed scoring values used by each task-form contract.}
- \label{tab:app_contract_params_full}
- \begin{adjustbox}{max width=\textwidth}
- \begin{tabular}{llll}
- \toprule
- \textbf{\(\mathcal{T}\)} & \textbf{Scoring} & \textbf{Validation} & \textbf{\(\Omega\)} \\
- \midrule
- Occupancy & \(k=8,\Delta t=3\); exact/tol./union \(F_1\) & val. strict \(F_1\) & global; top-5/10/20\% fire-prone \\
- Fire spread & \(k=4,\Delta t=0\); exact/spatial \(F_1\), AP & val. spatial \(F_1\) & spread-region cells \\
- Final burned area & log-RMSE, log-MAE, Spearman \(\rho\) & val. log-RMSE & test events \\
- Analog retrieval & nDCG@10; retrieved-event log error & val. nDCG@10 & test events \\
- Smoke PM\(_{2.5}\) & RMSE, MAE, Pearson \(r\); exceedance 35 & val. RMSE & test stations \\
- Extreme heat & RMSE-C, MAE-C, exceedance \(F_1\) & val. threshold 27/30/33\(^{\circ}\)C & heat-region stations \\
- \bottomrule
- \end{tabular}
- \end{adjustbox}
- \end{table}

paper_outputs/tables/tab_app_head_architectures.tex DELETED
@@ -1,36 +0,0 @@
- \begin{table}[h]
- \centering
- \small
- \setlength{\tabcolsep}{5pt}
- \renewcommand{\arraystretch}{1.3}
- \caption{Lightweight head architectures used in the fixed-contract transfer comparisons.
- All heads are trained from random initialisation on the frozen backbone features.
- Parameter counts are approximate and depend on the feature dimensionality of each backbone.}
- \label{tab:app_head_architectures}
- \begin{tabular}{p{0.15\textwidth}p{0.30\textwidth}p{0.12\textwidth}p{0.33\textwidth}}
- \toprule
- \textbf{$\mathcal{A}$ head} & \textbf{Architecture} & \textbf{Approx.\ params} & \textbf{Notes} \\
- \midrule
- Constant prior &
- Outputs a fixed bias vector, ignoring input features. &
- Output dimension only &
- Provides a degenerate baseline; selected when backbone features carry no useful signal. \\
- Linear probe &
- Single linear layer mapping backbone features to output. No nonlinearity. &
- $d\times c + c$ &
- Standard frozen-representation baseline. \\
- Pixel MLP &
- Two-layer MLP applied independently per spatial unit. &
- $d\times h + h\times c$ &
- Captures per-pixel nonlinearity; ignores spatial context. \\
- Shallow adapter &
- Two-layer MLP with a spatial context window; uses $3\times3$ convolution before the linear output. &
- $9dh + hc$ &
- Balances local spatial context with parameter efficiency. \\
- Wide adapter &
- Shallow adapter with wider hidden dimension. &
- $9dH + Hc$ &
- Higher capacity variant; can overfit on small fire-event sets. \\
- \bottomrule
- \end{tabular}
- \end{table}
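The approximate parameter counts in the deleted head-architecture table above (output dimension only; d*c + c; d*h + h*c; 9dh + hc; 9dH + Hc, with d the feature dimension, c the output dimension, and h/H the hidden widths) can be reproduced with a small helper. The function name and default hidden widths below are illustrative assumptions, not values from the removed sources.

```python
# Hypothetical helper mirroring the deleted table's approximate counts.
# d = backbone feature dim, c = output dim, h/H = narrow/wide hidden dims.
def head_param_counts(d, c, h=64, H=256):
    return {
        "constant_prior": c,                    # fixed bias vector only
        "linear_probe": d * c + c,              # weight matrix + bias
        "pixel_mlp": d * h + h * c,             # two linears (biases omitted)
        "shallow_adapter": 9 * d * h + h * c,   # 3x3 conv, then linear output
        "wide_adapter": 9 * d * H + H * c,      # same shape, wider hidden dim
    }
```

As the table's caption notes, actual counts scale with each backbone's feature dimensionality d.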
paper_outputs/tables/tab_app_heat_event_pr.tex DELETED
@@ -1,24 +0,0 @@
- \begin{table*}[t]
- \centering
- \scriptsize
- \setlength{\tabcolsep}{3pt}
- \caption{For fixed heat \(\mathcal{T}\) and heat-region \(\Omega\), this table reports precision and recall for the exceedance label used by the main \(F_1\). Cells report mean with small std.}
- \label{tab:app_heat_event_pr}
- \begin{tabular}{lcc}
- \toprule
- Backbone & precision & recall \\
- \midrule
- FireWx-FM ref. & \ms{0.9767}{0.0117} & \ms{0.9330}{0.0299} \\
- Prithvi-WxC & \ms{0.8260}{0.0030} & \ms{0.9173}{0.0033} \\
- Aurora & \ms{0.5920}{0.0347} & \ms{0.0517}{0.0020} \\
- ClimaX & \ms{0.7397}{0.0099} & \ms{0.7994}{0.0051} \\
- StormCast & \ms{0.8840}{0.0237} & \ms{0.9320}{0.0165} \\
- DLWP & \ms{0.9429}{0.0085} & \ms{0.8899}{0.0167} \\
- FCN & \ms{0.9408}{0.0097} & \ms{0.9111}{0.0127} \\
- FengWu & \ms{0.3808}{0.2719} & \ms{0.0266}{0.0267} \\
- FuXi & \ms{0.3262}{0.1262} & \ms{0.1810}{0.0481} \\
- Pangu-Weather & \ms{0.1159}{0.0743} & \ms{0.0112}{0.0032} \\
- AlphaEarth & \ms{0.9824}{0.0040} & \ms{0.9278}{0.0178} \\
- \bottomrule
- \end{tabular}
- \end{table*}

paper_outputs/tables/tab_app_matching_rule_params.tex DELETED
@@ -1,17 +0,0 @@
- \begin{table}[h]
- \centering
- \small
- \setlength{\tabcolsep}{10pt}
- \renewcommand{\arraystretch}{1.2}
- \caption{Matching-rule values used in the evaluation contracts.}
- \label{tab:app_matching_rule_params}
- \begin{tabular}{lll}
- \toprule
- \textbf{Parameter} & \textbf{Occupancy} & \textbf{Fire spread} \\
- \midrule
- \(k\) & 8 cells & 4 cells \\
- \(\Delta t\) & 3 for union; 0 spatial-only & 0 \\
- \(\tau\) & val. strict \(F_1\) & val. spatial \(F_1\) \\
- \bottomrule
- \end{tabular}
- \end{table}
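The deleted matching-rule table above fixes a spatial tolerance k (8 cells for occupancy, 4 for spread) alongside the validation-selected threshold tau. A tolerance like this is commonly scored by dilating one mask before matching; the sketch below is an assumed reconstruction of such a spatially tolerant F1, not the removed implementation, and all names are illustrative.

```python
# Hypothetical tolerant-F1 sketch: a predicted positive counts as a hit if
# any true positive lies within a (2k+1) x (2k+1) window, and vice versa.
import numpy as np

def dilate(mask, k):
    """Binary dilation of a 2-D boolean mask with a square (2k+1)^2 window."""
    padded = np.pad(mask, k)  # pads with False
    out = np.zeros_like(mask, dtype=bool)
    for dy in range(2 * k + 1):
        for dx in range(2 * k + 1):
            out |= padded[dy:dy + mask.shape[0], dx:dx + mask.shape[1]]
    return out

def tolerant_f1(pred, target, k):
    tp_pred = np.logical_and(pred, dilate(target, k)).sum()    # matched predictions
    tp_true = np.logical_and(target, dilate(pred, k)).sum()    # covered targets
    precision = tp_pred / pred.sum() if pred.sum() else 0.0
    recall = tp_true / target.sum() if target.sum() else 0.0
    total = precision + recall
    return 2 * precision * recall / total if total else 0.0
```

With k = 0 this reduces to the strict (exact-match) F1, which is consistent with the exact/tolerated metric pairing listed in the deleted contract tables.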
paper_outputs/tables/tab_app_occupancy_ppr_scope.tex DELETED
@@ -1,27 +0,0 @@
- \begin{table*}[t]
- \centering
- \small
- \setlength{\tabcolsep}{4pt}
- \renewcommand{\arraystretch}{1.18}
- \caption{For fixed occupancy \(\mathcal{T}\), this table reports predicted-positive rate.
- Values are percentages under the same validation-selected strict threshold.
- Scopes \(\Omega\) are fixed before test scoring; cells report five-seed mean with std in small type.}
- \label{tab:app_occupancy_ppr_scope}
- \begin{tabular}{lcccc}
- \toprule
- \textbf{Backbone} & \textbf{\(\Omega=\)global} & \textbf{\(\Omega=\)top 5\%} & \textbf{\(\Omega=\)top 10\%} & \textbf{\(\Omega=\)top 20\%} \\
- \midrule
- FireWx-FM ref. & \ms{1.6808}{0.3684} & \ms{3.0619}{1.0925} & \ms{1.5310}{0.5463} & \ms{0.7655}{0.2732} \\
- Prithvi-WxC & \ms{61.9711}{30.9101} & \ms{57.4117}{47.8987} & \ms{58.4565}{51.0897} & \ms{58.9788}{52.6991} \\
- Aurora & \ms{55.5849}{19.7524} & \ms{57.2238}{35.3400} & \ms{68.7942}{37.6958} & \ms{67.2891}{38.3991} \\
- ClimaX & \ms{5.6763}{3.9261} & \ms{24.0091}{9.2816} & \ms{11.8450}{4.5067} & \ms{5.7442}{4.1341} \\
- StormCast & \ms{60.6507}{17.4895} & \ms{57.6017}{35.2921} & \ms{68.0766}{37.3899} & \ms{67.8397}{39.2410} \\
- DLWP & \ms{4.3221}{1.5619} & \ms{9.4001}{5.0807} & \ms{4.9700}{3.6849} & \ms{1.9198}{1.4678} \\
- FCN & \ms{1.5202}{1.3446} & \ms{4.7856}{2.9409} & \ms{2.7257}{1.6353} & \ms{0.8368}{0.2358} \\
- FengWu & \ms{0.4277}{0.4830} & \ms{0.6004}{0.3041} & \ms{0.2609}{0.1935} & \ms{0.1501}{0.1206} \\
- FuXi & \ms{0.4505}{0.2773} & \ms{2.9315}{2.6392} & \ms{0.5197}{0.6074} & \ms{0.3621}{0.4346} \\
- Pangu-Weather & \ms{1.0801}{1.1308} & \ms{2.0549}{2.1893} & \ms{1.4029}{1.4739} & \ms{1.0103}{1.1084} \\
- AlphaEarth & \ms{0.0691}{0.0499} & \ms{0.2826}{0.1497} & \ms{0.1524}{0.0770} & \ms{0.0656}{0.0414} \\
- \bottomrule
- \end{tabular}
- \end{table*}

paper_outputs/tables/tab_app_scope_params.tex DELETED
@@ -1,19 +0,0 @@
- \begin{table}[h]
- \centering
- \small
- \setlength{\tabcolsep}{8pt}
- \renewcommand{\arraystretch}{1.2}
- \caption{Scope values used in the evaluation contracts.}
- \label{tab:app_scope_params}
- \begin{tabular}{lcc}
- \toprule
- \textbf{\(\Omega\)} & \textbf{Definition} & \textbf{Units} \\
- \midrule
- Global & full domain & 8,085,000 test cells \\
- Fire-prone top-5\% & top 5\% by training-period fire frequency & 404,280 test cells \\
- Fire-prone top-10\% & top 10\% by training-period fire frequency & 808,560 test cells \\
- Fire-prone top-20\% & top 20\% by training-period fire frequency & 1,617,000 test cells \\
- Spread region & union of \(\widehat{B}\) and \(B\) & event-specific cells \\
- \bottomrule
- \end{tabular}
- \end{table}

paper_outputs/tables/tab_app_seed_robustness.tex DELETED
@@ -1,36 +0,0 @@
- \begin{table}[h]
- \centering
- \small
- \setlength{\tabcolsep}{5pt}
- \renewcommand{\arraystretch}{1.2}
- \caption{Seed summaries for stochastic checks. Values report mean with small std over completed seeds.}
- \label{tab:app_seed_robustness}
- \begin{adjustbox}{max width=\textwidth}
- \begin{tabular}{p{0.28\textwidth}cllp{0.18\textwidth}}
- \toprule
- \textbf{\(\mathcal{T}\) check} & \textbf{Seeds} & \textbf{Primary value} & \textbf{Other value(s)} & \textbf{Reading} \\
- \midrule
- Final burned area &
- 5 & log-RMSE \ms{1.1657}{0.0126} &
- log-MAE \ms{1.0423}{0.0081}; Spear.\ \ms{0.6298}{0.0338} &
- stable across seeds \\
- Smoke PM\(_{2.5}\) &
- 5 & RMSE \ms{4.4646}{0.0060} &
- MAE \ms{2.4108}{0.0016}; \(r\) \ms{0.6368}{0.0013} &
- stable at table precision \\
- Extreme heat &
- 5 & RMSE-C \ms{0.2179}{0.0043} &
- MAE-C \ms{0.1787}{0.0018}; exceed.\ \(F_1\) \ms{0.9541}{0.0164} &
- stable across seeds \\
- Fire spread &
- 5 & exact \(F_1\) \ms{37.6700}{0.9800} &
- spatial \(F_1\) \ms{80.9700}{2.0200}; AP \ms{30.0900}{1.2500} &
- stable across seeds \\
- Aurora paired-head check &
- 5 & fire-prone score diff.\ \ms{6.3500}{13.2800} &
- PR-AUC and union choices differ in 2/5 seeds &
- variable across seeds \\
- \bottomrule
- \end{tabular}
- \end{adjustbox}
- \end{table}

paper_outputs/tables/tab_app_smoke_high_event.tex DELETED
@@ -1,24 +0,0 @@
-  \begin{table*}[t]
-  \centering
-  \scriptsize
-  \setlength{\tabcolsep}{3pt}
-  \caption{For fixed smoke \(\mathcal{T}\) and station \(\Omega\), this table reports RMSE, MAE, and 90th-percentile absolute error on test rows with observed PM$_{2.5}\ge35$; std uses a row bootstrap over those rows. Cells report mean with small std.}
-  \label{tab:app_smoke_high_event}
-  \begin{tabular}{lccc}
-  \toprule
-  Backbone & high-smoke RMSE & high-smoke MAE & high-smoke 90th AE \\
-  \midrule
-  FireWx-FM ref. & \ms{47.4870}{0.6346} & \ms{34.3954}{0.7654} & \ms{65.6213}{3.8778} \\
-  Prithvi-WxC & \ms{57.2224}{1.7268} & \ms{47.3871}{0.3153} & \ms{74.9666}{3.2381} \\
-  Aurora & \ms{57.2752}{1.7248} & \ms{47.4368}{0.3149} & \ms{75.0755}{3.1074} \\
-  ClimaX & \ms{57.2828}{1.7239} & \ms{47.4407}{0.3140} & \ms{75.1012}{3.0777} \\
-  StormCast & \ms{56.6512}{1.7517} & \ms{46.7914}{0.3281} & \ms{74.0794}{3.4707} \\
-  DLWP & \ms{57.0075}{1.7359} & \ms{47.1971}{0.3198} & \ms{74.4936}{3.3826} \\
-  FCN & \ms{57.0582}{1.7339} & \ms{47.2401}{0.3187} & \ms{74.6431}{3.1982} \\
-  FengWu & \ms{57.0158}{1.7357} & \ms{47.1957}{0.3194} & \ms{74.5652}{3.2871} \\
-  FuXi & \ms{56.9622}{1.7371} & \ms{47.1508}{0.3201} & \ms{74.3278}{3.4435} \\
-  Pangu-Weather & \ms{57.1282}{1.7307} & \ms{47.3050}{0.3170} & \ms{74.6830}{3.2375} \\
-  AlphaEarth & \ms{48.0665}{0.7904} & \ms{35.6088}{0.7341} & \ms{66.7613}{3.9235} \\
-  \bottomrule
-  \end{tabular}
-  \end{table*}
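The deleted caption above states that the high-smoke std comes from a row bootstrap over test rows with observed PM$_{2.5}\ge35$. A minimal sketch of that procedure, assuming plain lists of observed/predicted values; the release does not ship the paper's actual bootstrap code, so function and argument names here are hypothetical:

```python
import math
import random

def bootstrap_rmse_std(y_true, y_pred, n_boot=1000, threshold=35.0, seed=0):
    """Std of RMSE under a row bootstrap, restricted to high-smoke rows."""
    # Keep only rows whose observed value meets the event threshold.
    rows = [(t, p) for t, p in zip(y_true, y_pred) if t >= threshold]
    if not rows:
        return float("nan")  # no qualifying rows to resample
    rng = random.Random(seed)
    n = len(rows)
    stats = []
    for _ in range(n_boot):
        # Resample rows with replacement, then recompute RMSE on the sample.
        sample = [rows[rng.randrange(n)] for _ in range(n)]
        stats.append(math.sqrt(sum((t - p) ** 2 for t, p in sample) / n))
    mean = sum(stats) / n_boot
    return math.sqrt(sum((s - mean) ** 2 for s in stats) / n_boot)
```

The MAE and 90th-percentile columns would follow the same resampling loop with a different per-sample statistic.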
 
paper_outputs/tables/tab_app_spread_ap_by_scope.tex DELETED
@@ -1,24 +0,0 @@
-  \begin{table*}[t]
-  \centering
-  \scriptsize
-  \setlength{\tabcolsep}{3pt}
-  \caption{For fixed spread \(\mathcal{T}\) and strict \(\Lambda\), this table reports AP under three \(\Omega\) scopes: full test, top-5\% train-fire area, and top-10\% train-fire area. Values are percentages; cells report mean with small std.}
-  \label{tab:app_spread_ap_by_scope}
-  \begin{tabular}{lccc}
-  \toprule
-  Backbone & full \(\Omega\) AP & top-5\% \(\Omega\) AP & top-10\% \(\Omega\) AP \\
-  \midrule
-  FireWx-FM ref. & \ms{30.0197}{1.5651} & \ms{40.7452}{2.0542} & \ms{37.4096}{1.8731} \\
-  Prithvi-WxC & \ms{4.8319}{0.1731} & \ms{12.6086}{0.4468} & \ms{8.7051}{0.1889} \\
-  Aurora & \ms{17.7723}{0.4293} & \ms{30.3106}{0.9404} & \ms{26.4732}{0.6932} \\
-  ClimaX & \ms{11.1726}{0.2337} & \ms{25.7871}{1.2896} & \ms{19.9977}{1.2217} \\
-  StormCast & \ms{8.1147}{1.1569} & \ms{18.5461}{1.1727} & \ms{14.1286}{1.2956} \\
-  DLWP & \ms{9.2142}{2.6587} & \ms{19.3346}{2.3922} & \ms{14.9788}{2.6696} \\
-  FCN & \ms{6.6774}{1.3001} & \ms{16.7396}{3.2955} & \ms{11.9308}{2.3881} \\
-  FengWu & \ms{11.0046}{2.7092} & \ms{21.1506}{1.2163} & \ms{17.0113}{1.5778} \\
-  FuXi & \ms{13.5507}{0.3840} & \ms{22.5434}{0.4100} & \ms{19.1964}{0.3943} \\
-  Pangu-Weather & \ms{10.6250}{1.4643} & \ms{19.8294}{1.3044} & \ms{15.8013}{1.1602} \\
-  AlphaEarth & \ms{12.2847}{1.3562} & \ms{22.8692}{0.4915} & \ms{18.2992}{1.2110} \\
-  \bottomrule
-  \end{tabular}
-  \end{table*}
 
paper_outputs/tables/tab_appendix_selection_regret_tolerance.tex DELETED
@@ -1,2 +0,0 @@
-  % Replaced by the all-backbone value table in sections/appendix.tex
-  % (Table~\ref{tab:appendix_selection_regret_tolerance}).
 
 
 
paper_outputs/tables/tab_fireprone_contract_progression.tex DELETED
@@ -1,69 +0,0 @@
-  \begin{table*}[t]
-  \centering
-  \scriptsize
-  \setlength{\tabcolsep}{4pt}
-  \caption{Occupancy scores across global and fire-prone scopes. Global uses the full validation/test domain; top-\(k\) rows use train-defined fire-prone masks from historical fire frequency. Values are \(F_1\) percentages from the same validation-selected strict threshold. Tolerance is spatial-only; union adds temporal and spatial matching. Difference is union minus strict. Rows report five-seed mean with small std. Values use four decimals.}
-  \label{tab:fireprone_contract_progression}
-  \begin{adjustbox}{max width=\textwidth}
-  \begin{tabular}{@{}llcccc@{}}
-  \toprule
-  Backbone & Scope & Strict \(F_1\uparrow\) & Tolerance \(F_1\uparrow\) & Union \(F_1\uparrow\) & Difference \(\uparrow\) \\
-  \midrule
-  \textcolor{blue}{FireWx-FM ref.} & global & \ms{0.4550}{0.1410} & \ms{29.7480}{1.2870} & \ms{59.0660}{2.7370} & \ms{58.6110}{2.6950} \\
-  & top 5\% & \ms{3.5600}{0.8810} & \ms{39.2620}{1.4010} & \ms{72.8280}{2.5780} & \ms{69.2680}{1.9960} \\
-  & top 10\% & \ms{3.5580}{0.8800} & \ms{39.1660}{1.3910} & \ms{72.5200}{2.5670} & \ms{68.9630}{1.9890} \\
-  & top 20\% & \ms{3.5300}{0.8700} & \ms{38.2850}{1.2950} & \ms{69.7230}{2.4660} & \ms{66.1930}{1.9270} \\
-  \addlinespace[1pt]
-  Prithvi-WxC & global & \ms{0.0550}{0.0040} & \ms{7.1600}{0.6600} & \ms{20.1900}{1.8300} & \ms{20.1300}{1.8300} \\
-  & top 5\% & \ms{1.4100}{1.1600} & \ms{19.2600}{4.5000} & \ms{42.5800}{4.5500} & \ms{41.1700}{3.4800} \\
-  & top 10\% & \ms{1.2400}{1.3200} & \ms{14.8800}{8.4400} & \ms{32.6900}{13.2100} & \ms{31.4500}{11.9100} \\
-  & top 20\% & \ms{1.1500}{1.3800} & \ms{13.1500}{9.4600} & \ms{28.1300}{15.2900} & \ms{26.9800}{13.9200} \\
-  \addlinespace[1pt]
-  Aurora & global & \ms{0.0700}{0.0100} & \ms{8.5000}{1.9600} & \ms{23.1000}{4.9400} & \ms{23.0400}{4.9300} \\
-  & top 5\% & \ms{0.9900}{0.9300} & \ms{15.1300}{6.0800} & \ms{35.4800}{11.0200} & \ms{34.5000}{10.3700} \\
-  & top 10\% & \ms{0.7800}{1.0500} & \ms{12.7400}{6.5600} & \ms{30.5300}{10.8800} & \ms{29.7500}{9.8700} \\
-  & top 20\% & \ms{0.6700}{1.1000} & \ms{10.5300}{7.4300} & \ms{24.9400}{12.5800} & \ms{24.2800}{11.4900} \\
-  \addlinespace[1pt]
-  ClimaX & global & \ms{0.3500}{0.0800} & \ms{29.7500}{3.6100} & \ms{60.1500}{7.5900} & \ms{59.8000}{7.5500} \\
-  & top 5\% & \ms{1.2900}{0.1100} & \ms{34.5800}{2.3800} & \ms{69.2200}{5.7200} & \ms{67.9200}{5.7300} \\
-  & top 10\% & \ms{1.2500}{0.1600} & \ms{34.3300}{2.2900} & \ms{68.5700}{5.5400} & \ms{67.3200}{5.5500} \\
-  & top 20\% & \ms{1.0300}{0.2700} & \ms{30.2100}{4.2900} & \ms{60.0600}{7.5700} & \ms{59.0400}{7.5900} \\
-  \addlinespace[1pt]
-  StormCast & global & \ms{0.0560}{0.0110} & \ms{8.2000}{2.1900} & \ms{22.3800}{5.4300} & \ms{22.3200}{5.4200} \\
-  & top 5\% & \ms{0.9600}{0.8000} & \ms{15.3200}{5.5300} & \ms{36.1900}{9.7300} & \ms{35.2300}{9.1800} \\
-  & top 10\% & \ms{0.7300}{0.9300} & \ms{12.6700}{6.3300} & \ms{30.4700}{10.6500} & \ms{29.7500}{9.7500} \\
-  & top 20\% & \ms{0.5800}{0.9100} & \ms{10.4200}{7.3400} & \ms{24.6600}{12.4000} & \ms{24.0800}{11.5000} \\
-  \addlinespace[1pt]
-  AlphaEarth & global & \ms{2.0600}{0.4400} & \ms{29.4500}{6.0100} & \ms{37.4300}{9.9500} & \ms{35.3700}{10.0300} \\
-  & top 5\% & \ms{6.9100}{0.8500} & \ms{42.8800}{4.6100} & \ms{51.7400}{8.7300} & \ms{44.8300}{9.0800} \\
-  & top 10\% & \ms{6.6400}{0.9900} & \ms{41.9000}{5.9500} & \ms{50.5700}{10.0100} & \ms{43.9300}{9.9200} \\
-  & top 20\% & \ms{6.1900}{1.1300} & \ms{38.8300}{7.5000} & \ms{46.3800}{12.1700} & \ms{40.1900}{11.6800} \\
-  \addlinespace[1pt]
-  DLWP & global & \ms{0.1700}{0.0400} & \ms{14.9100}{3.2400} & \ms{28.1900}{6.9700} & \ms{28.0200}{6.9300} \\
-  & top 5\% & \ms{1.8100}{0.4800} & \ms{31.7200}{3.2900} & \ms{55.4600}{5.2900} & \ms{53.6500}{5.4800} \\
-  & top 10\% & \ms{1.6100}{0.6000} & \ms{27.6600}{5.9200} & \ms{47.1300}{8.0100} & \ms{45.5200}{7.7900} \\
-  & top 20\% & \ms{1.5200}{0.9000} & \ms{20.9400}{4.8000} & \ms{34.9300}{7.8500} & \ms{33.4100}{7.8800} \\
-  \addlinespace[1pt]
-  FCN & global & \ms{0.2800}{0.0800} & \ms{19.5100}{3.3400} & \ms{40.0600}{9.3700} & \ms{39.7800}{9.3400} \\
-  & top 5\% & \ms{1.6200}{0.5100} & \ms{29.3800}{2.7600} & \ms{54.3000}{7.4100} & \ms{52.6800}{7.4400} \\
-  & top 10\% & \ms{1.1800}{0.5100} & \ms{22.4200}{3.9800} & \ms{43.4500}{9.2500} & \ms{42.2700}{9.0300} \\
-  & top 20\% & \ms{1.0000}{0.4300} & \ms{16.9800}{3.9400} & \ms{34.0900}{8.2600} & \ms{33.0900}{7.9300} \\
-  \addlinespace[1pt]
-  FengWu & global & \ms{0.2600}{0.0800} & \ms{12.0000}{6.0200} & \ms{24.1000}{13.6300} & \ms{23.8400}{13.5700} \\
-  & top 5\% & \ms{1.5700}{0.3600} & \ms{16.2800}{3.7000} & \ms{30.1100}{5.0100} & \ms{28.5400}{4.7700} \\
-  & top 10\% & \ms{1.2400}{0.5300} & \ms{12.9500}{5.6100} & \ms{24.1900}{8.6900} & \ms{22.9400}{8.1900} \\
-  & top 20\% & \ms{1.1200}{0.5000} & \ms{11.9500}{5.0700} & \ms{22.7900}{7.9100} & \ms{21.6700}{7.4400} \\
-  \addlinespace[1pt]
-  FuXi & global & \ms{0.3800}{0.1200} & \ms{21.0300}{4.8200} & \ms{37.2900}{9.4500} & \ms{36.9100}{9.4300} \\
-  & top 5\% & \ms{2.0300}{0.6800} & \ms{31.8900}{4.7300} & \ms{53.9300}{8.3800} & \ms{51.9000}{8.6900} \\
-  & top 10\% & \ms{1.6500}{0.7300} & \ms{24.0100}{5.7800} & \ms{40.2100}{9.9300} & \ms{38.5600}{9.7700} \\
-  & top 20\% & \ms{1.3600}{0.6800} & \ms{21.9500}{5.8600} & \ms{36.7300}{10.0300} & \ms{35.3700}{9.9200} \\
-  \addlinespace[1pt]
-  Pangu-Weather & global & \ms{0.2800}{0.1100} & \ms{17.0900}{4.0500} & \ms{35.6400}{9.0300} & \ms{35.3600}{9.0800} \\
-  & top 5\% & \ms{1.3700}{0.3100} & \ms{22.2200}{6.8600} & \ms{43.4200}{13.2400} & \ms{42.0600}{13.0600} \\
-  & top 10\% & \ms{1.0900}{0.3500} & \ms{18.9300}{5.9300} & \ms{38.5300}{11.7200} & \ms{37.4400}{11.5300} \\
-  & top 20\% & \ms{0.8800}{0.3600} & \ms{17.0200}{5.4900} & \ms{34.5700}{10.2900} & \ms{33.6800}{10.1300} \\
-  \bottomrule
-  \end{tabular}
-  \end{adjustbox}
-  \end{table*}
 
paper_outputs/tables/tab_primary_results.tex DELETED
@@ -1,62 +0,0 @@
-  \begin{table}[t]
-  \centering
-  \small
-  \setlength{\tabcolsep}{4pt}
-  \renewcommand{\arraystretch}{1.20}
-  \caption{%
-  \textbf{Primary fixed-contract transfer results (RQ3).}
-  Occupancy metrics: exact, tolerated, and union $F_1$ (\%).
-  Fire spread metrics: exact $F_1$, spatial $F_1$, and AP (\%).
-  Each block fixes $\mathcal{T}$, $\Lambda$, $\Omega$, $\mathcal{A}$.
-  \textbf{Bold} marks the best frozen backbone per metric.
-  }
-  \label{tab:primary_results}
-  \setlength{\arrayrulewidth}{0.4pt}
-  \resizebox{\textwidth}{!}{%
-  \begin{tabular}{lcccccc}
-  \toprule
-  & \multicolumn{3}{c}{\textbf{Occupancy}}
-  & \multicolumn{3}{c}{\textbf{Fire spread}} \\
-  \cmidrule(lr){2-4}\cmidrule(lr){5-7}
-  \textbf{Comparator}
-  & \textbf{Exact $F_1\uparrow$} & \textbf{Tol.\ $F_1\uparrow$} & \textbf{Union $F_1\uparrow$}
-  & \textbf{Exact $F_1\uparrow$} & \textbf{Spatial $F_1\uparrow$} & \textbf{AP$\uparrow$} \\
-  \midrule
-  \textcolor{blue}{FireWx-FM ref.}
-  & \ms{0.4546}{0.1412} & \ms{29.7484}{1.2868} & \ms{59.0656}{2.7372}
-  & \ms{37.6700}{0.9800} & \ms{80.9700}{2.0200} & \ms{30.0900}{1.2500} \\
-  \midrule
-  Prithvi-WxC
-  & \ms{0.0552}{0.0039} & \ms{7.1649}{0.6557} & \ms{20.1853}{1.8299}
-  & \ms{22.3500}{3.4500} & \ms{65.2600}{1.0700} & \ms{5.0000}{0.3000} \\
-  Aurora
-  & \ms{0.0656}{0.0094} & \ms{8.5009}{1.9594} & \ms{23.1037}{4.9418}
-  & \textbf{\ms{30.8757}{0.1343}} & \textbf{\ms{71.7329}{0.0141}} & \textbf{\ms{16.6221}{1.6965}} \\
-  ClimaX
-  & \ms{0.3480}{0.0754} & \textbf{\ms{29.7535}{3.6073}} & \textbf{\ms{60.1506}{7.5865}}
-  & \ms{27.9853}{2.0532} & \ms{69.0634}{2.3832} & \ms{11.1726}{0.2337} \\
-  StormCast
-  & \ms{0.0626}{0.0119} & \ms{8.1951}{2.1895} & \ms{22.3817}{5.4294}
-  & \ms{14.8387}{7.5791} & \ms{55.7568}{21.3003} & \ms{2.8114}{0.7377} \\
-  DLWP
-  & \ms{0.1693}{0.0419} & \ms{14.9148}{3.2446} & \ms{28.1901}{6.9658}
-  & \ms{5.9335}{10.0712} & \ms{22.8587}{22.3750} & \ms{5.9435}{5.5194} \\
-  FCN
-  & \ms{0.2829}{0.0839} & \ms{19.5061}{3.3412} & \ms{40.0604}{9.3701}
-  & \ms{3.1798}{2.6598} & \ms{15.6203}{12.4531} & \ms{2.3861}{1.2614} \\
-  FengWu
-  & \ms{0.2613}{0.0757} & \ms{12.0050}{6.0239} & \ms{24.1022}{13.6293}
-  & \ms{5.5189}{9.0883} & \ms{18.4774}{22.4703} & \ms{13.1658}{1.3408} \\
-  FuXi
-  & \ms{0.3774}{0.1212} & \ms{21.0323}{4.8211} & \ms{37.2888}{9.4470}
-  & \ms{19.9909}{2.1364} & \ms{56.1826}{3.0412} & \ms{14.3526}{0.3554} \\
-  Pangu-Weather
-  & \ms{0.2755}{0.1089} & \ms{17.0909}{4.0477} & \ms{35.6386}{9.0327}
-  & \ms{11.2583}{11.0719} & \ms{32.5081}{25.4969} & \ms{12.6881}{1.6790} \\
-  AlphaEarth
-  & \textbf{\ms{2.0606}{0.4404}} & \ms{29.4476}{6.0064} & \ms{37.4286}{9.9458}
-  & \ms{11.0995}{3.6088} & \ms{32.8316}{7.4634} & \ms{11.8343}{1.5050} \\
-  \bottomrule
-  \end{tabular}
-  }
-  \end{table}
 
paper_outputs/tables/tab_selection_regret_full_head.tex DELETED
@@ -1,2 +0,0 @@
-  % Full per-head rows are kept in the supplementary CSV files.
-  % The manuscript uses the all-backbone selection-regret summaries instead.
 
 
 
paper_outputs/tables/tab_selection_regret_scope.tex DELETED
@@ -1,24 +0,0 @@
-  \begin{table*}[!t]
-  \centering
-  \small
-  \setlength{\tabcolsep}{4pt}
-  \caption{Fixed-feature selection-regret check across evaluation scopes. Values are percentage-point regret \(\delta = D(h_D)-D(h_R)\) under union-\(F_1\), where \(h_R\) is selected by PR-AUC and \(h_D\) by the decision metric. Top-\(k\) columns use train-defined fire-prone scopes. Rows report mean with small std over five seeds; \(0.0000\) means the two selectors give the same decision score for all seeds.}
-  \label{tab:selection_regret_diagnostic}
-  \begin{tabular}{lcccc}
-  \toprule
-  \textbf{Feature source} & \textbf{\(\Omega=\)global} & \textbf{\(\Omega=\)top 5\%} & \textbf{\(\Omega=\)top 10\%} & \textbf{\(\Omega=\)top 20\%} \\
-  \midrule
-  \textcolor{blue}{FireWx-FM ref.} & \ms{7.3831}{7.4536} & \ms{0.3664}{0.6812} & \ms{1.2275}{1.2665} & \ms{2.9385}{2.7513} \\
-  Prithvi-WxC & 0.0000 & 0.0000 & 0.0000 & 0.0000 \\
-  Aurora & \ms{4.9455}{10.6974} & \ms{15.4283}{34.4987} & \ms{13.9934}{31.2903} & \ms{14.3706}{32.1337} \\
-  ClimaX & \ms{0.1296}{0.1775} & 0.0000 & 0.0000 & 0.0000 \\
-  StormCast & 0.0000 & 0.0000 & 0.0000 & 0.0000 \\
-  DLWP & 0.0000 & \ms{1.6716}{1.6079} & \ms{2.8465}{2.6938} & \ms{4.4634}{4.3561} \\
-  FCN & 0.0000 & \ms{0.4510}{1.0071} & \ms{0.4200}{0.9390} & \ms{1.1680}{1.9872} \\
-  FengWu & 0.0000 & \ms{0.8796}{0.5532} & \ms{0.4023}{0.5511} & \ms{0.5222}{0.6239} \\
-  FuXi & 0.0000 & \ms{1.3545}{2.0970} & \ms{0.1656}{0.3703} & \ms{0.2833}{0.3681} \\
-  Pangu-Weather & 0.0000 & \ms{0.7593}{0.8974} & \ms{0.3048}{0.5054} & \ms{0.1868}{0.3255} \\
-  AlphaEarth & \ms{17.2217}{8.8492} & \ms{6.3846}{4.9653} & \ms{6.5738}{6.8970} & \ms{3.8804}{5.9483} \\
-  \bottomrule
-  \end{tabular}
-  \end{table*}
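The deleted caption defines regret as \(\delta = D(h_D)-D(h_R)\), where \(h_R\) is the head chosen by the validation PR-AUC proxy and \(h_D\) the head chosen by the validation decision metric, both scored on the held-out decision metric. A minimal sketch of that selection rule; the field names `pr_auc`, `union_f1_val`, and `union_f1_test` are hypothetical, not the release's actual schema:

```python
def selection_regret(heads):
    """Percentage-point regret between proxy- and decision-selected heads.

    Each entry in `heads` describes one candidate head for a backbone/seed.
    """
    h_r = max(heads, key=lambda h: h["pr_auc"])        # proxy selector (PR-AUC)
    h_d = max(heads, key=lambda h: h["union_f1_val"])  # decision-metric selector
    # Regret is evaluated on the held-out decision metric (union F1 on test).
    return h_d["union_f1_test"] - h_r["union_f1_test"]
```

When both selectors pick the same head, the regret is exactly zero, which is why several table cells report 0.0000.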
 
paper_outputs/tables/tab_selection_regret_scope_sweep.tex DELETED
@@ -1,24 +0,0 @@
-  \begin{table*}[!t]
-  \centering
-  \small
-  \setlength{\tabcolsep}{4pt}
-  \caption{Fixed-feature selection-regret sweep across evaluation scopes. Values are percentage-point regret \(\delta = D(h_D)-D(h_R)\) under union-\(F_1\). Top-\(k\) scopes are train-defined fire-prone masks. Rows report mean with small std over five seeds.}
-  \label{tab:selection_regret_scope_sweep}
-  \begin{tabular}{lcccc}
-  \toprule
-  \textbf{Feature source} & \textbf{\(\Omega=\)global} & \textbf{\(\Omega=\)top 5\%} & \textbf{\(\Omega=\)top 10\%} & \textbf{\(\Omega=\)top 20\%} \\
-  \midrule
-  \textcolor{blue}{FireWx-FM ref.} & \ms{7.3831}{7.4536} & \ms{0.3664}{0.6812} & \ms{1.2275}{1.2665} & \ms{2.9385}{2.7513} \\
-  Prithvi-WxC & 0.0000 & 0.0000 & 0.0000 & 0.0000 \\
-  Aurora & \ms{4.9455}{10.6974} & \ms{15.4283}{34.4987} & \ms{13.9934}{31.2903} & \ms{14.3706}{32.1337} \\
-  ClimaX & \ms{0.1296}{0.1775} & 0.0000 & 0.0000 & 0.0000 \\
-  StormCast & 0.0000 & 0.0000 & 0.0000 & 0.0000 \\
-  DLWP & 0.0000 & \ms{1.6716}{1.6079} & \ms{2.8465}{2.6938} & \ms{4.4634}{4.3561} \\
-  FCN & 0.0000 & \ms{0.4510}{1.0071} & \ms{0.4200}{0.9390} & \ms{1.1680}{1.9872} \\
-  FengWu & 0.0000 & \ms{0.8796}{0.5532} & \ms{0.4023}{0.5511} & \ms{0.5222}{0.6239} \\
-  FuXi & 0.0000 & \ms{1.3545}{2.0970} & \ms{0.1656}{0.3703} & \ms{0.2833}{0.3681} \\
-  Pangu-Weather & 0.0000 & \ms{0.7593}{0.8974} & \ms{0.3048}{0.5054} & \ms{0.1868}{0.3255} \\
-  AlphaEarth & \ms{17.2217}{8.8492} & \ms{6.3846}{4.9653} & \ms{6.5738}{6.8970} & \ms{3.8804}{5.9483} \\
-  \bottomrule
-  \end{tabular}
-  \end{table*}
 
paper_outputs/tables/tab_supporting_results.tex DELETED
@@ -1,120 +0,0 @@
-  \begin{table}[t]
-  \centering
-  \small
-  \setlength{\tabcolsep}{3.5pt}
-  \renewcommand{\arraystretch}{1.18}
-  \caption{%
-  \textbf{Supporting task-metric matrix (RQ4).}
-  Top block: final burned area and analog retrieval.
-  Bottom block: smoke PM$_{2.5}$ and extreme heat.
-  Each block fixes $\mathcal{T}$, $\Lambda$, $\Omega$; backbone
-  column is shared across paired tasks. \textcolor{blue}{FireWx-FM reference row is}
-  separated by a rule as the empirical anchor. \textbf{Bold} marks
-  the largest atmospheric-FM heat error values. For error metrics
-  lower is better ($\downarrow$); for $F_1$, nDCG, and $r$ higher
-  is better ($\uparrow$).
-  }
-  \label{tab:supporting_results}
-  \resizebox{\textwidth}{!}{%
-  \begin{tabular}{lcccccc}
-  \toprule
-  & \multicolumn{3}{c}{\textbf{Burned area}}
-  & \multicolumn{3}{c}{\textbf{Analog retrieval}} \\
-  \cmidrule(lr){2-4}\cmidrule(lr){5-7}
-  \textbf{Backbone}
-  & \textbf{log-RMSE$\downarrow$} & \textbf{log-MAE$\downarrow$}
-  & \textbf{Spearman$\uparrow$}
-  & \textbf{nDCG@10$\uparrow$} & \textbf{log-RMSE$\downarrow$}
-  & \textbf{log-MAE$\downarrow$} \\
-  \midrule
-  \textcolor{blue}{FireWx-FM ref.}
-  & \ms{1.1657}{0.0126} & \ms{1.0423}{0.0081} & \ms{0.6298}{0.0338}
-  & \ms{0.5099}{0.0336} & \ms{1.1977}{0.1029} & \ms{1.0043}{0.0759} \\
-  \midrule
-  Prithvi-WxC
-  & \ms{1.3630}{0.0681} & \ms{1.2435}{0.0668} & \ms{0.1799}{0.3002}
-  & \ms{0.3857}{0.0189} & \ms{1.3908}{0.0938} & \ms{1.2585}{0.0865} \\
-  Aurora
-  & \ms{1.8658}{0.2009} & \ms{1.6717}{0.1245} & \ms{-0.1156}{0.2982}
-  & \ms{0.4046}{0.0144} & \ms{1.3659}{0.0792} & \ms{1.2596}{0.0968} \\
-  ClimaX
-  & \ms{2.0300}{0.2103} & \ms{1.8443}{0.1528} & \ms{-0.2515}{0.2688}
-  & \ms{0.4143}{0.0191} & \ms{1.4526}{0.0926} & \ms{1.2441}{0.1446} \\
-  StormCast
-  & \ms{1.6679}{0.1438} & \ms{1.4745}{0.1134} & \ms{0.1830}{0.1969}
-  & \ms{0.4076}{0.0094} & \ms{1.3663}{0.0781} & \ms{1.2371}{0.1078} \\
-  DLWP
-  & \ms{1.3070}{0.0980} & \ms{1.1769}{0.0834} & \ms{0.4888}{0.1368}
-  & \ms{0.3972}{0.0146} & \ms{1.5351}{0.0802} & \ms{1.3196}{0.0781} \\
-  FCN
-  & \ms{1.3693}{0.0885} & \ms{1.2599}{0.0723} & \ms{0.3484}{0.1662}
-  & \ms{0.4316}{0.0134} & \ms{1.4604}{0.1035} & \ms{1.2351}{0.0586} \\
-  FengWu
-  & \ms{1.3715}{0.1011} & \ms{1.2604}{0.0820} & \ms{0.3221}{0.2004}
-  & \ms{0.4246}{0.0237} & \ms{1.4179}{0.0986} & \ms{1.2233}{0.0915} \\
-  FuXi
-  & \ms{1.4068}{0.1011} & \ms{1.3023}{0.0789} & \ms{0.2663}{0.2561}
-  & \ms{0.4279}{0.0212} & \ms{1.4290}{0.0929} & \ms{1.2236}{0.0961} \\
-  Pangu-Weather
-  & \ms{1.3280}{0.0735} & \ms{1.2081}{0.0607} & \ms{0.4141}{0.1573}
-  & \ms{0.4017}{0.0245} & \ms{1.4235}{0.0731} & \ms{1.2225}{0.0847} \\
-  AlphaEarth
-  & \ms{2.4068}{0.2841} & \ms{2.0822}{0.2371} & \ms{-0.3428}{0.1716}
-  & \ms{0.5086}{0.0440} & \ms{1.2158}{0.1310} & \ms{1.0350}{0.1018} \\
-  \bottomrule
-  \end{tabular}
-  }
-
-  \vspace{4pt}
-
-  \resizebox{\textwidth}{!}{%
-  \begin{tabular}{lcccccc}
-  \toprule
-  & \multicolumn{3}{c}{\textbf{Smoke PM$_{2.5}$}}
-  & \multicolumn{3}{c}{\textbf{Extreme heat}} \\
-  \cmidrule(lr){2-4}\cmidrule(lr){5-7}
-  \textbf{Backbone}
-  & \textbf{RMSE$\downarrow$} & \textbf{MAE$\downarrow$}
-  & \textbf{Pearson $r\uparrow$}
-  & \textbf{RMSE-C$\downarrow$} & \textbf{MAE-C$\downarrow$}
-  & \textbf{Exceed.\ $F_1\uparrow$} \\
-  \midrule
-  \textcolor{blue}{FireWx-FM ref.}
-  & \ms{4.4646}{0.0060} & \ms{2.4108}{0.0016} & \ms{0.6368}{0.0013}
-  & \ms{0.2179}{0.0043} & \ms{0.1787}{0.0018} & \ms{0.9541}{0.0164} \\
-  \midrule
-  Prithvi-WxC
-  & \ms{6.0382}{0.0828} & \ms{3.7301}{0.0055} & \ms{0.0243}{0.0045}
-  & \ms{4.6225}{0.0192} & \ms{2.6315}{0.0128} & \ms{0.8693}{0.0023} \\
-  Aurora
-  & \ms{6.0384}{0.0828} & \ms{3.7265}{0.0055} & \ms{0.0193}{0.0043}
-  & \textbf{\ms{18.0474}{0.0708}} & \textbf{\ms{15.3747}{0.0594}}
-  & \ms{0.0951}{0.0038} \\
-  ClimaX
-  & \ms{6.0402}{0.0828} & \ms{3.7290}{0.0055} & \ms{0.0004}{0.0029}
-  & \ms{17.6492}{0.0347} & \ms{14.4938}{0.0319} & \ms{0.7684}{0.0068} \\
-  StormCast
-  & \ms{6.1230}{0.0830} & \ms{3.8182}{0.0073} & \ms{0.0183}{0.0041}
-  & \ms{1.7671}{0.2145} & \ms{1.3507}{0.1576} & \ms{0.9073}{0.0189} \\
-  DLWP
-  & \ms{5.9289}{0.1031} & \ms{3.7331}{0.0088} & \ms{0.0303}{0.0060}
-  & \ms{2.2662}{0.1106} & \ms{1.7153}{0.0748} & \ms{0.9156}{0.0112} \\
-  FCN
-  & \ms{5.9277}{0.1033} & \ms{3.7345}{0.0088} & \ms{0.0312}{0.0062}
-  & \ms{2.1657}{0.1800} & \ms{1.6033}{0.1039} & \ms{0.9257}{0.0096} \\
-  FengWu
-  & \ms{5.9297}{0.1032} & \ms{3.7395}{0.0088} & \ms{0.0304}{0.0063}
-  & \ms{2.1266}{0.1589} & \ms{1.5801}{0.1004} & \ms{0.0481}{0.0459} \\
-  FuXi
-  & \ms{5.9319}{0.1029} & \ms{3.7398}{0.0088} & \ms{0.0299}{0.0061}
-  & \ms{2.1282}{0.0969} & \ms{1.5759}{0.0719} & \ms{0.2268}{0.0623} \\
-  Pangu-Weather
-  & \ms{5.9270}{0.1036} & \ms{3.7320}{0.0088} & \ms{0.0301}{0.0060}
-  & \ms{2.2045}{0.1483} & \ms{1.6307}{0.0889} & \ms{0.0199}{0.0062} \\
-  AlphaEarth
-  & \ms{4.4403}{0.0488} & \ms{2.3992}{0.0056} & \ms{0.6347}{0.0066}
-  & \ms{0.2194}{0.0039} & \ms{0.1800}{0.0014} & \ms{0.9542}{0.0107} \\
-  \bottomrule
-  \end{tabular}
-  }
-  \end{table}
 
scripts/audit_release.py CHANGED
@@ -18,7 +18,6 @@ REQUIRED = [
     "models/wildfire_fm/README.md",
     "models/wildfire_fm/modeling_unet.py",
     "models/wildfire_fm/checkpoint_manifest.json",
-    "paper/manuscript_final.pdf",
     "paper_outputs/figures/overview_wildfire.pdf",
     "paper_outputs/figures/matching.pdf",
     "paper_outputs/figures/fig_task_contract_tiles.pdf",
@@ -34,14 +33,8 @@
     "scripts/check_paper_output_hashes.py",
 ]
 
-TABLE_LABELS = [
-    "tab_primary_results.tex",
-    "tab_supporting_results.tex",
-    "tab_fireprone_contract_progression.tex",
-    "tab_selection_regret_scope.tex",
-    "tab_selection_regret_scope_sweep.tex",
-    "tab_appendix_selection_regret_tolerance.tex",
-]
+FORBIDDEN_FILE_SUFFIXES = {".tex", ".bib", ".tikz"}
+FORBIDDEN_FILE_NAMES = {"manuscript_final.pdf"}
 
 FORBIDDEN_TEXT = [
     "/home/yx21e",
@@ -75,9 +68,11 @@ def main() -> None:
     for rel in REQUIRED:
         if not (ROOT / rel).exists():
             issues.append(f"missing required file: {rel}")
-    for table in TABLE_LABELS:
-        if not (ROOT / "paper_outputs/tables" / table).exists():
-            issues.append(f"missing paper table output: {table}")
+    for path in ROOT.rglob("*"):
+        if ".git" in path.parts or "__pycache__" in path.parts:
+            continue
+        if path.is_file() and (path.suffix in FORBIDDEN_FILE_SUFFIXES or path.name in FORBIDDEN_FILE_NAMES):
+            issues.append(f"forbidden manuscript/source artifact present: {path.relative_to(ROOT)}")
 
     for path in iter_text_files():
         text = path.read_text(errors="ignore")
@@ -127,7 +122,6 @@ def main() -> None:
     expected_paths = []
     for rel_root in ["paper_outputs", "assets"]:
         expected_paths.extend(str(p.relative_to(ROOT)) for p in (ROOT / rel_root).rglob("*") if p.is_file())
-    expected_paths.append("paper/manuscript_final.pdf")
     expected = sorted(set(expected_paths))
     if sorted(listed) != expected:
         missing = sorted(set(expected) - set(listed))
scripts/reproduce_paper_outputs.py CHANGED
@@ -1,10 +1,8 @@
 #!/usr/bin/env python3
-"""Verify the released WildFIRE-FM paper artifacts.
+"""Verify the released WildFIRE-FM public artifacts.
 
-The final paper figures/tables in this Hub release are copied from the current
-manuscript bundle. Raw-data reruns are intentionally outside this lightweight
-check because the public repository does not redistribute source data or local
-feature caches.
+The Hub release intentionally excludes manuscript TeX/PDF source. This check
+verifies public figure previews, sanitized summaries, and release hygiene.
 """
 
 from __future__ import annotations
@@ -25,7 +23,7 @@ def run(cmd: list[str]) -> None:
 def main() -> None:
     run([sys.executable, "scripts/check_paper_output_hashes.py"])
     run([sys.executable, "scripts/audit_release.py"])
-    print("Verified final paper outputs and release audit.")
+    print("Verified public release artifacts and release audit.")
 
 
 if __name__ == "__main__":