Upload folder using huggingface_hub
Browse files
- src/eurus/config.py +10 -0
- src/eurus/retrieval.py +9 -5
- src/eurus/tools/analysis_guide.py +9 -1
src/eurus/config.py
CHANGED
|
@@ -632,6 +632,16 @@ Persistent Python kernel for custom analysis and visualization.
|
|
| 632 |
- **Extremes**: Filter data where values exceed thresholds
|
| 633 |
- **Visualizations**: Any matplotlib plot saved to PLOTS_DIR
|
| 634 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 635 |
### 4. MEMORY
|
| 636 |
Remembers conversation history and previous analyses.
|
| 637 |
|
|
|
|
| 632 |
- **Extremes**: Filter data where values exceed thresholds
|
| 633 |
- **Visualizations**: Any matplotlib plot saved to PLOTS_DIR
|
| 634 |
|
| 635 |
+
### 3b. ANOMALY MAP STRATEGY (CRITICAL)
|
| 636 |
+
⚠️ When computing **anomaly maps** for large spatial areas (≥30°×30°):
|
| 637 |
+
|
| 638 |
+
1. **Download target period** — 1 `retrieve_era5_data` call for the requested month
|
| 639 |
+
2. **Download baseline** — MAX 3-5 `retrieve_era5_data` calls (same month, recent years like 2018-2022)
|
| 640 |
+
3. **Compute in `python_repl`** — Load all files, average baseline, subtract from target
|
| 641 |
+
|
| 642 |
+
**NEVER download 20-30 individual years** for climatology — that is extremely wasteful.
|
| 643 |
+
A 5-year baseline is sufficient for spatial anomaly maps.
|
| 644 |
+
|
| 645 |
### 4. MEMORY
|
| 646 |
Remembers conversation history and previous analyses.
|
| 647 |
|
src/eurus/retrieval.py
CHANGED
|
@@ -190,6 +190,7 @@ def retrieve_era5_data(
|
|
| 190 |
min_longitude: float = 0.0,
|
| 191 |
max_longitude: float = 359.75,
|
| 192 |
region: Optional[str] = None,
|
|
|
|
| 193 |
) -> str:
|
| 194 |
"""
|
| 195 |
Retrieve ERA5 reanalysis data from Earthmover's cloud-optimized archive.
|
|
@@ -204,6 +205,7 @@ def retrieve_era5_data(
|
|
| 204 |
min_longitude: Western bound (0 to 360)
|
| 205 |
max_longitude: Eastern bound (0 to 360)
|
| 206 |
region: Optional predefined region name (overrides lat/lon)
|
|
|
|
| 207 |
|
| 208 |
Returns:
|
| 209 |
Success message with file path, or error message.
|
|
@@ -213,8 +215,8 @@ def retrieve_era5_data(
|
|
| 213 |
"""
|
| 214 |
memory = get_memory()
|
| 215 |
|
| 216 |
-
# Get API key
|
| 217 |
-
api_key = os.environ.get("ARRAYLAKE_API_KEY")
|
| 218 |
if not api_key:
|
| 219 |
return (
|
| 220 |
"Error: ARRAYLAKE_API_KEY not found in environment.\n"
|
|
@@ -327,9 +329,11 @@ def retrieve_era5_data(
|
|
| 327 |
f"Error: Spatial queries are limited to 1 year max ({date_span_days} days requested).\n"
|
| 328 |
f"The spatial dataset is optimised for maps, not long time series.\n\n"
|
| 329 |
f"Options:\n"
|
| 330 |
-
f"1.
|
| 331 |
-
f"
|
| 332 |
-
f"
|
|
|
|
|
|
|
| 333 |
)
|
| 334 |
|
| 335 |
# Download with retry logic
|
|
|
|
| 190 |
min_longitude: float = 0.0,
|
| 191 |
max_longitude: float = 359.75,
|
| 192 |
region: Optional[str] = None,
|
| 193 |
+
api_key: Optional[str] = None,
|
| 194 |
) -> str:
|
| 195 |
"""
|
| 196 |
Retrieve ERA5 reanalysis data from Earthmover's cloud-optimized archive.
|
|
|
|
| 205 |
min_longitude: Western bound (0 to 360)
|
| 206 |
max_longitude: Eastern bound (0 to 360)
|
| 207 |
region: Optional predefined region name (overrides lat/lon)
|
| 208 |
+
api_key: Optional Arraylake API key (falls back to the ARRAYLAKE_API_KEY environment variable)
|
| 209 |
|
| 210 |
Returns:
|
| 211 |
Success message with file path, or error message.
|
|
|
|
| 215 |
"""
|
| 216 |
memory = get_memory()
|
| 217 |
|
| 218 |
+
# Get API key: prefer explicit parameter, fall back to env var
|
| 219 |
+
api_key = api_key or os.environ.get("ARRAYLAKE_API_KEY")
|
| 220 |
if not api_key:
|
| 221 |
return (
|
| 222 |
"Error: ARRAYLAKE_API_KEY not found in environment.\n"
|
|
|
|
| 329 |
f"Error: Spatial queries are limited to 1 year max ({date_span_days} days requested).\n"
|
| 330 |
f"The spatial dataset is optimised for maps, not long time series.\n\n"
|
| 331 |
f"Options:\n"
|
| 332 |
+
f"1. For anomaly maps: download ONLY the target period + a few recent baseline years "
|
| 333 |
+
f"(3-5 calls max), then compute climatology in python_repl\n"
|
| 334 |
+
f"2. Narrow the date range to ≤ 366 days\n\n"
|
| 335 |
+
f"⚠️ Do NOT split into 20-30 yearly retrieve_era5_data calls — "
|
| 336 |
+
f"that is extremely wasteful and slow!"
|
| 337 |
)
|
| 338 |
|
| 339 |
# Download with retry logic
|
src/eurus/tools/analysis_guide.py
CHANGED
|
@@ -118,8 +118,16 @@ ANALYSIS_GUIDES = {
|
|
| 118 |
4. **Convert units** — Report in °C, mm, m/s (not K, m, Pa).
|
| 119 |
5. **Assess magnitude** — Compare to σ of the baseline period.
|
| 120 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
### Quality Checklist
|
| 122 |
-
- [ ] Baseline ≥10 years
|
| 123 |
- [ ] Same calendar grouping for clim and analysis
|
| 124 |
- [ ] Units converted for readability
|
| 125 |
- [ ] Spatial context: is anomaly regional or localized?
|
|
|
|
| 118 |
4. **Convert units** — Report in °C, mm, m/s (not K, m, Pa).
|
| 119 |
5. **Assess magnitude** — Compare to σ of the baseline period.
|
| 120 |
|
| 121 |
+
### Data Strategy for Large-Area Anomaly Maps
|
| 122 |
+
⚠️ For spatial areas ≥30°×30° (e.g., tropical Pacific), do NOT download 30 years one-by-one!
|
| 123 |
+
1. Download target period with 1 `retrieve_era5_data` call
|
| 124 |
+
2. Download 3-5 recent years of the same month as baseline (3-5 calls)
|
| 125 |
+
3. Average baseline files in `python_repl` → climatology
|
| 126 |
+
4. Subtract climatology from target → anomaly map
|
| 127 |
+
A 5-year baseline is sufficient for spatial anomaly maps.
|
| 128 |
+
|
| 129 |
### Quality Checklist
|
| 130 |
+
- [ ] Baseline ≥10 years (for temporal/small-area analysis; 3-5 years OK for spatial maps)
|
| 131 |
- [ ] Same calendar grouping for clim and analysis
|
| 132 |
- [ ] Units converted for readability
|
| 133 |
- [ ] Spatial context: is anomaly regional or localized?
|