dmpantiu committed on
Commit
b6e173d
·
verified ·
1 Parent(s): 182f48d

Upload folder using huggingface_hub

Browse files
src/eurus/config.py CHANGED
@@ -632,6 +632,16 @@ Persistent Python kernel for custom analysis and visualization.
632
  - **Extremes**: Filter data where values exceed thresholds
633
  - **Visualizations**: Any matplotlib plot saved to PLOTS_DIR
634
 
 
 
 
 
 
 
 
 
 
 
635
  ### 4. MEMORY
636
  Remembers conversation history and previous analyses.
637
 
 
632
  - **Extremes**: Filter data where values exceed thresholds
633
  - **Visualizations**: Any matplotlib plot saved to PLOTS_DIR
634
 
635
+ ### 3b. ANOMALY MAP STRATEGY (CRITICAL)
636
+ ⚠️ When computing **anomaly maps** for large spatial areas (≥30°×30°):
637
+
638
+ 1. **Download target period** — 1 `retrieve_era5_data` call for the requested month
639
+ 2. **Download baseline** — MAX 3-5 `retrieve_era5_data` calls (same month, recent years like 2018-2022)
640
+ 3. **Compute in `python_repl`** — Load all files, average baseline, subtract from target
641
+
642
+ **NEVER download 20-30 individual years** for climatology — that is extremely wasteful.
643
+ A 5-year baseline is sufficient for spatial anomaly maps.
644
+
645
  ### 4. MEMORY
646
  Remembers conversation history and previous analyses.
647
 
src/eurus/retrieval.py CHANGED
@@ -190,6 +190,7 @@ def retrieve_era5_data(
190
  min_longitude: float = 0.0,
191
  max_longitude: float = 359.75,
192
  region: Optional[str] = None,
 
193
  ) -> str:
194
  """
195
  Retrieve ERA5 reanalysis data from Earthmover's cloud-optimized archive.
@@ -204,6 +205,7 @@ def retrieve_era5_data(
204
  min_longitude: Western bound (0 to 360)
205
  max_longitude: Eastern bound (0 to 360)
206
  region: Optional predefined region name (overrides lat/lon)
 
207
 
208
  Returns:
209
  Success message with file path, or error message.
@@ -213,8 +215,8 @@ def retrieve_era5_data(
213
  """
214
  memory = get_memory()
215
 
216
- # Get API key
217
- api_key = os.environ.get("ARRAYLAKE_API_KEY")
218
  if not api_key:
219
  return (
220
  "Error: ARRAYLAKE_API_KEY not found in environment.\n"
@@ -327,9 +329,11 @@ def retrieve_era5_data(
327
  f"Error: Spatial queries are limited to 1 year max ({date_span_days} days requested).\n"
328
  f"The spatial dataset is optimised for maps, not long time series.\n\n"
329
  f"Options:\n"
330
- f"1. Split into yearly requests (e.g. one call per year)\n"
331
- f"2. Use query_type='temporal' for multi-year time-series analysis\n"
332
- f"3. Narrow the date range to ≤ 366 days"
 
 
333
  )
334
 
335
  # Download with retry logic
 
190
  min_longitude: float = 0.0,
191
  max_longitude: float = 359.75,
192
  region: Optional[str] = None,
193
+ api_key: Optional[str] = None,
194
  ) -> str:
195
  """
196
  Retrieve ERA5 reanalysis data from Earthmover's cloud-optimized archive.
 
205
  min_longitude: Western bound (0 to 360)
206
  max_longitude: Eastern bound (0 to 360)
207
  region: Optional predefined region name (overrides lat/lon)
208
+ api_key: Optional Arraylake API key (falls back to env var)
209
 
210
  Returns:
211
  Success message with file path, or error message.
 
215
  """
216
  memory = get_memory()
217
 
218
+ # Get API key: prefer explicit parameter, fall back to env var
219
+ api_key = api_key or os.environ.get("ARRAYLAKE_API_KEY")
220
  if not api_key:
221
  return (
222
  "Error: ARRAYLAKE_API_KEY not found in environment.\n"
 
329
  f"Error: Spatial queries are limited to 1 year max ({date_span_days} days requested).\n"
330
  f"The spatial dataset is optimised for maps, not long time series.\n\n"
331
  f"Options:\n"
332
+ f"1. For anomaly maps: download ONLY the target period + a few recent baseline years "
333
+ f"(3-5 calls max), then compute climatology in python_repl\n"
334
+ f"2. Narrow the date range to ≤ 366 days\n\n"
335
+ f"⚠️ Do NOT split into 20-30 yearly retrieve_era5_data calls — "
336
+ f"that is extremely wasteful and slow!"
337
  )
338
 
339
  # Download with retry logic
src/eurus/tools/analysis_guide.py CHANGED
@@ -118,8 +118,16 @@ ANALYSIS_GUIDES = {
118
  4. **Convert units** — Report in °C, mm, m/s (not K, m, Pa).
119
  5. **Assess magnitude** — Compare to σ of the baseline period.
120
 
 
 
 
 
 
 
 
 
121
  ### Quality Checklist
122
- - [ ] Baseline ≥10 years
123
  - [ ] Same calendar grouping for clim and analysis
124
  - [ ] Units converted for readability
125
  - [ ] Spatial context: is anomaly regional or localized?
 
118
  4. **Convert units** — Report in °C, mm, m/s (not K, m, Pa).
119
  5. **Assess magnitude** — Compare to σ of the baseline period.
120
 
121
+ ### Data Strategy for Large-Area Anomaly Maps
122
+ ⚠️ For spatial areas ≥30°×30° (e.g., tropical Pacific), do NOT download 30 years one-by-one!
123
+ 1. Download target period with 1 `retrieve_era5_data` call
124
+ 2. Download 3-5 recent years of the same month as baseline (3-5 calls)
125
+ 3. Average baseline files in `python_repl` → climatology
126
+ 4. Subtract climatology from target → anomaly map
127
+ A 5-year baseline is sufficient for spatial anomaly maps.
128
+
129
  ### Quality Checklist
130
+ - [ ] Baseline ≥10 years (for temporal/small-area analysis; 3-5 years OK for spatial maps)
131
  - [ ] Same calendar grouping for clim and analysis
132
  - [ ] Units converted for readability
133
  - [ ] Spatial context: is anomaly regional or localized?