dmpantiu commited on
Commit
ab07cb1
·
verified ·
1 Parent(s): 9bfbf46

Upload folder using huggingface_hub

Browse files
.dockerignore ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Virtual environments
2
+ .venv/
3
+ venv/
4
+
5
+ # Git
6
+ .git/
7
+ .gitignore
8
+
9
+ # IDE
10
+ .vscode/
11
+ .idea/
12
+ *.swp
13
+
14
+ # Python cache
15
+ __pycache__/
16
+ *.py[cod]
17
+ *.egg-info/
18
+ .pytest_cache/
19
+ .coverage
20
+ htmlcov/
21
+
22
+ # Project artifacts (not needed in container)
23
+ data/
24
+ .memory/
25
+ .cache/
26
+ .claude/
27
+ icechunk/
28
+ logs/
29
+ *.log
30
+
31
+ # Build/docs
32
+ docs/
33
+ publications/
34
+ deep_searches/
35
+ bug_reports/
36
+ test_reports/
37
+ dummy_key/
38
+
39
+ # Misc
40
+ .DS_Store
41
+ project_structure.txt
42
+ save_new.py
43
+ improvements_feedback.txt
44
+ user_queries.txt
.gitattributes CHANGED
@@ -1,35 +1,2 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.jpeg filter=lfs diff=lfs merge=lfs -text
2
+ *.png filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.gitignore ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ build/
8
+ develop-eggs/
9
+ dist/
10
+ downloads/
11
+ eggs/
12
+ .eggs/
13
+ lib/
14
+ lib64/
15
+ parts/
16
+ sdist/
17
+ var/
18
+ wheels/
19
+ *.egg-info/
20
+ .installed.cfg
21
+ *.egg
22
+ MANIFEST
23
+
24
+ # Virtual Environment
25
+ .venv/
26
+ venv/
27
+ ENV/
28
+ env/
29
+
30
+ # IDEs
31
+ .idea/
32
+ .vscode/
33
+ *.swp
34
+ .DS_Store
35
+
36
+ # Testing
37
+ .pytest_cache/
38
+ .coverage
39
+ htmlcov/
40
+ .tox/
41
+
42
+ # Project specific
43
+ .env
44
+ .mcp.json
45
+ .memory/
46
+ .cache/
47
+ .claude/
48
+ data/
49
+ !data/plots/.gitkeep
50
+
51
+ # Logs
52
+ *.log
53
+ session.log
54
+ session_retry.log
55
+
56
+ # Generated/Temp files
57
+ icechunk/
58
+ save_new.py
59
+ project_structure.txt
60
+ dummy_key/
61
+ test_reports/
62
+ bug_reports/
63
+ publications/
64
+ user_queries.txt
65
+ improvements_feedback.txt
66
+ docs/
67
+ deep_searches/
68
+ # Generated project dumps
69
+ full_project.txt
70
+ src_structure.txt
Dockerfile ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ============================================================================
2
+ # Eurus ERA5 Agent — Docker Image
3
+ # ============================================================================
4
+ # Multi-target build:
5
+ # docker build --target agent -t eurus-agent .
6
+ # docker build --target web -t eurus-web .
7
+ #
8
+ # Or use docker-compose (preferred):
9
+ # docker compose run --rm agent # interactive CLI
10
+ # docker compose up web # FastAPI on :8000
11
+ # ============================================================================
12
+
13
+ # ---------- base ----------
14
+ FROM python:3.12-slim AS base
15
+
16
+ # System deps for scientific stack (numpy/scipy wheels, geopandas/shapely, matplotlib)
17
+ RUN apt-get update && apt-get install -y --no-install-recommends \
18
+ gcc g++ \
19
+ libgeos-dev \
20
+ libproj-dev \
21
+ libffi-dev \
22
+ curl \
23
+ && rm -rf /var/lib/apt/lists/*
24
+
25
+ WORKDIR /app
26
+
27
+ # Install Python deps first (layer caching)
28
+ COPY requirements.txt .
29
+ RUN pip install --no-cache-dir -r requirements.txt
30
+
31
+ # Copy project source
32
+ COPY pyproject.toml .
33
+ COPY src/ src/
34
+ COPY main.py .
35
+ COPY web/ web/
36
+ COPY tests/ tests/
37
+ COPY scripts/ scripts/
38
+ COPY assets/ assets/
39
+ COPY README.md LICENSE ./
40
+
41
+ # Install eurus package in editable mode
42
+ RUN pip install --no-cache-dir -e ".[agent,web]"
43
+
44
+ # Create dirs the agent expects
45
+ RUN mkdir -p /app/data/plots /app/.memory /app/logs
46
+
47
+ # Signal to the REPL that we're inside Docker → security checks disabled
48
+ ENV EURUS_DOCKER=1
49
+ # Matplotlib: no GUI backend
50
+ ENV MPLBACKEND=Agg
51
+ # Ensure Python output is unbuffered (for docker logs)
52
+ ENV PYTHONUNBUFFERED=1
53
+
54
+ # ---------- agent (CLI mode) ----------
55
+ FROM base AS agent
56
+ ENTRYPOINT ["python", "main.py"]
57
+
58
+ # ---------- web (FastAPI mode) ----------
59
+ FROM base AS web
60
+ EXPOSE 7860
61
+ CMD ["uvicorn", "web.app:app", "--host", "0.0.0.0", "--port", "7860"]
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Vostok Team
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -1,11 +1,219 @@
1
  ---
2
  title: Eurus
3
- emoji: 🏃
4
- colorFrom: yellow
5
- colorTo: purple
6
  sdk: docker
7
  pinned: false
8
- license: mit
9
  ---
10
 
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  title: Eurus
3
+ emoji: 🌊
4
+ colorFrom: purple
5
+ colorTo: blue
6
  sdk: docker
7
  pinned: false
 
8
  ---
9
 
10
+ # Eurus - ERA5 Climate Analysis Agent
11
+
12
+ <div align="center">
13
+ <img src="assets/eurus_logo.jpeg?v=2" alt="Eurus Logo" width="300"/>
14
+
15
+ <h3><b>Next-Generation Oceanographic & Climate Data Intelligence</b></h3>
16
+
17
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
18
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
19
+ [![MCP Protocol](https://img.shields.io/badge/MCP-1.0-orange.svg)](https://modelcontextprotocol.io)
20
+ [![Built with Earthmover](https://img.shields.io/badge/Built%20with-Earthmover-blue.svg)](https://earthmover.io)
21
+ </div>
22
+
23
+ ---
24
+
25
+ **Eurus** is a high-performance, intelligent climate analysis agent designed for oceanographers, climate scientists, and data engineers. Built on the cutting-edge **Icechunk** transactional storage engine, Eurus bridges Earthmover's cloud-optimized ERA5 archives with advanced LLM reasoning, enabling seamless, natural language-driven exploration of planetary-scale climate data.
26
+
27
+ ### ❄️ Powered By
28
+
29
+ This project is made possible by the incredible open-source work from the **[Earthmover](https://earthmover.io)** team:
30
+ - **[Icechunk](https://github.com/earth-mover/icechunk)**: The transactional storage engine for Zarr that provides the backbone for our high-performance data access.
31
+ - **Arraylake**: The cloud-native data lake that hosts the global ERA5 reanalysis archives used by this agent.
32
+
33
+ ### 🚀 Core Pillars
34
+
35
+ - **Intelligence-First Analysis**: Leveraging LLMs to translate complex natural language queries into precise data retrieval and scientific analysis.
36
+ - **Multi-Interface Access**: Interact via a powerful CLI, a rich Web Interface, or integrate directly into IDEs via the Model Context Protocol (MCP).
37
+ - **Cloud-Native Performance**: Direct integration with Earthmover's Arraylake and Icechunk/Zarr storage for lightning-fast, subsetted data access.
38
+ - **Python REPL**: Built-in interactive Python environment with pandas, xarray, matplotlib for custom analysis.
39
+ - **Maritime Routing**: Calculate optimal shipping routes with weather risk assessment.
40
+ - **Persistent Context**: Memory system that tracks cached datasets across sessions.
41
+
42
+ ---
43
+
44
+ ## Features
45
+
46
+ - **Cloud-Optimized Data Retrieval**: Downloads ERA5 reanalysis data directly from Earthmover's Arraylake.
47
+ - **Python REPL**: Interactive Python environment with pre-loaded scientific libraries (pandas, numpy, xarray, matplotlib).
48
+ - **Maritime Routing**: Calculate optimal shipping routes considering land masks (requires scgraph).
49
+ - **Analysis Guides**: Built-in methodology guides for climate analysis and visualization.
50
+ - **Automatic Visualization**: Matplotlib plots automatically saved to `./data/plots/`.
51
+ - **Intelligent Caching**: Re-uses previously downloaded data to save bandwidth.
52
+ - **MCP Server**: Acts as a brain for Claude and other AI assistants.
53
+
54
+ ## Installation
55
+
56
+ ### Prerequisites
57
+ - Python 3.10 or higher
58
+ - An Earthmover Arraylake API Key
59
+ - An OpenAI API Key
60
+
61
+ ### Setup
62
+
63
+ 1. **Clone the repository:**
64
+ ```bash
65
+ git clone https://github.com/yourusername/era_5_agent.git
66
+ cd era_5_agent
67
+ ```
68
+
69
+ 2. **Create and activate a virtual environment:**
70
+ ```bash
71
+ python -m venv .venv
72
+ source .venv/bin/activate # or `.venv\Scripts\activate` on Windows
73
+ ```
74
+
75
+ 3. **Install dependencies:**
76
+ ```bash
77
+ pip install -r requirements.txt
78
+ ```
79
+
80
+ 4. **Configuration:**
81
+ Create a `.env` file in the root directory with your API keys:
82
+
83
+ ```env
84
+ OPENAI_API_KEY=your_openai_api_key
85
+ ARRAYLAKE_API_KEY=your_arraylake_api_key
86
+ # Optional: Custom Host/Port for Web UI
87
+ # WEB_HOST=127.0.0.1
88
+ # WEB_PORT=8000
89
+ ```
90
+
91
+ ---
92
+
93
+ ## Usage
94
+
95
+ Eurus provides three ways to interact with the agent.
96
+
97
+ ### 1. Interactive CLI Agent
98
+ The classic terminal experience with rich text output and direct interaction.
99
+
100
+ ```bash
101
+ python main.py
102
+ ```
103
+
104
+ **Commands:**
105
+ - `/help` - Show help message
106
+ - `/clear` - Clear conversation history
107
+ - `/cache` - List cached datasets
108
+ - `/memory` - Show memory summary
109
+ - `/cleardata` - Clear all downloaded datasets
110
+ - `/quit` or `q` - Exit
111
+
112
+ ### 2. Web Interface
113
+ A modern web-based chat interface with rendered plots and easier navigation.
114
+
115
+ ```bash
116
+ python web/app.py
117
+ # or
118
+ eurus-web
119
+ ```
120
+ Access the interface at `http://127.0.0.1:8000`.
121
+
122
+ ### 3. MCP Server (for Claude / IDEs)
123
+ Integrate Eurus's capabilities directly into Claude Desktop or compatible IDEs using the Model Context Protocol.
124
+
125
+ **Configuration for Claude Desktop:**
126
+ Add the following to your `claude_desktop_config.json`:
127
+
128
+ ```json
129
+ {
130
+ "mcpServers": {
131
+ "eurus": {
132
+ "command": "python",
133
+ "args": ["-m", "eurus.server"],
134
+ "env": {
135
+ "ARRAYLAKE_API_KEY": "your_key_here",
136
+ "PYTHONPATH": "/absolute/path/to/era_5_agent/src"
137
+ }
138
+ }
139
+ }
140
+ }
141
+ ```
142
+
143
+ Or run directly for testing:
144
+ ```bash
145
+ python -m eurus.server
146
+ ```
147
+
148
+ ---
149
+
150
+ ## Example Queries
151
+
152
+ Eurus can answer questions like:
153
+
154
+ * **Data Retrieval:** "Show me the sea surface temperature off California for 2023."
155
+ * **Visualization:** "Plot a time series of temperature anomalies in the North Atlantic."
156
+ * **Comparison:** "Compare SST between El Niño region and the California coast."
157
+ * **Routing:** "Calculate a ship route from Rotterdam to Singapore with weather risk."
158
+ * **Custom Analysis:** "Use Python to calculate the monthly mean SST and plot it."
159
+
160
+ ## Available Data
161
+
162
+ ### Variables
163
+ | Variable | Description | Units |
164
+ |----------|-------------|-------|
165
+ | `sst` | Sea Surface Temperature | K |
166
+ | `t2` | 2m Air Temperature | K |
167
+ | `u10` | 10m U-Wind Component | m/s |
168
+ | `v10` | 10m V-Wind Component | m/s |
169
+ | `mslp` | Mean Sea Level Pressure | Pa |
170
+ | `sp` | Surface Pressure | Pa |
171
+ | `tcc` | Total Cloud Cover | 0-1 |
172
+ | `tp` | Total Precipitation | m |
173
+
174
+ ### Predefined Regions
175
+ Eurus knows many regions by name, including:
176
+ - `north_atlantic`, `south_atlantic`
177
+ - `north_pacific`, `south_pacific`
178
+ - `california_coast`, `gulf_of_mexico`, `caribbean`
179
+ - `mediterranean`, `europe`, `asia_east`
180
+ - `arctic`, `antarctic`
181
+ - `nino34`, `nino3`, `nino4`
182
+
183
+ ---
184
+
185
+ ## Project Structure
186
+
187
+ ```
188
+ era_5_agent/
189
+ ├── main.py # CLI Entry Point
190
+ ├── pyproject.toml # Project configuration
191
+ ├── requirements.txt # Python dependencies
192
+ ├── src/
193
+ │ └── eurus/
194
+ │ ├── config.py # Configuration & Constants
195
+ │ ├── memory.py # Persistent Memory System
196
+ │ ├── server.py # MCP Server Entry Point
197
+ │ └── tools/ # Agent Tools
198
+ │ ├── era5.py # Data Retrieval
199
+ │ ├── routing.py # Maritime Routing
200
+ │ └── analysis_guide.py
201
+ ├── web/ # Web Interface
202
+ │ ├── app.py # FastAPI Application
203
+ │ ├── routes/ # API & Page Routes
204
+ │ └── templates/ # HTML Templates
205
+ ├── data/ # Data Storage (Local)
206
+ │ ├── plots/ # Generated Visualizations
207
+ │ └── *.zarr/ # Cached ERA5 Datasets
208
+ └── .memory/ # Agent Conversation History
209
+ ```
210
+
211
+ ## License
212
+
213
+ MIT License
214
+
215
+ ---
216
+
217
+ <div align="center">
218
+ <p>Special thanks to the <b>Icechunk</b> and <b>Earthmover</b> teams for their pioneering work in cloud-native scientific data storage.</p>
219
+ </div>
assets/eurus_logo.jpeg ADDED

Git LFS Details

  • SHA256: 90fbf765444a5941dfe8f9163f8716723225f499d0924d0415797ad4e165b1e9
  • Pointer size: 132 Bytes
  • Size of remote file: 3.49 MB
assets/eurus_logo_neon.jpeg ADDED

Git LFS Details

  • SHA256: 1f89f4ee11f699a87d660387dde958d9ecaeb367edd0785f539650b96ba684e8
  • Pointer size: 132 Bytes
  • Size of remote file: 3.61 MB
docker-compose.yml ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# ============================================================================
# Eurus — Docker Compose
# ============================================================================
# Usage:
#   docker compose run --rm agent    # interactive CLI
#   docker compose up web            # web UI on http://localhost:8000
#   docker compose up web -d         # web UI (detached)
# ============================================================================

services:
  # ── Interactive CLI agent ──────────────────────────────────────────────
  agent:
    build:
      context: .
      target: agent
    image: eurus-agent
    env_file: .env
    environment:
      - EURUS_DOCKER=1
    volumes:
      - eurus-data:/app/data          # persist downloaded datasets
      - eurus-memory:/app/.memory     # persist memory between runs
      - eurus-plots:/app/data/plots   # persist generated plots
    stdin_open: true                  # -i (interactive)
    tty: true                         # -t (terminal)

  # ── Web interface (FastAPI + WebSocket) ────────────────────────────────
  web:
    build:
      context: .
      target: web
    image: eurus-web
    env_file: .env
    environment:
      - EURUS_DOCKER=1
    ports:
      # The web image serves uvicorn on container port 7860 (see Dockerfile
      # EXPOSE/CMD). Map it to host port 8000 so the usage note above holds;
      # the previous "8000:8000" mapping pointed at a port nothing listens on.
      - "8000:7860"
    volumes:
      - eurus-data:/app/data
      - eurus-memory:/app/.memory
      - eurus-plots:/app/data/plots
    restart: unless-stopped

volumes:
  eurus-data:
  eurus-memory:
  eurus-plots:
main.py ADDED
@@ -0,0 +1,428 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Eurus - ERA5 Climate Analysis Agent
4
+ ======================================
5
+ An intelligent oceanography and climate data analysis assistant.
6
+
7
+ Features:
8
+ - Persistent memory across sessions
9
+ - Cloud-optimized ERA5 data retrieval
10
+ - Interactive Python analysis with visualization
11
+ - Conversation history and context awareness
12
+
13
+ Usage:
14
+ python main.py
15
+
16
+ Commands:
17
+ q, quit, exit - Exit the agent
18
+ /clear - Clear conversation history
19
+ /cache - List cached datasets
20
+ /memory - Show memory summary
21
+ /cleardata - Clear all downloaded ERA5 datasets
22
+ /help - Show help message
23
+ """
24
+
25
+ import os
26
+ import sys
27
+ import logging
28
+ import warnings
29
+ from pathlib import Path
30
+ from datetime import datetime
31
+
32
+ # Suppress noisy warnings from xarray/zarr
33
+ warnings.filterwarnings("ignore", category=FutureWarning)
34
+ warnings.filterwarnings("ignore", message="Consolidated metadata", category=UserWarning)
35
+
36
+ from dotenv import load_dotenv
37
+
38
+ # Load environment variables first
39
+ load_dotenv()
40
+
41
+ # Add src to path
42
+ PROJECT_ROOT = Path(__file__).parent
43
+ sys.path.insert(0, str(PROJECT_ROOT / "src"))
44
+
45
+ # Setup centralized logging
46
+ from eurus.logging_config import setup_logging, cleanup_old_logs
47
+ setup_logging(mode="cli")
48
+ cleanup_old_logs(keep=20)
49
+
50
+ logger = logging.getLogger(__name__)
51
+
52
+ # Import after logging is configured
53
+ from langchain_openai import ChatOpenAI
54
+ from langchain.agents import create_agent
55
+
56
+ from eurus.config import CONFIG, AGENT_SYSTEM_PROMPT, DATA_DIR, PLOTS_DIR
57
+ from eurus.memory import get_memory, MemoryManager
58
+ from eurus.tools import get_all_tools
59
+
60
+
61
+ # ============================================================================
62
+ # BANNER AND HELP
63
+ # ============================================================================
64
+
65
+ BANNER = """
66
+ ╔═══════════════════════════════════════════════════════════════════════════╗
67
+ ║ ║
68
+ ║ ███████╗██╗ ██╗██████╗ ██╗ ██╗███████╗ ║
69
+ ║ ██╔════╝██║ ██║██╔══██╗██║ ██║██╔════╝ ║
70
+ ║ █████╗ ██║ ██║██████╔╝██║ ██║███████╗ ║
71
+ ║ ██╔══╝ ██║ ██║██╔══██╗██║ ██║╚════██║ ║
72
+ ║ ███████╗╚██████╔╝██║ ██║╚██████╔╝███████║ ║
73
+ ║ ╚══════╝ ╚═════╝ ╚═╝ ╚═╝ ╚═════╝ ╚══════╝ ║
74
+ ║ ║
75
+ ║ AI Climate Physicist v2.0 ║
76
+ ║ ───────────────────────────────────────── ║
77
+ ║ ║
78
+ ║ Scientific Capabilities: ║
79
+ ║ • ERA5 reanalysis data retrieval (SST, wind, temperature, pressure) ║
80
+ ║ • Climate Diagnostics: Anomalies, Z-Scores, Statistical Significance ║
81
+ ║ • Pattern Discovery: EOF/PCA analysis for climate modes ║
82
+ ║ • Compound Extremes: "Ocean Oven" detection (Heat + Stagnation) ║
83
+ ║ • Trend Analysis: Decadal trends with p-value significance ║
84
+ ║ • Teleconnections: Correlation and lead-lag analysis ║
85
+ ║ • Maritime Routing & Lagrangian Risk Assessment ║
86
+ ║ ║
87
+ ║ Commands: /help, /clear, /cache, /memory, /quit ║
88
+ ║ ║
89
+ ╚═══════════════════════════════════════════════════════════════════════════╝
90
+ """
91
+
92
+ HELP_TEXT = """
93
+ ╔═══════════════════════════════════════════════════════════════════════════╗
94
+ ║ EURUS HELP - AI Climate Physicist ║
95
+ ╠═══════════════════════════════════════════════════════════════════════════╣
96
+ ║ ║
97
+ ║ COMMANDS: ║
98
+ ║ ───────────────────────────────────────────────────────────────────── ║
99
+ ║ /help - Show this help message ║
100
+ ║ /clear - Clear conversation history (fresh start) ║
101
+ ║ /cache - List all cached ERA5 datasets ║
102
+ ║ /memory - Show memory summary (datasets, analyses) ║
103
+ ║ /cleardata - Clear all downloaded ERA5 datasets ║
104
+ ║ /quit - Exit the agent (also: q, quit, exit) ║
105
+ ║ ║
106
+ ║ SCIENTIFIC ANALYSIS (Publication-Grade): ║
107
+ ║ ───────────────────────────────────────────────────────────────────── ║
108
+ ║ "Analyze marine heatwaves in the North Atlantic summer 2023" ║
109
+ ║ "Find compound extremes where high SST coincides with low wind" ║
110
+ ║ "Perform EOF analysis on SST anomalies to find climate modes" ║
111
+ ║ "Calculate SST trends with statistical significance" ║
112
+ ║ "Detect Ocean Ovens in the Mediterranean" ║
113
+ ║ ║
114
+ ║ SCIENCE TOOLS (The "Physics Brain"): ║
115
+ ║ ───────────────────────────────────────────────────────────────────── ║
116
+ ║ analyze_climate_modes_eof - Pattern discovery via EOF/PCA ║
117
+ ║ detect_compound_extremes - "Ocean Oven" detection ║
118
+ ║ calculate_climate_trends - Trends with p-value significance ║
119
+ ║ detrend_climate_data - Remove warming trend for analysis ║
120
+ ║ detect_percentile_extremes - Percentile-based extreme detection ║
121
+ ║ fetch_climate_index - NOAA indices (Nino3.4, NAO, PDO, AMO) ║
122
+ ║ calculate_return_periods - GEV/EVT (1-in-100 year events) ║
123
+ ║ analyze_granger_causality - Prove X causes Y (not just correlated) ║
124
+ ║ ║
125
+ ║ AVAILABLE VARIABLES: ║
126
+ ║ ───────────────────────────────────────────────────────────────────── ║
127
+ ║ sst - Sea Surface Temperature (K) ║
128
+ ║ t2 - 2m Air Temperature (K) ║
129
+ ║ u10 - 10m U-Wind Component (m/s) ║
130
+ ║ v10 - 10m V-Wind Component (m/s) ║
131
+ ║ mslp - Mean Sea Level Pressure (Pa) ║
132
+ ║ tcc - Total Cloud Cover (0-1) ║
133
+ ║ tp - Total Precipitation (m) ║
134
+ ║ ║
135
+ ║ PREDEFINED REGIONS: ║
136
+ ║ ───────────────────────────────────────────────────────────────────── ║
137
+ ║ north_atlantic, north_pacific, california_coast, mediterranean ║
138
+ ║ gulf_of_mexico, caribbean, nino34, nino3, nino4, arctic, antarctic ║
139
+ ║ ║
140
+ ║ SCIENTIFIC WORKFLOW: ║
141
+ ║ ───────────────────────────────────────────────────────────────────── ║
142
+ ║ 1. RETRIEVE data → 2. DIAGNOSE (Z-scores) → 3. DISCOVER (EOF) ║
143
+ ║ 4. DETECT (extremes) → 5. ATTRIBUTE (correlation) → 6. VISUALIZE ║
144
+ ║ ║
145
+ ║ TIPS: ║
146
+ ║ ───────────────────────────────────────────────────────────────────── ║
147
+ ║ • Always report in anomalies/Z-scores, not raw values ║
148
+ ║ • Z > 2σ means statistically significant extreme ║
149
+ ║ • Use diverging colormaps (RdBu_r) centered at 0 for anomalies ║
150
+ ║ • Add stippling for p < 0.05 significance ║
151
+ ║ ║
152
+ ╚═══════════════════════════════════════════════════════════════════════════╝
153
+ """
154
+
155
+
156
+
157
+ def clear_data_directory(data_dir: Path = None) -> tuple[int, float]:
158
+ """
159
+ Remove all downloaded ERA5 datasets (zarr directories) from the data folder.
160
+
161
+ Args:
162
+ data_dir: Data directory path. Defaults to DATA_DIR from config.
163
+
164
+ Returns:
165
+ Tuple of (datasets_removed, total_size_mb_freed)
166
+ """
167
+ import shutil
168
+
169
+ if data_dir is None:
170
+ data_dir = DATA_DIR
171
+
172
+ datasets_removed = 0
173
+ total_bytes = 0
174
+
175
+ if not data_dir.exists():
176
+ return 0, 0.0
177
+
178
+ # Find and remove all .zarr directories
179
+ for zarr_dir in data_dir.glob('*.zarr'):
180
+ if zarr_dir.is_dir():
181
+ # Calculate size before removing
182
+ dir_size = sum(f.stat().st_size for f in zarr_dir.rglob('*') if f.is_file())
183
+ total_bytes += dir_size
184
+ shutil.rmtree(zarr_dir)
185
+ datasets_removed += 1
186
+ logger.debug(f"Removed dataset: {zarr_dir}")
187
+
188
+ total_mb = total_bytes / (1024 * 1024)
189
+ return datasets_removed, total_mb
190
+
191
+
192
+ # ============================================================================
193
+ # COMMAND HANDLERS
194
+ # ============================================================================
195
+
196
def handle_command(command: str, memory: MemoryManager) -> tuple[bool, str | None]:
    """
    Handle slash commands (plus the bare quit aliases q/quit/exit).

    Args:
        command: Raw user input line.
        memory: Session memory manager; read/mutated by the /clear, /cache,
            /memory and /cleardata commands.

    Returns:
        (should_continue, response_message). ``response_message`` is None
        when the input is not a command at all, signalling the caller to
        forward the input to the agent instead. (The previous annotation
        ``tuple[bool, str]`` did not reflect this None case.)
    """
    cmd = command.lower().strip()

    if cmd in ('/quit', '/exit', '/q', 'quit', 'exit', 'q'):
        return False, "Goodbye! Your conversation has been saved."

    elif cmd == '/help':
        return True, HELP_TEXT

    elif cmd == '/clear':
        memory.clear_conversation()
        return True, "Conversation history cleared. Starting fresh!"

    elif cmd == '/cache':
        cache_info = memory.list_datasets()
        return True, f"\n{cache_info}\n"

    elif cmd == '/memory':
        summary = memory.get_context_summary()
        # Count only dataset records whose files still exist on disk.
        datasets = len([p for p in memory.datasets if os.path.exists(p)])
        analyses = len(memory.analyses)
        convos = len(memory.conversations)

        response = f"""
╔═══════════════════════════════════════════════════════════════════════════╗
║                             MEMORY SUMMARY                                ║
╠═══════════════════════════════════════════════════════════════════════════╣
║  Conversation messages:  {convos:<5}                                            ║
║  Cached datasets:        {datasets:<5}                                            ║
║  Recorded analyses:      {analyses:<5}                                            ║
╚═══════════════════════════════════════════════════════════════════════════╝

{summary}
"""
        return True, response

    elif cmd == '/cleardata':
        datasets_removed, size_freed = clear_data_directory(DATA_DIR)
        # Also drop the memory records that pointed at the deleted files.
        memory.datasets.clear()
        memory._save_datasets()
        response = f"""
╔═══════════════════════════════════════════════════════════════════════════╗
║                            ERA5 DATA CLEARED                              ║
╠═══════════════════════════════════════════════════════════════════════════╣
║  Datasets removed:  {datasets_removed:<5}                                             ║
║  Space freed:       {size_freed:>8.2f} MB                                          ║
╚═══════════════════════════════════════════════════════════════════════════╝
"""
        return True, response

    elif cmd.startswith('/'):
        return True, f"Unknown command: {cmd}\nType /help for available commands."

    return True, None  # Not a command
257
+
258
+
259
+ # ============================================================================
260
+ # CALLBACK FOR TOOL PROGRESS
261
+ # ============================================================================
262
+
263
+ from langchain_core.callbacks import BaseCallbackHandler
264
+
265
+
266
class ToolProgressCallback(BaseCallbackHandler):
    """Print tool calls in real-time during agent execution."""

    def on_tool_start(self, serialized, input_str, **kwargs):
        # The tool name may arrive in the serialized payload or in kwargs.
        fallback = kwargs.get('name', 'unknown')
        tool_name = serialized.get('name', fallback)
        print(f"🔧 Calling: {tool_name}...", flush=True)

    def on_tool_end(self, output, name=None, **kwargs):
        print(f" ✓ {name or 'tool'} done", flush=True)
275
+
276
+
277
+ # ============================================================================
278
+ # MAIN AGENT LOOP
279
+ # ============================================================================
280
+
281
def _require_env_key(var_name: str) -> None:
    """Exit with a helpful message if a required API key is missing from the environment."""
    if not os.environ.get(var_name):
        print(f"ERROR: {var_name} not found in environment.")
        print("Please add it to your .env file:")
        print(f"  {var_name}=your_api_key_here")
        sys.exit(1)


def _extract_response_text(message) -> str:
    """Pull plain text out of a LangChain message object or a raw role/content dict."""
    if hasattr(message, 'content') and message.content:
        return message.content
    if isinstance(message, dict) and message.get('content'):
        return message['content']
    return str(message)


def main():
    """Main entry point for the Eurus agent: set everything up, then run the REPL loop."""
    print(BANNER)

    # Both keys are hard requirements; bail out early with guidance.
    _require_env_key("ARRAYLAKE_API_KEY")
    _require_env_key("OPENAI_API_KEY")

    print("Initializing memory system...")
    mem = get_memory()

    # Seed the chat with the most recent persisted messages.
    history = mem.get_langchain_messages(n_messages=10)
    logger.info(f"Loaded {len(history)} messages from history")

    print("Starting Python kernel...")
    # All capabilities enabled by default (including maritime routing)
    tools = get_all_tools(enable_routing=True, enable_guide=True)
    logger.info(f"Loaded {len(tools)} tools")

    print("Connecting to LLM...")
    llm = ChatOpenAI(
        model=CONFIG.model_name,
        temperature=CONFIG.temperature,
        streaming=True  # Enable streaming for real-time output
    )

    # Fold persisted context into the system prompt when there is any.
    prompt = AGENT_SYSTEM_PROMPT
    ctx = mem.get_context_summary()
    if ctx and ctx != "No context available.":
        prompt += f"\n\n## CURRENT CONTEXT\n{ctx}"

    print("Creating agent...")
    agent = create_agent(
        model=llm,
        tools=tools,
        system_prompt=prompt,
        debug=False
    )

    chat = history.copy()

    print("\n" + "=" * 75)
    print("READY! Type your question or /help for commands.")
    print("=" * 75 + "\n")

    try:
        while True:
            try:
                line = input(">> You: ").strip()
            except EOFError:
                break

            if not line:
                continue

            # Slash commands are handled locally, never sent to the agent.
            keep_going, reply = handle_command(line, mem)
            if reply:
                print(reply)
            if not keep_going:
                break
            if reply:  # a command was handled — nothing for the agent to do
                continue

            mem.add_message("user", line)
            chat.append({"role": "user", "content": line})

            print("\nThinking...\n")
            try:
                print("\n" + "─" * 75)

                # invoke() with a callback handler gives real-time tool progress
                run_cfg = {"recursion_limit": 35, "callbacks": [ToolProgressCallback()]}
                outcome = agent.invoke({"messages": chat}, config=run_cfg)

                # Keep the result as LangChain messages for the next turn.
                chat = list(outcome["messages"])
                answer = _extract_response_text(chat[-1])

                print(f"\n📝 Eurus:\n{answer}", flush=True)
                print("─" * 75 + "\n")
                mem.add_message("assistant", answer)

            except KeyboardInterrupt:
                print("\n\nInterrupted. Type /quit to exit or continue with a new question.")

            except Exception as e:
                error_msg = f"Error: {str(e)}"
                logger.error(error_msg, exc_info=True)
                print(f"\nError during processing: {error_msg}")
                print("Please try again or rephrase your question.\n")

    except KeyboardInterrupt:
        print("\n\nReceived interrupt signal.")

    finally:
        print("\nShutting down...")

        # Drop records for datasets whose files no longer exist on disk.
        removed = mem.cleanup_missing_datasets()
        if removed:
            logger.info(f"Cleaned up {removed} missing dataset records")

        print("Session saved. Goodbye!")
421
+
422
+
423
+ # ============================================================================
424
+ # ENTRY POINT
425
+ # ============================================================================
426
+
427
+ if __name__ == "__main__":
428
+ main()
pyproject.toml ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [project]
6
+ name = "eurus"
7
+ version = "1.0.0"
8
+ description = "Eurus Climate Agent - Access ERA5 reanalysis data through Model Context Protocol"
9
+ readme = "README.md"
10
+ license = {text = "MIT"}
11
+ requires-python = ">=3.10"
12
+ authors = [
13
+ {name = "Eurus Team", email = "eurus@example.com"}
14
+ ]
15
+ keywords = [
16
+ "era5",
17
+ "climate",
18
+ "mcp",
19
+ "model-context-protocol",
20
+ "oceanography",
21
+ "reanalysis",
22
+ "weather",
23
+ "xarray",
24
+ "zarr"
25
+ ]
26
+ classifiers = [
27
+ "Development Status :: 4 - Beta",
28
+ "Environment :: Console",
29
+ "Intended Audience :: Science/Research",
30
+ "License :: OSI Approved :: MIT License",
31
+ "Operating System :: OS Independent",
32
+ "Programming Language :: Python :: 3",
33
+ "Programming Language :: Python :: 3.10",
34
+ "Programming Language :: Python :: 3.11",
35
+ "Programming Language :: Python :: 3.12",
36
+ "Topic :: Scientific/Engineering :: Atmospheric Science",
37
+ "Topic :: Scientific/Engineering :: GIS",
38
+ ]
39
+
40
+ dependencies = [
41
+ "mcp>=1.0.0",
42
+ "arraylake>=0.10.0",
43
+ "xarray>=2024.10.0",
44
+ "zarr>=3.0.0",
45
+ "pandas>=2.0.0",
46
+ "numpy>=1.24.0",
47
+ "pydantic>=2.0.0",
48
+ "python-dotenv>=1.0.0",
49
+ ]
50
+
51
+ [project.optional-dependencies]
52
+ agent = [
53
+ "langchain>=0.3.0",
54
+ "langchain-openai>=0.2.0",
55
+ "langchain-core>=0.3.0",
56
+ "openai>=1.0.0",
57
+ "jupyter_client>=8.0.0",
58
+ "ipykernel>=6.0.0",
59
+ "matplotlib>=3.7.0",
60
+ "scipy>=1.10.0",
61
+ "seaborn>=0.12.0",
62
+ ]
63
+ web = [
64
+ "fastapi>=0.109.0",
65
+ "uvicorn[standard]>=0.27.0",
66
+ "jinja2>=3.1.0",
67
+ "python-multipart>=0.0.6",
68
+ "websockets>=12.0",
69
+ ]
70
+ dev = [
71
+ "pytest>=7.0.0",
72
+ "pytest-asyncio>=0.21.0",
73
+ "pytest-cov>=4.0.0",
74
+ "black>=23.0.0",
75
+ "ruff>=0.1.0",
76
+ "mypy>=1.0.0",
77
+ "pre-commit>=3.0.0",
78
+ ]
79
+ docs = [
80
+ "mkdocs>=1.5.0",
81
+ "mkdocs-material>=9.0.0",
82
+ "mkdocstrings[python]>=0.24.0",
83
+ ]
84
+
85
+ [project.urls]
86
+ Homepage = "https://github.com/yourusername/era5-mcp"
87
+ Documentation = "https://github.com/yourusername/era5-mcp#readme"
88
+ Repository = "https://github.com/yourusername/era5-mcp"
89
+ Issues = "https://github.com/yourusername/era5-mcp/issues"
90
+
91
+ [project.scripts]
92
+ eurus-mcp = "eurus.server:main"
93
+ eurus-agent = "eurus.agent:main"
94
+ eurus-web = "web.app:main"
95
+
96
+ [tool.hatch.build.targets.wheel]
97
+ packages = ["src/eurus"]
98
+
99
+ [tool.hatch.build.targets.sdist]
100
+ include = [
101
+ "/src",
102
+ "/tests",
103
+ "/README.md",
104
+ "/LICENSE",
105
+ ]
106
+
107
+ [tool.black]
108
+ line-length = 100
109
+ target-version = ['py310', 'py311', 'py312']
110
+
111
[tool.ruff]
line-length = 100

# Ruff >=0.2 expects linter settings under [tool.ruff.lint];
# top-level `select`/`ignore` are deprecated.
[tool.ruff.lint]
select = [
    "E",   # pycodestyle errors
    "W",   # pycodestyle warnings
    "F",   # pyflakes
    "I",   # isort
    "B",   # flake8-bugbear
    "C4",  # flake8-comprehensions
    "UP",  # pyupgrade
]
ignore = [
    "E501",  # line too long (handled by black)
    "B008",  # do not perform function calls in argument defaults
]
126
+
127
+ [tool.mypy]
128
+ python_version = "3.10"
129
+ warn_return_any = true
130
+ warn_unused_configs = true
131
+ disallow_untyped_defs = true
132
+
133
+ [tool.pytest.ini_options]
134
+ asyncio_mode = "auto"
135
+ testpaths = ["tests"]
136
+ addopts = "-v --cov=src/eurus --cov-report=term-missing"
137
+
138
+ [tool.coverage.run]
139
+ source = ["src/eurus"]
140
+ branch = true
141
+
142
+ [tool.coverage.report]
143
+ exclude_lines = [
144
+ "pragma: no cover",
145
+ "def __repr__",
146
+ "raise AssertionError",
147
+ "raise NotImplementedError",
148
+ ]
requirements.txt ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ERA5 Agent Dependencies
2
+ # =======================
3
+
4
+ # LangChain (Agent Framework)
5
+ langchain>=0.3.0
6
+ langchain-openai>=0.2.0
7
+ langchain-core>=0.3.0
8
+
9
+ # OpenAI
10
+ openai>=1.0.0
11
+
12
+ # Data Access
13
+ arraylake>=0.10.0
14
+ icechunk>=0.1.0
15
+
16
+ # Scientific Computing
17
+ xarray>=2024.10.0
18
+ zarr>=3.0.0
19
+ pandas>=2.0.0
20
+ numpy>=1.24.0
21
+ scipy>=1.10.0
22
+ scikit-learn>=1.3.0 # For EOF/PCA climate pattern analysis
23
+ statsmodels>=0.14.0 # For Granger Causality & trend analysis
24
+ bottleneck>=1.3.0 # Fast rolling windows for time series
25
+
26
+ # Visualization
27
+ matplotlib>=3.7.0
28
+ seaborn>=0.12.0
29
+ geopandas
30
+
31
+ # Validation & Config
32
+ pydantic>=2.0.0
33
+ python-dotenv>=1.0.0
34
+
35
+ # Jupyter Kernel
36
+ jupyter_client>=8.0.0
37
+ ipykernel>=6.0.0
38
+
39
+ # MCP Server
40
+ mcp>=1.0.0
41
+
42
+ # Maritime Routing (Optional Extended Features)
43
+ scgraph>=1.0.0
44
+ global-land-mask>=1.0.0
45
+
46
+ #Web
47
+ fastapi
48
+ uvicorn[standard]
49
+ websockets
50
+ jinja2
requirements_full.txt ADDED
@@ -0,0 +1,190 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Eurus Environment - Thu Feb 12 22:56:45 CET 2026
2
+ # Python: Python 3.12.2
3
+
4
+ affine==2.4.0
5
+ aiohappyeyeballs==2.6.1
6
+ aiohttp==3.13.3
7
+ aiosignal==1.4.0
8
+ annotated-doc==0.0.4
9
+ annotated-types==0.7.0
10
+ anyio==4.12.1
11
+ appnope==0.1.4
12
+ arraylake==0.28.1
13
+ asttokens==3.0.1
14
+ attrs==25.4.0
15
+ cachetools==7.0.0
16
+ cachey==0.2.1
17
+ Cartopy==0.25.0
18
+ certifi==2026.1.4
19
+ cf_xarray==0.10.11
20
+ cffi==2.0.0
21
+ cftime==1.6.5
22
+ charset-normalizer==3.4.4
23
+ click==8.3.1
24
+ cligj==0.7.2
25
+ cloudpickle==3.1.2
26
+ cmocean==4.0.3
27
+ colorcet==3.1.0
28
+ comm==0.2.3
29
+ contourpy==1.3.3
30
+ coverage==7.13.2
31
+ cryptography==46.0.4
32
+ cycler==0.12.1
33
+ dask==2026.1.2
34
+ datashader==0.18.2
35
+ debugpy==1.8.19
36
+ decorator==5.2.1
37
+ distro==1.9.0
38
+ dnspython==2.8.0
39
+ donfig==0.8.1.post1
40
+ email-validator==2.3.0
41
+ executing==2.2.1
42
+ fastapi==0.128.0
43
+ fonttools==4.61.1
44
+ frozenlist==1.8.0
45
+ fsspec==2026.2.0
46
+ geographiclib==2.1
47
+ geopandas==1.1.2
48
+ geopy==2.4.1
49
+ global-land-mask==1.0.0
50
+ google-crc32c==1.8.0
51
+ h11==0.16.0
52
+ HeapDict==1.0.1
53
+ httpcore==1.0.9
54
+ httptools==0.7.1
55
+ httpx==0.27.2
56
+ httpx-sse==0.4.3
57
+ icechunk==1.1.17
58
+ idna==3.11
59
+ iniconfig==2.3.0
60
+ ipykernel==7.1.0
61
+ ipython==9.9.0
62
+ ipython_pygments_lexers==1.1.1
63
+ jedi==0.19.2
64
+ Jinja2==3.1.6
65
+ jiter==0.12.0
66
+ joblib==1.5.3
67
+ jsonpatch==1.33
68
+ jsonpointer==3.0.0
69
+ jsonschema==4.26.0
70
+ jsonschema-specifications==2025.9.1
71
+ jupyter_client==8.8.0
72
+ jupyter_core==5.9.1
73
+ kiwisolver==1.4.9
74
+ langchain==1.2.7
75
+ langchain-core==1.2.7
76
+ langchain-openai==1.1.7
77
+ langgraph==1.0.7
78
+ langgraph-checkpoint==4.0.0
79
+ langgraph-prebuilt==1.0.7
80
+ langgraph-sdk==0.3.3
81
+ langsmith==0.6.6
82
+ llvmlite==0.46.0
83
+ locket==1.0.0
84
+ markdown-it-py==4.0.0
85
+ MarkupSafe==3.0.3
86
+ matplotlib==3.10.8
87
+ matplotlib-inline==0.2.1
88
+ mcp==1.26.0
89
+ mdurl==0.1.2
90
+ morecantile==7.0.3
91
+ multidict==6.7.1
92
+ multipledispatch==1.0.0
93
+ nest-asyncio==1.6.0
94
+ numba==0.63.1
95
+ numba_celltree==0.4.1
96
+ numbagg==0.9.4
97
+ numcodecs==0.16.5
98
+ numpy==2.3.5
99
+ openai==2.16.0
100
+ orjson==3.11.5
101
+ ormsgpack==1.12.2
102
+ packaging==25.0
103
+ pandas==3.0.0
104
+ param==2.3.2
105
+ parso==0.8.5
106
+ partd==1.4.2
107
+ pexpect==4.9.0
108
+ pillow==12.1.0
109
+ platformdirs==4.5.1
110
+ pluggy==1.6.0
111
+ pooch==1.9.0
112
+ prompt_toolkit==3.0.52
113
+ propcache==0.4.1
114
+ psutil==7.2.2
115
+ ptyprocess==0.7.0
116
+ pure_eval==0.2.3
117
+ pycparser==3.0
118
+ pyct==0.6.0
119
+ pydantic==2.12.5
120
+ pydantic-settings==2.12.0
121
+ pydantic-xml==2.18.0
122
+ pydantic_core==2.41.5
123
+ Pygments==2.19.2
124
+ PyJWT==2.10.1
125
+ PyMuPDF==1.26.7
126
+ pyogrio==0.12.1
127
+ pyparsing==3.3.2
128
+ pyproj==3.7.2
129
+ pyshp==3.0.3
130
+ PySide6==6.10.1
131
+ PySide6_Addons==6.10.1
132
+ PySide6_Essentials==6.10.1
133
+ pytest==9.0.2
134
+ pytest-cov==7.0.0
135
+ python-dateutil==2.9.0.post0
136
+ python-dotenv==1.2.1
137
+ python-multipart==0.0.22
138
+ PyYAML==6.0.3
139
+ pyzmq==27.1.0
140
+ rasterio==1.5.0
141
+ rasterix==0.2.0
142
+ referencing==0.37.0
143
+ regex==2026.1.15
144
+ requests==2.32.5
145
+ requests-toolbelt==1.0.0
146
+ rich==14.3.1
147
+ rioxarray==0.21.0
148
+ rpds-py==0.30.0
149
+ ruamel.yaml==0.19.1
150
+ scgraph==2.15.0
151
+ scikit-learn==1.8.0
152
+ scipy==1.17.0
153
+ seaborn==0.13.2
154
+ shapely==2.1.2
155
+ shellingham==1.5.4
156
+ shiboken6==6.10.1
157
+ six==1.17.0
158
+ sniffio==1.3.1
159
+ sse-starlette==3.2.0
160
+ stack-data==0.6.3
161
+ starlette==0.50.0
162
+ structlog==25.5.0
163
+ tenacity==9.1.2
164
+ term-image==0.7.2
165
+ threadpoolctl==3.6.0
166
+ tiktoken==0.12.0
167
+ toolz==1.1.0
168
+ tornado==6.5.4
169
+ tqdm==4.67.1
170
+ traitlets==5.14.3
171
+ triangle==20250106
172
+ typer==0.21.1
173
+ typing-inspection==0.4.2
174
+ typing_extensions==4.15.0
175
+ urllib3==2.6.3
176
+ uuid_utils==0.14.0
177
+ uvicorn==0.40.0
178
+ uvloop==0.22.1
179
+ -e git+https://github.com/dmpantiu/Eurus.git@9a6d481226f01ea0cc61969659907827cc0933d1#egg=vostok
180
+ watchfiles==1.1.1
181
+ wcwidth==0.5.0
182
+ websockets==16.0
183
+ xarray==2025.11.0
184
+ xproj==0.2.1
185
+ xpublish==0.4.2
186
+ xpublish-tiles==0.4.0
187
+ xxhash==3.6.0
188
+ yarl==1.22.0
189
+ zarr==3.1.5
190
+ zstandard==0.25.0
scripts/qa_image_review.py ADDED
@@ -0,0 +1,369 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ QA Image Reviewer — Uses Gemini 3 Pro Preview (Vertex AI Express) to review
4
+ all generated plots from a QA run and checks whether each plot matches its
5
+ task requirements.
6
+
7
+ Usage:
8
+ python scripts/qa_image_review.py [--run RUN_DIR] [--query N] [--output FILE]
9
+ """
10
+
11
+ import argparse
12
+ import json
13
+ import os
14
+ import sys
15
+ import time
16
+ from datetime import datetime
17
+ from pathlib import Path
18
+
19
+ from dotenv import load_dotenv
20
+ from google import genai
21
+ from google.genai import types
22
+
23
+ # ── project root ────────────────────────────────────────────────────
24
+ PROJECT_ROOT = Path(__file__).resolve().parent.parent
25
+ load_dotenv(PROJECT_ROOT / ".env")
26
+
27
+ # ── Gemini Config ───────────────────────────────────────────────────
28
+ PRIMARY_MODEL = "gemini-3-pro-preview"
29
+ FALLBACK_MODEL = "gemini-2.0-flash"
30
+
31
+ # ── The query definitions (mirrored from qa_runner.py) ──────────────
32
+ QA_QUERIES = {
33
+ 1: {"slug": "europe_heatwave_anomaly",
34
+ "task": "Spatial map of 2m temperature anomalies across Europe during June 2023 heatwave vs June 2022."},
35
+ 2: {"slug": "storm_isha_mslp_wind",
36
+ "task": "MSLP isobars and 10m wind vectors over the North Atlantic for 2024-01-22 showing Storm Isha."},
37
+ 3: {"slug": "atmospheric_river_jan2023",
38
+ "task": "Total column water vapour for the US West Coast, Jan 2023, showing the atmospheric river event around Jan 9th."},
39
+ 4: {"slug": "sahara_heat_july2024",
40
+ "task": "Daily mean 2m temperature time series over the Sahara for July 2024 vs July 2023 on the same chart."},
41
+ 5: {"slug": "great_plains_wind_may2024",
42
+ "task": "Map of mean 10m wind speed over US Great Plains for May 2024, highlighting areas >5 m/s."},
43
+ 6: {"slug": "nino34_index",
44
+ "task": "Niño 3.4 index from ERA5 SST for 2015-2024 classifying El Niño / La Niña episodes."},
45
+ 7: {"slug": "elnino_vs_lanina_tropical_belt",
46
+ "task": "SST anomaly difference map: Dec 2023 (El Niño) minus Dec 2022 (La Niña) across the tropical belt."},
47
+ 8: {"slug": "nao_index",
48
+ "task": "NAO index from MSLP (Azores minus Iceland) for 2000-2024 with 3-month rolling mean."},
49
+ 9: {"slug": "australia_enso_rainfall",
50
+ "task": "Two-panel map of annual total precipitation over Eastern Australia for La Niña 2022 vs El Niño 2023, plus difference map."},
51
+ 10: {"slug": "med_eof_sst",
52
+ "task": "EOF analysis on Mediterranean SST anomalies for 2019-2024: first 3 modes with variance explained."},
53
+ 11: {"slug": "arctic_polar_amplification",
54
+ "task": "January mean 2m temperature maps for the Arctic (>70°N): 2024 vs 2000 side by side, with polar amplification quantification."},
55
+ 12: {"slug": "med_marine_heatwave_2023",
56
+ "task": "Summer JJA 2023 SST anomaly map over the Mediterranean vs 2018-2022 mean, highlighting marine heatwave hotspots >+2°C."},
57
+ 13: {"slug": "paris_decadal_comparison",
58
+ "task": "Average summer (JJA) temperature difference map for Paris: 2014-2023 vs 2000-2009, plus time series."},
59
+ 14: {"slug": "alps_snow_trend",
60
+ "task": "December-February snow depth trend over the Alps for the last 30 years."},
61
+ 15: {"slug": "uk_precip_anomaly_winter2024",
62
+ "task": "Total precipitation anomaly map over the British Isles for January 2024 vs 2019-2023 January mean, highlighting >150% normal."},
63
+ 16: {"slug": "delhi_heatwave_detection",
64
+ "task": "Heatwave events in Delhi 2010-2024 using 90th percentile threshold with 3-day criterion; frequency change analysis."},
65
+ 17: {"slug": "horn_africa_drought",
66
+ "task": "3-month SPI proxy for the Horn of Africa 2020-2024, identifying worst drought periods."},
67
+ 18: {"slug": "baghdad_hot_days",
68
+ "task": "Bar chart of days per year >35°C in Baghdad from 1980-2024 with trend line."},
69
+ 19: {"slug": "sea_p95_precip",
70
+ "task": "95th percentile daily precipitation map for Southeast Asia 2010-2023."},
71
+ 20: {"slug": "scandinavia_blocking_2018",
72
+ "task": "Blocking event over Scandinavia July 2018: MSLP anomalies persisting 5+ days."},
73
+ 21: {"slug": "rotterdam_shanghai_route",
74
+ "task": "Maritime route from Rotterdam to Shanghai with wind risk analysis for December."},
75
+ 22: {"slug": "indian_ocean_sst_dipole",
76
+ "task": "SST anomaly map across the Indian Ocean for October 2023 relative to 2019-2022 October mean, showing IOD pattern."},
77
+ 23: {"slug": "japan_typhoon_season_wind",
78
+ "task": "Mean and maximum 10m wind speed maps around Japan during typhoon season (Aug-Oct) 2023, highlighting areas >8 m/s."},
79
+ 24: {"slug": "south_atlantic_sst_gradient",
80
+ "task": "Mean SST field across the South Atlantic for March 2024 with SST isotherms and Brazil-Malvinas confluence zone."},
81
+ 25: {"slug": "north_sea_wind_power",
82
+ "task": "Mean 100m wind power density map across the North Sea for 2020-2024 identifying best offshore wind sites."},
83
+ 26: {"slug": "german_bight_weibull",
84
+ "task": "Weibull distribution fit to 100m wind speed at German Bight for 2023 with histogram and fit overlay."},
85
+ 27: {"slug": "solar_sahara_vs_germany",
86
+ "task": "Monthly mean incoming solar radiation (SSRD) comparison: Sahara vs Northern Germany for 2023."},
87
+ 28: {"slug": "persian_gulf_sst_summer",
88
+ "task": "Mean SST map across Persian Gulf and Arabian Sea for August 2023, highlighting areas where SST >32°C."},
89
+ 29: {"slug": "sahara_diurnal_t2_blh",
90
+ "task": "Diurnal cycle of 2m temperature and boundary layer height in the Sahara for July 2024, dual-axis plot."},
91
+ 30: {"slug": "amazon_convective_peak",
92
+ "task": "Hourly climatology of convective precipitation peak over the Amazon basin during DJF."},
93
+ 31: {"slug": "europe_rh_august",
94
+ "task": "Relative humidity map from 2m temperature and dewpoint for central Europe, August 2023."},
95
+ 32: {"slug": "hovmoller_equator_skt",
96
+ "task": "Hovmöller diagram of skin temperature along the equator for 2023 to visualize MJO."},
97
+ 33: {"slug": "hurricane_otis_dashboard",
98
+ "task": "Summary dashboard for Hurricane Otis (Oct 2023): SST map, wind speed time series, TCWV distribution in one figure."},
99
+ 34: {"slug": "california_sst_jan",
100
+ "task": "Average SST off California coast in January 2024 with spatial map of the SST field."},
101
+ 35: {"slug": "berlin_monthly_temp",
102
+ "task": "2023 monthly mean temperature for Berlin as a seasonal curve."},
103
+ 36: {"slug": "biscay_wind_stats",
104
+ "task": "10m wind speed stats for Bay of Biscay (last 3 years) with histogram or time series plot."},
105
+ }
106
+
107
+
108
+ REVIEW_SYSTEM_PROMPT = """\
109
+ You are a senior scientific visualization reviewer for a climate/weather data agent.
110
+ You will receive one or more PNG plots generated by an AI agent and the TASK that the agent was asked to complete.
111
+
112
+ Review each plot against the task and provide a structured assessment:
113
+
114
+ 1. **Task Compliance** (1-10): Does the plot address what was asked?
115
+ 2. **Scientific Accuracy** (1-10): Are axes labeled, units correct, colorbar present, projections reasonable?
116
+ 3. **Visual Quality** (1-10): Is the plot publication-quality? Good resolution, readable labels, professional aesthetics?
117
+ 4. **Spatial/Map Quality** (1-10): If it's a map — does it have coastlines, proper projection, geographic labels? If not a map, rate the chart type appropriateness.
118
+ 5. **Overall Score** (1-10): Weighted average considering all factors.
119
+
120
+ Also provide:
121
+ - **Summary**: 1-2 sentence summary of what the plot shows.
122
+ - **Strengths**: Key things done well.
123
+ - **Issues**: Any problems, missing elements, or improvements needed.
124
+
125
+ Respond ONLY in valid JSON with this exact structure:
126
+ {
127
+ "task_compliance": <int>,
128
+ "scientific_accuracy": <int>,
129
+ "visual_quality": <int>,
130
+ "spatial_quality": <int>,
131
+ "overall_score": <int>,
132
+ "summary": "<string>",
133
+ "strengths": ["<string>", ...],
134
+ "issues": ["<string>", ...]
135
+ }
136
+ """
137
+
138
+
139
def create_client() -> genai.Client:
    """Create a Gemini API client using Vertex AI Express (API-key auth).

    Reads the key from the ``vertex_api_key`` environment variable
    (loaded from .env at module import time) and exits the process
    with status 1 when it is missing.

    Returns:
        An initialized ``genai.Client`` configured for Vertex AI.
    """
    api_key = os.environ.get("vertex_api_key")
    if not api_key:
        print("❌ vertex_api_key not found in .env!")
        sys.exit(1)
    # Plain string literal: the original used an f-string with no
    # placeholders (lint F541); output is unchanged.
    print(" Using Vertex AI Express (API key auth)")
    return genai.Client(vertexai=True, api_key=api_key)
147
+
148
+
149
def review_single_question(client: genai.Client, qid: int, task: str,
                           image_paths: list[Path], model: str) -> dict:
    """Send one question's plots + task description to Gemini for review.

    Args:
        client: Initialized Gemini API client (Vertex AI Express).
        qid: Numeric question ID (used only to label the prompt).
        task: Natural-language task the plots are expected to satisfy.
        image_paths: PNG files to attach inline to the request.
        model: Gemini model name to call.

    Returns:
        The parsed review dict matching the JSON schema required by
        REVIEW_SYSTEM_PROMPT, or ``{"error": "..."}`` when the request
        or JSON parsing ultimately fails after all retries.
    """
    import re  # stdlib; used to salvage JSON from a noisy model response

    # Build content parts: text prompt followed by inline PNG images.
    prompt_text = (
        f"**TASK (Q{qid:02d}):** {task}\n\n"
        f"Below are {len(image_paths)} plot(s) generated by the agent. "
        f"Review them against the task."
    )
    parts = [types.Part.from_text(text=prompt_text)]

    for img_path in image_paths:
        with open(img_path, "rb") as f:
            img_bytes = f.read()
        parts.append(types.Part.from_bytes(data=img_bytes, mime_type="image/png"))

    for attempt in range(4):
        try:
            response = client.models.generate_content(
                model=model,
                contents=parts,
                config=types.GenerateContentConfig(
                    system_instruction=REVIEW_SYSTEM_PROMPT,
                    temperature=0.2,
                    max_output_tokens=1000,
                ),
            )
            raw = response.text.strip()
            # Strip markdown code fences if present.
            if raw.startswith("```"):
                raw = raw.split("\n", 1)[1] if "\n" in raw else raw[3:]
            if raw.endswith("```"):
                raw = raw[:-3]
            raw = raw.strip()
            return json.loads(raw)
        except json.JSONDecodeError:
            # Best-effort salvage: pull the first flat JSON object out of
            # the response text (the expected schema nests arrays, not
            # objects, so [^{}]* suffices).
            match = re.search(r'\{[^{}]*\}', raw, re.DOTALL)
            if match:
                try:
                    return json.loads(match.group())
                except json.JSONDecodeError:
                    pass
            # BUGFIX: previously a parse failure returned an error
            # immediately, wasting the remaining retries. Re-asking the
            # model usually yields valid JSON, so retry instead.
            if attempt < 3:
                time.sleep(2)
                continue
            return {"error": f"Failed to parse JSON: {raw[:500]}"}
        except Exception as e:
            err_str = str(e)
            if "429" in err_str or "RESOURCE_EXHAUSTED" in err_str:
                # Exponential backoff on rate limits, capped at 60 s.
                wait = min(2 ** attempt * 5, 60)
                print(f"\n Rate limited, waiting {wait}s (attempt {attempt+1}/4)...", end="", flush=True)
                time.sleep(wait)
            else:
                if attempt < 3:
                    time.sleep(2)
                    continue
                return {"error": str(e)[:300]}

    return {"error": "Max retries exceeded"}
208
+
209
+
210
def main() -> None:
    """CLI entry point: review all plots of a QA run with Gemini.

    Walks the question sub-directories of a run directory, sends each
    question's PNGs to Gemini via review_single_question(), prints a
    per-question score table plus aggregate stats, and writes the full
    structured results to <run_dir>/image_review.json (or --output).
    """
    parser = argparse.ArgumentParser(description="QA Image Reviewer using Gemini 3 Pro Preview")
    parser.add_argument("--run", type=str, default=None,
                        help="Path to QA run directory (default: latest in data/qa_runs/)")
    parser.add_argument("--query", type=int, default=None,
                        help="Review only a specific query ID")
    parser.add_argument("--output", type=str, default=None,
                        help="Output JSON file (default: <run_dir>/image_review.json)")
    parser.add_argument("--model", type=str, default=PRIMARY_MODEL,
                        help=f"Gemini model to use (default: {PRIMARY_MODEL})")
    args = parser.parse_args()

    # Find run directory: explicit --run wins; otherwise the
    # lexicographically latest run_* folder under data/qa_runs/.
    if args.run:
        run_dir = Path(args.run)
    else:
        qa_runs = PROJECT_ROOT / "data" / "qa_runs"
        runs = sorted(qa_runs.glob("run_*"))
        if not runs:
            print("❌ No QA runs found in data/qa_runs/")
            sys.exit(1)
        run_dir = runs[-1]

    if not run_dir.exists():
        print(f"❌ Run directory not found: {run_dir}")
        sys.exit(1)

    # Gemini client (Vertex AI Express)
    client = create_client()

    print(f"""
╔══════════════════════════════════════════════════════╗
║ QA Image Reviewer (Gemini 3 Pro Preview) ║
║ {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} ║
╚══════════════════════════════════════════════════════╝
Run directory: {run_dir}
Model: {args.model}
""")

    # Collect questions to review; qid keys are ints here and are
    # stringified only when serialized at the end.
    all_reviews = {}
    question_dirs = sorted(run_dir.glob("q*_*"))

    for qdir in question_dirs:
        # Extract question ID from folder name (e.g., q01_xxx -> 1)
        try:
            qid = int(qdir.name.split("_")[0][1:])
        except (ValueError, IndexError):
            continue

        # --query filter (QA ids start at 1, so truthiness check is safe)
        if args.query and qid != args.query:
            continue

        if qid not in QA_QUERIES:
            print(f"⚠️ Q{qid:02d}: Unknown query ID, skipping")
            continue

        # Find PNG files; a question with no plots is recorded but not sent.
        pngs = sorted(qdir.glob("*.png"))
        if not pngs:
            print(f"⏭️ Q{qid:02d} ({QA_QUERIES[qid]['slug']}): No PNG files, skipping")
            all_reviews[qid] = {"status": "no_images", "slug": QA_QUERIES[qid]["slug"]}
            continue

        task_desc = QA_QUERIES[qid]["task"]
        png_names = [p.name for p in pngs]

        print(f"🔍 Q{qid:02d} ({QA_QUERIES[qid]['slug']}): Reviewing {len(pngs)} image(s)...", end=" ", flush=True)

        try:
            start = time.time()
            review = review_single_question(client, qid, task_desc, pngs, args.model)
            elapsed = time.time() - start

            # Annotate the model's review with run metadata.
            review["slug"] = QA_QUERIES[qid]["slug"]
            review["task"] = task_desc
            review["images"] = png_names
            review["status"] = "reviewed"
            review["review_time_s"] = round(elapsed, 1)

            # overall_score may be missing or non-int if the model
            # returned an {"error": ...} dict — hence the type check.
            score = review.get("overall_score", "?")
            if isinstance(score, int):
                icon = "✅" if score >= 7 else "⚠️" if score >= 5 else "❌"
            else:
                icon = "❓"
            print(f"{icon} Score: {score}/10 ({elapsed:.1f}s)")

            all_reviews[qid] = review

        except Exception as e:
            print(f"❌ Error: {e}")
            all_reviews[qid] = {
                "status": "error",
                "slug": QA_QUERIES[qid]["slug"],
                "error": str(e),
            }

        # Rate limit: pause between calls
        time.sleep(1)

    # ── Summary ──────────────────────────────────────────────────────
    reviewed = [v for v in all_reviews.values() if v.get("status") == "reviewed"]
    scores = [v["overall_score"] for v in reviewed if isinstance(v.get("overall_score"), int)]

    print(f"\n{'='*70}")
    print("REVIEW SUMMARY")
    print(f"{'='*70}")

    # Score table: one line per question (reviewed / no_images / error).
    for qid in sorted(all_reviews.keys()):
        r = all_reviews[qid]
        if r.get("status") == "reviewed":
            s = r.get("overall_score", 0)
            if isinstance(s, int):
                icon = "✅" if s >= 7 else "⚠️" if s >= 5 else "❌"
            else:
                icon = "❓"
            tc = r.get("task_compliance", "?")
            sa = r.get("scientific_accuracy", "?")
            vq = r.get("visual_quality", "?")
            sq = r.get("spatial_quality", "?")
            print(f" {icon} Q{qid:02d} {r['slug']:35s} | Overall: {s:>2}/10 | "
                  f"Task:{tc} Sci:{sa} Vis:{vq} Spa:{sq}")
        elif r.get("status") == "no_images":
            print(f" ⏭️ Q{qid:02d} {r['slug']:35s} | No images")
        else:
            print(f" ❌ Q{qid:02d} {r['slug']:35s} | Error: {r.get('error', 'unknown')[:50]}")

    # Aggregate stats only when at least one integer score exists;
    # `avg` is reused below and is guarded by the same `scores` check.
    if scores:
        avg = sum(scores) / len(scores)
        excellent = sum(1 for s in scores if s >= 8)
        good = sum(1 for s in scores if 6 <= s < 8)
        needs_work = sum(1 for s in scores if s < 6)

        print(f"\n📊 Average score: {avg:.1f}/10 across {len(scores)} reviewed plots")
        print(f" 🟢 Excellent (8-10): {excellent}")
        print(f" 🟡 Good (6-7): {good}")
        print(f" 🔴 Needs work (<6): {needs_work}")

    # ── Save results ─────────────────────────────────────────────────
    output_path = Path(args.output) if args.output else run_dir / "image_review.json"

    # Convert int keys to strings for JSON
    output_data = {
        "timestamp": datetime.now().isoformat(),
        "run_directory": str(run_dir),
        "model": args.model,
        "total_reviewed": len(reviewed),
        "average_score": round(avg, 2) if scores else None,
        "reviews": {f"q{k:02d}": v for k, v in sorted(all_reviews.items())},
    }

    with open(output_path, "w") as f:
        json.dump(output_data, f, indent=2, ensure_ascii=False)

    print(f"\n💾 Full review saved to: {output_path}")
367
+
368
# Run the CLI entry point when executed as a script.
if __name__ == "__main__":
    main()
scripts/qa_runner.py ADDED
@@ -0,0 +1,738 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ QA Runner — Automated End-to-End Agent Testing
4
+ ===============================================
5
+ Runs test queries through the Eurus agent, captures ALL intermediate steps
6
+ (tool calls, tool outputs, reasoning, plots) and saves structured results
7
+ to data/qa_results/q{NN}_{slug}/.
8
+
9
+ Usage:
10
+ PYTHONPATH=src OPENAI_API_KEY=... python3 scripts/qa_runner.py
11
+
12
+ Or run a single query:
13
+ PYTHONPATH=src OPENAI_API_KEY=... python3 scripts/qa_runner.py --query 2
14
+ """
15
+
16
+ import os
17
+ import sys
18
+ import json
19
+ import shutil
20
+ import base64
21
+ import time
22
+ import argparse
23
+ from pathlib import Path
24
+ from datetime import datetime
25
+ from typing import Optional
26
+
27
+ # Ensure eurus package is importable
28
+ PROJECT_ROOT = Path(__file__).parent.parent
29
+ sys.path.insert(0, str(PROJECT_ROOT / "src"))
30
+ sys.path.insert(0, str(PROJECT_ROOT))
31
+
32
+ # Load .env (API keys)
33
+ from dotenv import load_dotenv
34
+ load_dotenv(PROJECT_ROOT / ".env")
35
+
36
+ from langchain_openai import ChatOpenAI
37
+ from langchain.agents import create_agent
38
+ from langchain_core.messages import HumanMessage, AIMessage, ToolMessage
39
+
40
+ from eurus.config import AGENT_SYSTEM_PROMPT, CONFIG, get_plots_dir
41
+ from eurus.tools import get_all_tools
42
+
43
# ============================================================================
# QA TEST QUERIES — 36 research-grade demo queries
#
# §1 Synoptic Meteorology & Case Studies (Q01–Q05)
# §2 Climate Variability & Teleconnections (Q06–Q10)
# §3 Trends & Climate Change Signals (Q11–Q15)
# §4 Extreme Events & Risk (Q16–Q20)
# §5 Maritime & Shipping (Q21–Q24)
# §6 Energy Assessment (Q25–Q28)
# §7 Diurnal & Sub-Daily Processes (Q29–Q30)
# §8 Multi-Variable & Diagnostics (Q31–Q33)
# §9 Quick Lookups (Q34–Q36)
#
# Each entry: id (1-36), slug (folder name), query (natural-language prompt
# sent to the agent), type (expected analysis kind), variables (ERA5 short
# names), region (human-readable area label).
# ============================================================================

QA_QUERIES = [
    # ═══════════════════════════════════════════════════════════════
    # §1 — Synoptic Meteorology & Case Studies
    # ═══════════════════════════════════════════════════════════════
    {
        "id": 1,
        "slug": "europe_heatwave_anomaly",
        "query": "Show me a spatial map of 2m temperature anomalies across Europe "
                 "during the June 2023 heatwave compared to June 2022.",
        "type": "anomaly_map",
        "variables": ["t2"],
        "region": "Europe",
    },
    {
        "id": 2,
        "slug": "storm_isha_mslp_wind",
        "query": "Plot MSLP isobars and 10m wind vectors over the North Atlantic "
                 "for 2024-01-22 — I want to see Storm Isha's structure.",
        "type": "contour_quiver",
        "variables": ["mslp", "u10", "v10"],
        "region": "North Atlantic",
    },
    {
        "id": 3,
        "slug": "atmospheric_river_jan2023",
        "query": "Download total column water vapour for the US West Coast, Jan 2023, "
                 "and show the atmospheric river event around Jan 9th.",
        "type": "ar_detection",
        "variables": ["tcwv"],
        "region": "US West Coast",
    },
    {
        "id": 4,
        "slug": "sahara_heat_july2024",
        "query": "Plot the daily mean 2m temperature time series averaged over "
                 "the Sahara (20-30°N, 0 to 15°E) for July 2024 and compare "
                 "it to July 2023 on the same chart.",
        "type": "time_series",
        "variables": ["t2"],
        "region": "Sahara",
    },
    {
        "id": 5,
        "slug": "great_plains_wind_may2024",
        "query": "Plot a map of mean 10m wind speed over the US Great Plains "
                 "(30-45°N, -105 to -90°W) for May 2024 and highlight areas exceeding 5 m/s.",
        "type": "threshold_map",
        "variables": ["u10", "v10"],
        "region": "US Great Plains",
    },

    # ═══════════════════════════════════════════════════════════════
    # §2 — Climate Variability & Teleconnections
    # ═══════════════════════════════════════════════════════════════
    {
        "id": 6,
        "slug": "nino34_index",
        "query": "Calculate the Niño 3.4 index from ERA5 SST for 2015-2024 and "
                 "classify El Niño / La Niña episodes.",
        "type": "climate_index",
        "variables": ["sst"],
        "region": "Tropical Pacific",
    },
    {
        "id": 7,
        "slug": "elnino_vs_lanina_tropical_belt",
        "query": "Compare SST anomalies across the entire tropical belt "
                 "(30°S-30°N, global) for December 2023 (peak El Niño) vs December 2022 "
                 "(La Niña). Show the full basin-wide pattern across the Pacific, "
                 "Atlantic, and Indian oceans in a single anomaly difference map.",
        "type": "anomaly_comparison",
        "variables": ["sst"],
        "region": "Tropical Belt (global)",
    },
    {
        "id": 8,
        "slug": "nao_index",
        "query": "Compute the NAO index from MSLP (Azores minus Iceland) for 2000-2024 "
                 "and plot it with a 3-month rolling mean.",
        "type": "climate_index",
        "variables": ["mslp"],
        "region": "North Atlantic",
    },
    {
        "id": 9,
        "slug": "australia_enso_rainfall",
        "query": "Compare precipitation over Eastern Australia (25-45°S, 145-155°E) "
                 "between the La Niña year 2022 and El Niño year 2023. "
                 "Show a two-panel map of annual total precipitation for each year "
                 "and a difference map (2023 minus 2022).",
        "type": "multi_year_anomaly",
        "variables": ["tp"],
        "region": "Australia",
    },
    {
        "id": 10,
        "slug": "med_eof_sst",
        "query": "Perform an EOF analysis on Mediterranean SST anomalies "
                 "(30-46°N, -6 to 36°E) for 2019-2024 and show the first 3 modes "
                 "with variance explained. Interpret the dominant patterns.",
        "type": "eof_analysis",
        "variables": ["sst"],
        "region": "Mediterranean",
    },

    # ═══════════════════════════════════════════════════════════════
    # §3 — Trends & Climate Change Signals
    # ═══════════════════════════════════════════════════════════════
    {
        "id": 11,
        "slug": "arctic_polar_amplification",
        "query": "Compare January mean 2m temperature across the entire Arctic "
                 "(north of 70°N) for 2024 vs 2000. Show both maps side by side, "
                 "compute the area-weighted temperature difference, and quantify "
                 "polar amplification.",
        "type": "decadal_comparison",
        "variables": ["t2"],
        "region": "Arctic (>70°N)",
    },
    {
        "id": 12,
        "slug": "med_marine_heatwave_2023",
        "query": "Map the summer (JJA) 2023 mean SST anomaly across the entire "
                 "Mediterranean basin (30-46°N, -6 to 36°E) compared to the 2018-2022 "
                 "summer mean. Identify marine heatwave hotspots where SST exceeded "
                 "+2°C above normal.",
        "type": "marine_heatwave",
        "variables": ["sst"],
        "region": "Mediterranean",
    },
    {
        "id": 13,
        "slug": "paris_decadal_comparison",
        "query": "Compare the average summer (JJA) temperature in Paris between the "
                 "decades 2000-2009 and 2014-2023 — show a difference map and time series.",
        "type": "multi_panel_comparison",
        "variables": ["t2"],
        "region": "Paris",
    },
    {
        "id": 14,
        "slug": "alps_snow_trend",
        "query": "Has the snow depth over the Alps decreased over the last 30 years? "
                 "Show me the December-February trend.",
        "type": "trend_analysis",
        "variables": ["sd"],
        "region": "Alps",
    },
    {
        "id": 15,
        "slug": "uk_precip_anomaly_winter2024",
        "query": "Map the total precipitation anomaly over the British Isles "
                 "(49-60°N, 11°W-2°E) for January 2024 compared to the 2019-2023 "
                 "January mean. Highlight regions receiving more than 150% of normal "
                 "rainfall. Save the map as a PNG file.",
        "type": "anomaly_map",
        "variables": ["tp"],
        "region": "British Isles",
    },

    # ═══════════════════════════════════════════════════════════════
    # §4 — Extreme Events & Risk
    # ═══════════════════════════════════════════════════════════════
    {
        "id": 16,
        "slug": "delhi_heatwave_detection",
        "query": "Detect heatwave events in Delhi from 2010-2024 using the 90th "
                 "percentile threshold with a 3-day duration criterion — how has the "
                 "frequency changed?",
        "type": "heatwave_detection",
        "variables": ["t2"],
        "region": "Delhi",
    },
    {
        "id": 17,
        "slug": "horn_africa_drought",
        "query": "Calculate a 3-month SPI proxy for the Horn of Africa "
                 "(Ethiopia/Somalia) for 2020-2024 — when were the worst drought periods?",
        "type": "drought_analysis",
        "variables": ["tp"],
        "region": "Horn of Africa",
    },
    {
        "id": 18,
        "slug": "baghdad_hot_days",
        "query": "How many days per year exceeded 35°C in Baghdad from 1980 to 2024? "
                 "Plot as a bar chart with a trend line.",
        "type": "exceedance_frequency",
        "variables": ["t2"],
        "region": "Baghdad",
    },
    {
        "id": 19,
        "slug": "sea_p95_precip",
        "query": "Show me the 95th percentile daily precipitation map for Southeast Asia "
                 "for 2010-2023.",
        "type": "extreme_percentile",
        "variables": ["tp"],
        "region": "Southeast Asia",
    },
    {
        "id": 20,
        "slug": "scandinavia_blocking_2018",
        "query": "Analyse the blocking event over Scandinavia in July 2018 — show MSLP "
                 "anomalies persisting for 5+ days.",
        "type": "blocking_detection",
        "variables": ["mslp"],
        "region": "Scandinavia",
    },

    # ═══════════════════════════════════════════════════════════════
    # §5 — Maritime & Shipping
    # ═══════════════════════════════════════════════════════════════
    {
        "id": 21,
        "slug": "rotterdam_shanghai_route",
        "query": "Calculate the maritime route from Rotterdam to Shanghai and analyse "
                 "wind risk along the route for December.",
        "type": "maritime_route_risk",
        "variables": ["u10", "v10"],
        "region": "Europe-Asia",
    },
    {
        "id": 22,
        "slug": "indian_ocean_sst_dipole",
        "query": "Map the SST anomaly across the Indian Ocean (30°S-25°N, 30-120°E) "
                 "for October 2023 relative to the 2019-2022 October mean. "
                 "Show the Indian Ocean Dipole pattern. Save the map as PNG.",
        "type": "anomaly_map",
        "variables": ["sst"],
        "region": "Indian Ocean",
    },
    {
        "id": 23,
        "slug": "japan_typhoon_season_wind",
        "query": "Map the mean and maximum 10m wind speed over the seas around Japan "
                 "(20-45°N, 120-150°E) during typhoon season (August-October) 2023. "
                 "Show two-panel spatial maps highlighting areas where mean wind "
                 "exceeded 8 m/s. Save as PNG.",
        "type": "multi_panel_map",
        "variables": ["u10", "v10"],
        "region": "Japan",
    },
    {
        "id": 24,
        "slug": "south_atlantic_sst_gradient",
        "query": "Map the mean SST field across the South Atlantic (40°S-5°N, 50°W-15°E) "
                 "for March 2024. Overlay SST isotherms and highlight the "
                 "Brazil-Malvinas confluence zone. Save as PNG.",
        "type": "sst_map",
        "variables": ["sst"],
        "region": "South Atlantic",
    },

    # ═══════════════════════════════════════════════════════════════
    # §6 — Energy Assessment
    # ═══════════════════════════════════════════════════════════════
    {
        "id": 25,
        "slug": "north_sea_wind_power",
        "query": "Map the mean 100m wind power density across the North Sea for "
                 "2020-2024 — where are the best offshore wind sites?",
        "type": "wind_energy",
        "variables": ["u100", "v100"],
        "region": "North Sea",
    },
    {
        "id": 26,
        "slug": "german_bight_weibull",
        "query": "Fit a Weibull distribution to 100m wind speed at 54°N, 7°E "
                 "(German Bight) for 2023 and estimate the capacity factor for a "
                 "3-25 m/s turbine range. Plot the histogram with Weibull fit overlay "
                 "and save as PNG.",
        "type": "weibull_analysis",
        "variables": ["u100", "v100"],
        "region": "German Bight",
    },
    {
        "id": 27,
        "slug": "solar_sahara_vs_germany",
        "query": "Compare incoming solar radiation (SSRD) between the Sahara and "
                 "northern Germany across 2023 — show monthly means.",
        "type": "comparison_timeseries",
        "variables": ["ssrd"],
        "region": "Sahara / Germany",
    },
    {
        "id": 28,
        "slug": "persian_gulf_sst_summer",
        "query": "Map the mean SST across the Persian Gulf and Arabian Sea "
                 "(12-32°N, 44-70°E) for August 2023. Highlight areas where SST "
                 "exceeded 32°C in a spatial map. Save as PNG.",
        "type": "threshold_map",
        "variables": ["sst"],
        "region": "Persian Gulf",
    },

    # ═══════════════════════════════════════════════════════════════
    # §7 — Diurnal & Sub-Daily Processes
    # ═══════════════════════════════════════════════════════════════
    {
        "id": 29,
        "slug": "sahara_diurnal_t2_blh",
        "query": "Show the diurnal cycle of 2m temperature and boundary layer height "
                 "in the Sahara for July 2024 — dual-axis plot.",
        "type": "diurnal_cycle",
        "variables": ["t2", "blh"],
        "region": "Sahara",
    },
    {
        "id": 30,
        "slug": "amazon_convective_peak",
        "query": "When does convective precipitation peak over the Amazon basin during "
                 "DJF? Hourly climatology please.",
        "type": "diurnal_cycle",
        "variables": ["cp"],
        "region": "Amazon",
    },

    # ═══════════════════════════════════════════════════════════════
    # §8 — Multi-Variable & Diagnostics
    # ═══════════════════════════════════════════════════════════════
    {
        "id": 31,
        "slug": "europe_rh_august",
        "query": "Compute relative humidity from 2m temperature and dewpoint for "
                 "central Europe, August 2023, and map the spatial mean.",
        "type": "derived_variable",
        "variables": ["t2", "d2"],
        "region": "Central Europe",
    },
    {
        "id": 32,
        "slug": "hovmoller_equator_skt",
        "query": "Create a Hovmöller diagram of 850 hPa equivalent — use skin "
                 "temperature as proxy — along the equator for 2023 to visualise the MJO.",
        "type": "hovmoller",
        "variables": ["skt"],
        "region": "Equatorial",
    },
    {
        "id": 33,
        "slug": "hurricane_otis_dashboard",
        "query": "Plot a summary dashboard for Hurricane Otis (Oct 2023, Acapulco): "
                 "SST map, wind speed time series, and TCWV distribution in one figure.",
        "type": "dashboard",
        "variables": ["sst", "u10", "v10", "tcwv"],
        "region": "East Pacific / Mexico",
    },

    # ═══════════════════════════════════════════════════════════════
    # §9 — Quick Lookups
    # ═══════════════════════════════════════════════════════════════
    {
        "id": 34,
        "slug": "california_sst_jan",
        "query": "What was the average SST off the coast of California in January 2024? "
                 "Also plot a spatial map of the SST field for that month and save as PNG.",
        "type": "point_retrieval",
        "variables": ["sst"],
        "region": "California",
    },
    {
        "id": 35,
        "slug": "berlin_monthly_temp",
        "query": "Plot the 2023 monthly mean temperature for Berlin as a seasonal curve.",
        "type": "time_series",
        "variables": ["t2"],
        "region": "Berlin",
    },
    {
        "id": 36,
        "slug": "biscay_wind_stats",
        "query": "Download 10m wind speed for the Bay of Biscay, last 3 years, and "
                 "give me basic statistics. Also plot a wind speed histogram or time "
                 "series and save as PNG.",
        "type": "stats_retrieval",
        "variables": ["u10", "v10"],
        "region": "Bay of Biscay",
    },
]
438
+
439
+
440
# ============================================================================
# AGENT SETUP (mirrors main.py exactly)
# ============================================================================

def build_agent():
    """Build a LangChain agent with full tool suite.

    Uses the model name/temperature from CONFIG, the shared system prompt,
    and the tool set with routing disabled and the methodology guide enabled.
    """
    chat_model = ChatOpenAI(
        model=CONFIG.model_name,
        temperature=CONFIG.temperature,
    )

    toolset = get_all_tools(enable_routing=False, enable_guide=True)

    # Return the agent directly; callers only need the invokable object.
    return create_agent(
        model=chat_model,
        tools=toolset,
        system_prompt=AGENT_SYSTEM_PROMPT,
        debug=False,
    )
461
+
462
+
463
# ============================================================================
# STEP CAPTURE
# ============================================================================

def extract_steps(messages) -> list:
    """
    Extract ALL intermediate steps from agent message history.
    Returns list of step dicts with type, content, tool_name, etc.
    """
    collected = []

    for message in messages:
        if isinstance(message, HumanMessage):
            collected.append({
                "step": len(collected) + 1,
                "type": "user_query",
                "content": message.content[:2000],
            })

        elif isinstance(message, AIMessage):
            if message.tool_calls:
                # One step per requested tool call; large argument payloads
                # are serialised and truncated to keep steps.json readable.
                for call in message.tool_calls:
                    serialized = json.dumps(call.get("args", {}), indent=2, default=str)
                    if len(serialized) > 5000:
                        serialized = serialized[:5000] + "\n... [TRUNCATED]"

                    collected.append({
                        "step": len(collected) + 1,
                        "type": "tool_call",
                        "tool_name": call.get("name", "unknown"),
                        "tool_id": call.get("id", ""),
                        # Small payloads round-trip back to structured JSON;
                        # truncated ones are kept as the raw (cut) string.
                        "arguments": json.loads(serialized) if len(serialized) <= 5000 else serialized,
                        "reasoning": message.content[:1000] if message.content else "",
                    })
            elif message.content:
                # Final answer or intermediate free-text reasoning.
                collected.append({
                    "step": len(collected) + 1,
                    "type": "ai_response",
                    "content": message.content[:5000],
                })

        elif isinstance(message, ToolMessage):
            # Tool output, truncated to 3000 chars.
            payload = message.content if isinstance(message.content, str) else str(message.content)
            if len(payload) > 3000:
                payload = payload[:3000] + "\n... [TRUNCATED]"

            collected.append({
                "step": len(collected) + 1,
                "type": "tool_output",
                "tool_name": message.name if hasattr(message, 'name') else "unknown",
                "tool_call_id": message.tool_call_id if hasattr(message, 'tool_call_id') else "",
                "content": payload,
            })

    return collected
522
+
523
+
524
# ============================================================================
# QA RUNNER
# ============================================================================

def _base_metadata(query_def: dict, elapsed: float) -> dict:
    """Metadata fields common to both success and error results."""
    return {
        "query_id": query_def["id"],
        "slug": query_def["slug"],
        "query": query_def["query"],
        "type": query_def.get("type", "unknown"),
        "variables": query_def.get("variables", []),
        "region": query_def.get("region", ""),
        "timestamp": datetime.now().isoformat(),
        "elapsed_seconds": round(elapsed, 1),
    }


def _write_json(path: Path, payload) -> None:
    """Dump JSON with explicit UTF-8 encoding.

    The payloads contain non-ASCII text (°, —, emoji, "Niño") and are written
    with ensure_ascii=False, so relying on the locale default encoding can
    raise UnicodeEncodeError on non-UTF-8 systems.
    """
    with open(path, "w", encoding="utf-8") as f:
        json.dump(payload, f, indent=2, default=str, ensure_ascii=False)


def run_single_query(agent, query_def: dict, output_dir: Path) -> dict:
    """
    Run a single QA query and capture everything: intermediate steps
    (steps.json), the final answer (response.md), any newly created plot
    PNGs, and a metadata.json summary.

    Args:
        agent: an invokable LangChain agent (as built by build_agent()).
        query_def: one entry of QA_QUERIES (id, slug, query, type, ...).
        output_dir: root directory; artifacts go into qNN_<slug>/ beneath it.

    Returns: metadata dict (status "success" or "error"); never raises —
    all exceptions are captured into an error metadata record.
    """
    qid = query_def["id"]
    slug = query_def["slug"]
    query = query_def["query"]

    folder = output_dir / f"q{qid:02d}_{slug}"
    folder.mkdir(parents=True, exist_ok=True)

    print(f"\n{'='*70}")
    print(f"  Q{qid:02d}: {query[:70]}...")
    print(f"{'='*70}")

    start_time = time.time()

    try:
        # Snapshot existing plots BEFORE running so we only copy NEW ones
        plots_dir = get_plots_dir()
        existing_plots = set()
        if plots_dir.exists():
            existing_plots = {f.name for f in plots_dir.glob("*.png")}

        # Invoke agent
        config = {"recursion_limit": 35}
        messages = [HumanMessage(content=query)]

        result = agent.invoke({"messages": messages}, config=config)

        elapsed = time.time() - start_time
        result_messages = result["messages"]

        # Extract intermediate steps
        steps = extract_steps(result_messages)

        # Final response = last AI message that has content but no tool calls
        final_response = ""
        for msg in reversed(result_messages):
            if isinstance(msg, AIMessage) and msg.content and not msg.tool_calls:
                final_response = msg.content
                break

        # Save steps.json
        _write_json(folder / "steps.json", steps)

        # Save final response (UTF-8: queries/responses contain non-ASCII)
        with open(folder / "response.md", "w", encoding="utf-8") as f:
            f.write(f"# Q{qid:02d}: {slug}\n\n")
            f.write(f"**Query:** {query}\n\n")
            f.write(f"**Elapsed:** {elapsed:.1f}s\n\n")
            f.write("---\n\n")
            f.write(final_response)

        # Copy only NEW plots (diff against pre-query snapshot)
        plot_files = []
        if plots_dir.exists():
            for f_path in sorted(plots_dir.glob("*.png")):
                if f_path.name not in existing_plots:
                    shutil.copy2(f_path, folder / f_path.name)
                    plot_files.append(f_path.name)
                    print(f"  📊 Plot saved: {f_path.name}")

        # Count tool calls
        tool_calls = [s for s in steps if s["type"] == "tool_call"]
        tools_used = list(set(s["tool_name"] for s in tool_calls))

        # Build metadata (shared base + success-specific fields)
        metadata = _base_metadata(query_def, elapsed)
        metadata.update({
            "status": "success",
            "tools_used": tools_used,
            "num_tool_calls": len(tool_calls),
            "num_steps": len(steps),
            "plot_files": plot_files,
            "notes": "",
        })
        _write_json(folder / "metadata.json", metadata)

        print(f"  ✅ SUCCESS in {elapsed:.1f}s | Tools: {', '.join(tools_used)} | Steps: {len(steps)}")

        return metadata

    except Exception as e:
        elapsed = time.time() - start_time
        print(f"  ❌ FAILED in {elapsed:.1f}s: {e}")

        metadata = _base_metadata(query_def, elapsed)
        metadata.update({
            "status": "error",
            "error": str(e),
            "tools_used": [],
            "num_tool_calls": 0,
            "num_steps": 0,
            "plot_files": [],
            "notes": f"Error: {e}",
        })
        _write_json(folder / "metadata.json", metadata)

        return metadata
655
+
656
+
657
def main():
    """CLI entry point: run selected QA queries and write artifacts + summary.

    Flags:
        --query N        run a single query by ID (1-36)
        --start/--end    run an inclusive ID range (default 1-36)
        --output DIR     output directory (default: data/qa_results)
        --skip-existing  skip queries whose folder already has metadata.json
    """
    parser = argparse.ArgumentParser(description="Eurus QA Runner")
    parser.add_argument("--query", type=int, help="Run a single query by ID (1-36)")
    parser.add_argument("--start", type=int, default=1, help="Start from query ID")
    parser.add_argument("--end", type=int, default=36, help="End at query ID (inclusive)")
    parser.add_argument("--output", type=str, default=None, help="Output directory (default: data/qa_results)")
    parser.add_argument("--skip-existing", action="store_true", help="Skip if folder already has metadata.json")
    args = parser.parse_args()

    # Check API key
    if not os.environ.get("OPENAI_API_KEY"):
        print("❌ OPENAI_API_KEY not set!")
        sys.exit(1)

    if args.output:
        output_dir = Path(args.output)
    else:
        output_dir = PROJECT_ROOT / "data" / "qa_results"
    output_dir.mkdir(parents=True, exist_ok=True)

    print(f"""
╔══════════════════════════════════════════════════════╗
║              Eurus QA Runner v1.0                    ║
║          {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}                         ║
╚══════════════════════════════════════════════════════╝
Output: {output_dir}
""")

    # Build agent once
    print("🏗️ Building agent...")
    agent = build_agent()
    print("✅ Agent ready\n")

    # Select queries (FIX: `is not None` — plain truthiness would misread
    # a falsy-but-present value and silently fall back to the range filter)
    if args.query is not None:
        queries = [q for q in QA_QUERIES if q["id"] == args.query]
    else:
        queries = [q for q in QA_QUERIES if args.start <= q["id"] <= args.end]

    if not queries:
        # Make a bad --query / --start / --end visible instead of exiting silently
        print("⚠️ No queries matched the given selection (valid IDs: 1-36).")

    results = []
    for q in queries:
        folder = output_dir / f"q{q['id']:02d}_{q['slug']}"
        if args.skip_existing and (folder / "metadata.json").exists():
            print(f"⏭️ Skipping Q{q['id']:02d} (already exists)")
            continue

        results.append(run_single_query(agent, q, output_dir))

    # Print summary
    print(f"\n{'='*70}")
    print("QA SUMMARY")
    print(f"{'='*70}")

    success = sum(1 for r in results if r["status"] == "success")
    failed = sum(1 for r in results if r["status"] == "error")
    total_time = sum(r["elapsed_seconds"] for r in results)

    for r in results:
        status = "✅" if r["status"] == "success" else "❌"
        print(f"  {status} Q{r['query_id']:02d} ({r['slug']:20s}) | "
              f"{r['elapsed_seconds']:5.1f}s | Tools: {', '.join(r['tools_used'])}")

    print(f"\nTotal: {success} passed, {failed} failed, {total_time:.1f}s total")

    # Save summary (FIX: explicit UTF-8 — content has non-ASCII and is
    # written with ensure_ascii=False, which breaks on non-UTF-8 locales)
    summary_path = output_dir / "qa_summary.json"
    with open(summary_path, "w", encoding="utf-8") as f:
        json.dump({
            "timestamp": datetime.now().isoformat(),
            "total_queries": len(results),
            "passed": success,
            "failed": failed,
            "total_time_seconds": round(total_time, 1),
            "results": results,
        }, f, indent=2, ensure_ascii=False)

    print(f"\nSummary saved to: {summary_path}")


if __name__ == "__main__":
    main()
setup_env.sh ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Bootstrap the Eurus development environment:
# ensure a .env template exists, then install Python dependencies.
echo "Setting up Eurus environment..."

# Create the .env template only when one is not already present.
if [ -f .env ]; then
    echo ".env file already exists."
else
    echo "Creating .env file..."
    cat > .env << EOL
OPENAI_API_KEY=your_openai_api_key
ARRAYLAKE_API_KEY=your_arraylake_api_key
EOL
    echo ".env file created. Please update it with your API keys."
fi

# Install dependencies
echo "Installing dependencies..."
pip install -r requirements.txt
pip install -e .

echo "Setup complete!"
src/eurus/__init__.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Eurus - ERA5 Climate Analysis Agent
3
+ ====================================
4
+
5
+ A scientific climate analysis platform powered by ERA5 reanalysis data from
6
+ Earthmover's cloud-optimized archive via Icechunk.
7
+
8
+ Features:
9
+ - ERA5 reanalysis data retrieval (SST, temperature, wind, pressure, etc.)
10
+ - Interactive Python REPL with pre-loaded scientific libraries
11
+ - Maritime route calculation with weather risk assessment
12
+ - Analysis methodology guides for climate science
13
+ - Intelligent caching with persistent memory
14
+ - Predefined geographic regions (El Niño, Atlantic, Pacific, etc.)
15
+ - Full MCP protocol support for Claude and other AI assistants
16
+
17
+ Example usage as MCP server:
18
+ # In .mcp.json
19
+ {
20
+ "mcpServers": {
21
+ "era5": {
22
+ "command": "era5-mcp",
23
+ "env": {"ARRAYLAKE_API_KEY": "your_key"}
24
+ }
25
+ }
26
+ }
27
+
28
+ Example usage as Python library:
29
+ from eurus import retrieve_era5_data, list_available_variables
30
+ from eurus.tools import get_all_tools
31
+
32
+ # Download SST data
33
+ result = retrieve_era5_data(
34
+ query_type="temporal",
35
+ variable_id="sst",
36
+ start_date="2024-01-01",
37
+ end_date="2024-01-07",
38
+ region="california_coast"
39
+ )
40
+
41
+ # Get all tools for agent (only core tools, no science clutter)
42
+ tools = get_all_tools(enable_routing=True)
43
+ """
44
+
45
+ __version__ = "1.1.0"
46
+ __author__ = "Eurus Team"
47
+
48
+ from eurus.config import (
49
+ ERA5_VARIABLES,
50
+ GEOGRAPHIC_REGIONS,
51
+ AGENT_SYSTEM_PROMPT,
52
+ get_variable_info,
53
+ get_short_name,
54
+ list_available_variables,
55
+ )
56
+ from eurus.retrieval import retrieve_era5_data
57
+ from eurus.memory import MemoryManager, get_memory
58
+ from eurus.tools import get_all_tools
59
+
60
+ __all__ = [
61
+ # Version
62
+ "__version__",
63
+ # Config
64
+ "ERA5_VARIABLES",
65
+ "GEOGRAPHIC_REGIONS",
66
+ "AGENT_SYSTEM_PROMPT",
67
+ "get_variable_info",
68
+ "get_short_name",
69
+ "list_available_variables",
70
+ # Retrieval
71
+ "retrieve_era5_data",
72
+ # Memory
73
+ "MemoryManager",
74
+ "get_memory",
75
+ # Tools
76
+ "get_all_tools",
77
+ ]
src/eurus/config.py ADDED
@@ -0,0 +1,751 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ERA5 MCP Configuration
3
+ ======================
4
+
5
+ Centralized configuration including ERA5 variable catalog, geographic regions,
6
+ and runtime settings.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import os
12
+ from dataclasses import dataclass, field
13
+ from pathlib import Path
14
+ from typing import Dict, Optional, List
15
+ from datetime import datetime
16
+
17
# =============================================================================
# PATHS
# =============================================================================

def get_data_dir() -> Path:
    """Return the data directory, creating it on first use.

    Location comes from $ERA5_DATA_DIR when set, otherwise ./data
    relative to the current working directory.
    """
    configured = os.environ.get("ERA5_DATA_DIR")
    # `is not None` (not truthiness) so an explicitly-set empty value keeps
    # the original behavior of Path("").
    target = Path(configured) if configured is not None else Path.cwd() / "data"
    target.mkdir(parents=True, exist_ok=True)
    return target
26
+
27
+
28
def get_plots_dir() -> Path:
    """Return the plots subdirectory beneath the data directory, creating it on demand."""
    plots = get_data_dir().joinpath("plots")
    plots.mkdir(parents=True, exist_ok=True)
    return plots
33
+
34
+
35
def get_memory_dir() -> Path:
    """Return the memory directory, creating it on first use.

    Location comes from $ERA5_MEMORY_DIR when set, otherwise ./.memory
    relative to the current working directory.
    """
    configured = os.environ.get("ERA5_MEMORY_DIR")
    memory_dir = Path(configured) if configured is not None else Path.cwd() / ".memory"
    memory_dir.mkdir(parents=True, exist_ok=True)
    return memory_dir
40
+
41
+
42
# =============================================================================
# ERA5 VARIABLE CATALOG
# =============================================================================

@dataclass(frozen=True)
class ERA5Variable:
    """Immutable metadata record for one ERA5 variable."""

    short_name: str        # catalog key, e.g. "sst"
    long_name: str         # human-readable name
    units: str             # physical units, e.g. "K", "m/s"
    description: str       # one-line explanation of the quantity
    category: str          # grouping such as "ocean" / "atmosphere"
    typical_range: tuple[float | None, float | None] = (None, None)  # (min, max) hint for plotting
    colormap: str = "viridis"  # default matplotlib colormap name

    def __str__(self) -> str:
        # e.g. 'sst: Sea Surface Temperature (K)'
        return "{}: {} ({})".format(self.short_name, self.long_name, self.units)
60
+
61
+
62
+ # Comprehensive ERA5 variable mapping — ALL 22 Arraylake variables
63
+ # Source: earthmover-public/era5-surface-aws Icechunk store
64
+ ERA5_VARIABLES: Dict[str, ERA5Variable] = {
65
+ # ── Ocean ──────────────────────────────────────────────────────────────
66
+ "sst": ERA5Variable(
67
+ short_name="sst",
68
+ long_name="Sea Surface Temperature",
69
+ units="K",
70
+ description="Temperature of sea water near the surface",
71
+ category="ocean",
72
+ typical_range=(270, 310),
73
+ colormap="RdYlBu_r"
74
+ ),
75
+ # ── Temperature ────────────────────────────────────────────────────────
76
+ "t2": ERA5Variable(
77
+ short_name="t2",
78
+ long_name="2m Temperature",
79
+ units="K",
80
+ description="Air temperature at 2 meters above the surface",
81
+ category="atmosphere",
82
+ typical_range=(220, 330),
83
+ colormap="RdYlBu_r"
84
+ ),
85
+ "d2": ERA5Variable(
86
+ short_name="d2",
87
+ long_name="2m Dewpoint Temperature",
88
+ units="K",
89
+ description="Temperature to which air at 2m must cool to reach saturation; indicates humidity",
90
+ category="atmosphere",
91
+ typical_range=(220, 310),
92
+ colormap="RdYlBu_r"
93
+ ),
94
+ "skt": ERA5Variable(
95
+ short_name="skt",
96
+ long_name="Skin Temperature",
97
+ units="K",
98
+ description="Temperature of the Earth's uppermost surface layer (land, ocean, or ice)",
99
+ category="surface",
100
+ typical_range=(220, 340),
101
+ colormap="RdYlBu_r"
102
+ ),
103
+ # ── Wind 10 m ──────────────────────────────────────────────────────────
104
+ "u10": ERA5Variable(
105
+ short_name="u10",
106
+ long_name="10m U-Wind Component",
107
+ units="m/s",
108
+ description="Eastward component of wind at 10 meters above surface",
109
+ category="atmosphere",
110
+ typical_range=(-30, 30),
111
+ colormap="RdBu_r"
112
+ ),
113
+ "v10": ERA5Variable(
114
+ short_name="v10",
115
+ long_name="10m V-Wind Component",
116
+ units="m/s",
117
+ description="Northward component of wind at 10 meters above surface",
118
+ category="atmosphere",
119
+ typical_range=(-30, 30),
120
+ colormap="RdBu_r"
121
+ ),
122
+ # ── Wind 100 m (hub-height for wind energy) ───────────────────────────
123
+ "u100": ERA5Variable(
124
+ short_name="u100",
125
+ long_name="100m U-Wind Component",
126
+ units="m/s",
127
+ description="Eastward component of wind at 100 meters above surface (wind-turbine hub height)",
128
+ category="atmosphere",
129
+ typical_range=(-40, 40),
130
+ colormap="RdBu_r"
131
+ ),
132
+ "v100": ERA5Variable(
133
+ short_name="v100",
134
+ long_name="100m V-Wind Component",
135
+ units="m/s",
136
+ description="Northward component of wind at 100 meters above surface (wind-turbine hub height)",
137
+ category="atmosphere",
138
+ typical_range=(-40, 40),
139
+ colormap="RdBu_r"
140
+ ),
141
+ # ── Pressure ───────────────────────────────────────────────────────────
142
+ "sp": ERA5Variable(
143
+ short_name="sp",
144
+ long_name="Surface Pressure",
145
+ units="Pa",
146
+ description="Pressure at the Earth's surface",
147
+ category="atmosphere",
148
+ typical_range=(85000, 108000),
149
+ colormap="viridis"
150
+ ),
151
+ "mslp": ERA5Variable(
152
+ short_name="mslp",
153
+ long_name="Mean Sea Level Pressure",
154
+ units="Pa",
155
+ description="Atmospheric pressure reduced to mean sea level",
156
+ category="atmosphere",
157
+ typical_range=(96000, 105000),
158
+ colormap="viridis"
159
+ ),
160
+ # ── Boundary Layer ─────────────────────────────────────────────────────
161
+ "blh": ERA5Variable(
162
+ short_name="blh",
163
+ long_name="Boundary Layer Height",
164
+ units="m",
165
+ description="Height of the planetary boundary layer above ground",
166
+ category="atmosphere",
167
+ typical_range=(50, 3000),
168
+ colormap="viridis"
169
+ ),
170
+ "cape": ERA5Variable(
171
+ short_name="cape",
172
+ long_name="Convective Available Potential Energy",
173
+ units="J/kg",
174
+ description="Instability indicator for convection/thunderstorm potential",
175
+ category="atmosphere",
176
+ typical_range=(0, 5000),
177
+ colormap="YlOrRd"
178
+ ),
179
+ # ── Cloud & Precipitation ──────────────────────────────────────────────
180
+ "tcc": ERA5Variable(
181
+ short_name="tcc",
182
+ long_name="Total Cloud Cover",
183
+ units="fraction (0-1)",
184
+ description="Fraction of sky covered by clouds",
185
+ category="atmosphere",
186
+ typical_range=(0, 1),
187
+ colormap="gray_r"
188
+ ),
189
+ "cp": ERA5Variable(
190
+ short_name="cp",
191
+ long_name="Convective Precipitation",
192
+ units="m",
193
+ description="Accumulated precipitation from convective processes",
194
+ category="precipitation",
195
+ typical_range=(0, 0.1),
196
+ colormap="Blues"
197
+ ),
198
+ "lsp": ERA5Variable(
199
+ short_name="lsp",
200
+ long_name="Large-scale Precipitation",
201
+ units="m",
202
+ description="Accumulated precipitation from large-scale weather systems",
203
+ category="precipitation",
204
+ typical_range=(0, 0.1),
205
+ colormap="Blues"
206
+ ),
207
+ "tp": ERA5Variable(
208
+ short_name="tp",
209
+ long_name="Total Precipitation",
210
+ units="m",
211
+ description="Total accumulated precipitation (convective + large-scale)",
212
+ category="precipitation",
213
+ typical_range=(0, 0.2),
214
+ colormap="Blues"
215
+ ),
216
+ # ── Radiation ──────────────────────────────────────────────────────────
217
+ "ssr": ERA5Variable(
218
+ short_name="ssr",
219
+ long_name="Surface Net Solar Radiation",
220
+ units="J/m²",
221
+ description="Net balance of downward minus reflected shortwave radiation at the surface",
222
+ category="radiation",
223
+ typical_range=(0, 3e7),
224
+ colormap="YlOrRd"
225
+ ),
226
+ "ssrd": ERA5Variable(
227
+ short_name="ssrd",
228
+ long_name="Surface Solar Radiation Downwards",
229
+ units="J/m²",
230
+ description="Total incoming shortwave (solar) radiation reaching the surface (direct + diffuse)",
231
+ category="radiation",
232
+ typical_range=(0, 3.5e7),
233
+ colormap="YlOrRd"
234
+ ),
235
+ # ── Moisture Columns ───────────────────────────────────────────────────
236
+ "tcw": ERA5Variable(
237
+ short_name="tcw",
238
+ long_name="Total Column Water",
239
+ units="kg/m²",
240
+ description="Total water (vapour + liquid + ice) in the atmospheric column",
241
+ category="atmosphere",
242
+ typical_range=(0, 80),
243
+ colormap="Blues"
244
+ ),
245
+ "tcwv": ERA5Variable(
246
+ short_name="tcwv",
247
+ long_name="Total Column Water Vapour",
248
+ units="kg/m²",
249
+ description="Total water vapour in the atmospheric column (precipitable water)",
250
+ category="atmosphere",
251
+ typical_range=(0, 70),
252
+ colormap="Blues"
253
+ ),
254
+ # ── Land Surface ───────────────────────────────────────────────────────
255
+ "sd": ERA5Variable(
256
+ short_name="sd",
257
+ long_name="Snow Depth",
258
+ units="m water equiv.",
259
+ description="Depth of snow expressed as meters of water equivalent",
260
+ category="land_surface",
261
+ typical_range=(0, 2),
262
+ colormap="Blues"
263
+ ),
264
+ "stl1": ERA5Variable(
265
+ short_name="stl1",
266
+ long_name="Soil Temperature Level 1",
267
+ units="K",
268
+ description="Temperature of the topmost soil layer (0-7 cm depth)",
269
+ category="land_surface",
270
+ typical_range=(220, 330),
271
+ colormap="RdYlBu_r"
272
+ ),
273
+ "swvl1": ERA5Variable(
274
+ short_name="swvl1",
275
+ long_name="Volumetric Soil Water Layer 1",
276
+ units="m³/m³",
277
+ description="Volume fraction of water in the topmost soil layer (0-7 cm depth)",
278
+ category="land_surface",
279
+ typical_range=(0, 0.5),
280
+ colormap="YlGnBu"
281
+ ),
282
+ }
283
+
284
# Aliases for long variable names → short names.
# Includes the canonical ECMWF/ERA5 short names (t2m, d2m, msl) so that
# identifiers copied from ERA5 documentation resolve as well.
VARIABLE_ALIASES: Dict[str, str] = {
    # Ocean
    "sea_surface_temperature": "sst",
    # Temperature
    "2m_temperature": "t2",
    "temperature": "t2",
    "t2m": "t2",                      # ECMWF short name
    "2m_dewpoint_temperature": "d2",
    "dewpoint_temperature": "d2",
    "dewpoint": "d2",
    "d2m": "d2",                      # ECMWF short name
    "skin_temperature": "skt",
    # Wind 10m
    "10m_u_component_of_wind": "u10",
    "10m_v_component_of_wind": "v10",
    # Wind 100m
    "100m_u_component_of_wind": "u100",
    "100m_v_component_of_wind": "v100",
    # Pressure
    "surface_pressure": "sp",
    "mean_sea_level_pressure": "mslp",
    "msl": "mslp",                    # ECMWF short name
    # Boundary layer
    "boundary_layer_height": "blh",
    "convective_available_potential_energy": "cape",
    # Cloud & precipitation
    "total_cloud_cover": "tcc",
    "convective_precipitation": "cp",
    "large_scale_precipitation": "lsp",
    "total_precipitation": "tp",
    # Radiation
    "surface_net_solar_radiation": "ssr",
    "surface_solar_radiation_downwards": "ssrd",
    # Moisture columns
    "total_column_water": "tcw",
    "total_column_water_vapour": "tcwv",
    # Land surface
    "snow_depth": "sd",
    "soil_temperature": "stl1",
    "soil_temperature_level_1": "stl1",
    "soil_moisture": "swvl1",
    "volumetric_soil_water_layer_1": "swvl1",
}
325
+
326
+
327
def get_variable_info(variable_id: str) -> Optional[ERA5Variable]:
    """Look up variable metadata by ID (case-insensitive, resolves aliases).

    Returns None when the identifier matches neither a short name
    nor a registered alias.
    """
    key = variable_id.lower()
    # Aliases map long names onto short names; fall back to the key itself.
    return ERA5_VARIABLES.get(VARIABLE_ALIASES.get(key, key))
334
+
335
+
336
def get_short_name(variable_id: str) -> str:
    """Resolve a variable identifier to its dataset short name.

    Resolution order: alias table, then the variable registry;
    unknown identifiers are returned unchanged (lower-cased).
    """
    key = variable_id.lower()
    alias_target = VARIABLE_ALIASES.get(key)
    if alias_target is not None:
        return alias_target
    info = ERA5_VARIABLES.get(key)
    return info.short_name if info else key
346
+
347
+
348
def list_available_variables() -> str:
    """Return a human-readable table of the unique ERA5 variables."""
    listed: set[str] = set()
    rows = ["Available ERA5 Variables:", "=" * 50]
    for var_info in ERA5_VARIABLES.values():
        # The registry may hold several keys for one variable; list each once.
        if var_info.short_name in listed:
            continue
        listed.add(var_info.short_name)
        rows.append(
            f" {var_info.short_name:8} | {var_info.long_name:30} | {var_info.units}"
        )
    return "\n".join(rows)
361
+
362
+
363
def get_all_short_names() -> list[str]:
    """Get list of all unique short variable names.

    Uses dict.fromkeys for de-duplication so the result preserves the
    registry's insertion order; the previous set comprehension returned
    an arbitrary (hash-dependent) order on each run.
    """
    return list(dict.fromkeys(v.short_name for v in ERA5_VARIABLES.values()))
366
+
367
+
368
+ # =============================================================================
369
+ # GEOGRAPHIC REGIONS (Common oceanographic areas)
370
+ # =============================================================================
371
+
372
@dataclass(frozen=True)
class GeographicRegion:
    """An immutable, named latitude/longitude bounding box."""

    name: str
    min_lat: float
    max_lat: float
    min_lon: float
    max_lon: float
    description: str = ""

    def to_dict(self) -> dict:
        """Return only the four bounding-box coordinates (no name/description)."""
        return {
            key: getattr(self, key)
            for key in ("min_lat", "max_lat", "min_lon", "max_lon")
        }
390
+
391
+
392
# Predefined regions, keyed by lower-case name.
# Longitudes are expressed on the 0-360 grid used by the ERA5 dataset.
# NOTE(review): several regions (south_atlantic, mediterranean, europe) have
# min_lon > max_lon, i.e. the box wraps across the 0° meridian — confirm that
# downstream spatial selection handles wrap-around.
GEOGRAPHIC_REGIONS: Dict[str, GeographicRegion] = {
    "global": GeographicRegion(
        "global", -90, 90, 0, 359.75,
        "Entire globe"
    ),
    # ── Ocean basins ──────────────────────────────────────────────────────
    "north_atlantic": GeographicRegion(
        "north_atlantic", 0, 65, 280, 360,
        "North Atlantic Ocean"
    ),
    "south_atlantic": GeographicRegion(
        "south_atlantic", -60, 0, 280, 20,
        "South Atlantic Ocean"
    ),
    "north_pacific": GeographicRegion(
        "north_pacific", 0, 65, 100, 260,
        "North Pacific Ocean"
    ),
    "south_pacific": GeographicRegion(
        "south_pacific", -60, 0, 150, 290,
        "South Pacific Ocean"
    ),
    "indian_ocean": GeographicRegion(
        "indian_ocean", -60, 30, 20, 120,
        "Indian Ocean"
    ),
    # ── Polar regions ─────────────────────────────────────────────────────
    "arctic": GeographicRegion(
        "arctic", 65, 90, 0, 359.75,
        "Arctic Ocean and surrounding areas"
    ),
    "antarctic": GeographicRegion(
        "antarctic", -90, -60, 0, 359.75,
        "Antarctic and Southern Ocean"
    ),
    # ── Regional seas and coasts ──────────────────────────────────────────
    "mediterranean": GeographicRegion(
        "mediterranean", 30, 46, 354, 42,
        "Mediterranean Sea"
    ),
    "gulf_of_mexico": GeographicRegion(
        "gulf_of_mexico", 18, 31, 262, 282,
        "Gulf of Mexico"
    ),
    "caribbean": GeographicRegion(
        "caribbean", 8, 28, 255, 295,
        "Caribbean Sea"
    ),
    "california_coast": GeographicRegion(
        "california_coast", 32, 42, 235, 250,
        "California coastal waters"
    ),
    "east_coast_us": GeographicRegion(
        "east_coast_us", 25, 45, 280, 295,
        "US East Coast"
    ),
    # ── Continental areas ─────────────────────────────────────────────────
    "europe": GeographicRegion(
        "europe", 35, 72, 350, 40,
        "Europe"
    ),
    "asia_east": GeographicRegion(
        "asia_east", 15, 55, 100, 145,
        "East Asia"
    ),
    "australia": GeographicRegion(
        "australia", -45, -10, 110, 155,
        "Australia and surrounding waters"
    ),
    # El Niño regions
    "nino34": GeographicRegion(
        "nino34", -5, 5, 190, 240,
        "El Niño 3.4 region (central Pacific)"
    ),
    "nino3": GeographicRegion(
        "nino3", -5, 5, 210, 270,
        "El Niño 3 region (eastern Pacific)"
    ),
    "nino4": GeographicRegion(
        "nino4", -5, 5, 160, 210,
        "El Niño 4 region (western Pacific)"
    ),
    "nino12": GeographicRegion(
        "nino12", -10, 0, 270, 280,
        "El Niño 1+2 region (far eastern Pacific)"
    ),
}
475
+
476
+
477
def get_region(name: str) -> Optional[GeographicRegion]:
    """Fetch a predefined geographic region; None if the name is unknown."""
    key = name.lower()
    return GEOGRAPHIC_REGIONS.get(key)
480
+
481
+
482
def list_regions() -> str:
    """Return a formatted table of every predefined geographic region."""
    rows = ["Available Geographic Regions:", "=" * 70]
    rows.extend(
        f" {name:20} | lat: [{region.min_lat:6.1f}, {region.max_lat:6.1f}] "
        f"| lon: [{region.min_lon:6.1f}, {region.max_lon:6.1f}]"
        for name, region in GEOGRAPHIC_REGIONS.items()
    )
    return "\n".join(rows)
491
+
492
+
493
+ # =============================================================================
494
+ # AGENT CONFIGURATION
495
+ # =============================================================================
496
+
497
@dataclass
class AgentConfig:
    """Configuration for the ERA5 Agent.

    All values are defaults; a shared instance is created at module level.
    """

    # LLM Settings
    model_name: str = "gpt-5.2"   # chat model identifier passed to the LLM client
    temperature: float = 0        # 0 = deterministic generation
    max_tokens: int = 4096        # completion-token cap per response

    # Data Settings
    data_source: str = "earthmover-public/era5-surface-aws"  # cloud-hosted ERA5 archive
    default_query_type: str = "temporal"  # access pattern when the caller does not specify one
    max_download_size_gb: float = 2.0     # upper bound on a single data download

    # Retrieval Settings
    max_retries: int = 5        # attempts for a failing retrieval
    retry_delay: float = 2.0    # seconds between retry attempts

    # Memory Settings
    enable_memory: bool = True
    max_conversation_history: int = 100   # messages retained per session
    memory_file: str = "conversation_history.json"

    # Visualization Settings
    default_figure_size: tuple = (12, 8)  # (width, height) in inches
    default_dpi: int = 150                # default figure DPI
    save_plots: bool = True
    plot_format: str = "png"

    # Kernel Settings
    kernel_timeout: float = 300.0  # seconds before a REPL execution is aborted
    # Packages imported into the Python kernel at startup.
    auto_import_packages: List[str] = field(default_factory=lambda: [
        "pandas", "numpy", "xarray",
        "matplotlib", "matplotlib.pyplot", "datetime"
    ])

    # Logging
    log_level: str = "INFO"
    log_to_file: bool = True
    log_file: str = "era5_agent.log"
537
+
538
+
539
# Global config instance — a shared, mutable singleton read across the package.
CONFIG = AgentConfig()

# Convenience path variables (for backward compatibility).
# NOTE(review): resolved once at import time; later changes to the underlying
# directory helpers are not reflected here.
DATA_DIR = get_data_dir()
PLOTS_DIR = get_plots_dir()
545
+
546
+
547
+ # =============================================================================
548
+ # SYSTEM PROMPTS
549
+ # =============================================================================
550
+
551
# System prompt for the Eurus agent.
# Fix: the capability sections were numbered 1, 2, 4, 5 (no 3);
# MEMORY and MARITIME LOGISTICS are renumbered to 3 and 4.
AGENT_SYSTEM_PROMPT = """You are Eurus, an AI Climate Physicist conducting research for high-impact scientific publications.

## ⚠️ CRITICAL: RESPECT USER INTENT FIRST

**Your PRIMARY directive is to do EXACTLY what the user asks.**

### TOOL USAGE RULES:
1. **`python_repl`**: Use for:
   - Custom analysis (anomalies, trends, statistics)
   - Visualization with matplotlib
   - Any computation not directly provided by other tools

2. **`retrieve_era5_data`**: Use for downloading climate data

3. **`calculate_maritime_route`**: Use for ship routing

4. **`get_analysis_guide`/`get_visualization_guide`**: Use for methodology help

### EXAMPLES:
- "Get temperature for Berlin and plot it" → Retrieve data, plot RAW temperature time series
- "Show temperature anomalies for Berlin" → Retrieve data, use python_repl to compute anomalies
- "Analyze temperature trends" → Retrieve data, use python_repl for trend calculation
- "Why was 2023 so hot?" → Retrieve data, analyze with python_repl

## YOUR CAPABILITIES

### 1. DATA RETRIEVAL: `retrieve_era5_data`
Downloads ERA5 reanalysis data from Earthmover's cloud-optimized archive.

**⚠️ STRICT QUERY TYPE RULE (WRONG = 10-100x SLOWER!):**
┌─────────────────────────────────────────────────────────────────┐
│ TEMPORAL: (time > 1 day) AND (area < 30°×30°)                   │
│ SPATIAL: (time ≤ 1 day) OR (area ≥ 30°×30°)                     │
└─────────────────────────────────────────────────────────────────┘

**COORDINATES - USE ROUTE BOUNDING BOX:**
- Latitude: -90 to 90
- Longitude: Use values from route tool's bounding box DIRECTLY!
- For Europe/Atlantic: Use -10 to 15 (NOT 0 to 360!)
- For Pacific crossing dateline: Use 0-360 system

**⚠️ CRITICAL:** When `calculate_maritime_route` returns a bounding box,
USE THOSE EXACT VALUES for min/max longitude. Do NOT convert to 0-360!

**DATA AVAILABILITY:** 1975 to present (updated regularly)

**Available Variables (22 total):**
| Variable | Description | Units | Category |
|----------|-------------|-------|----------|
| sst | Sea Surface Temperature | K | Ocean |
| t2 | 2m Air Temperature | K | Temperature |
| d2 | 2m Dewpoint Temperature | K | Temperature |
| skt | Skin Temperature | K | Surface |
| u10 | 10m U-Wind (Eastward) | m/s | Wind |
| v10 | 10m V-Wind (Northward) | m/s | Wind |
| u100 | 100m U-Wind (Eastward) | m/s | Wind |
| v100 | 100m V-Wind (Northward) | m/s | Wind |
| sp | Surface Pressure | Pa | Pressure |
| mslp | Mean Sea Level Pressure | Pa | Pressure |
| blh | Boundary Layer Height | m | Atmosphere |
| cape | Convective Available Potential Energy | J/kg | Atmosphere |
| tcc | Total Cloud Cover | 0-1 | Cloud |
| cp | Convective Precipitation | m | Precipitation |
| lsp | Large-scale Precipitation | m | Precipitation |
| tp | Total Precipitation | m | Precipitation |
| ssr | Surface Net Solar Radiation | J/m² | Radiation |
| ssrd | Surface Solar Radiation Downwards | J/m² | Radiation |
| tcw | Total Column Water | kg/m² | Moisture |
| tcwv | Total Column Water Vapour | kg/m² | Moisture |
| sd | Snow Depth | m water eq. | Land |
| stl1 | Soil Temperature Level 1 | K | Land |
| swvl1 | Volumetric Soil Water Layer 1 | m³/m³ | Land |

### 2. CUSTOM ANALYSIS: `python_repl`
Persistent Python kernel for custom analysis and visualization.
**Pre-loaded:** pandas (pd), numpy (np), xarray (xr), matplotlib.pyplot (plt)

#### What you can do with python_repl:
- **Anomalies**: `anomaly = data - data.mean('time')`
- **Z-Scores**: `z = (data - clim.mean('time')) / clim.std('time')`
- **Trends**: Use `scipy.stats.linregress` or numpy polyfit
- **Extremes**: Filter data where values exceed thresholds
- **Visualizations**: Any matplotlib plot saved to PLOTS_DIR

### 3. MEMORY
Remembers conversation history and previous analyses.

### 4. MARITIME LOGISTICS: `calculate_maritime_route` (Captain Mode)
Plans shipping routes and assesses climatological hazards.

**WORKFLOW (Mandatory Protocol):**
1. **ROUTE**: Call `calculate_maritime_route(origin_lat, origin_lon, dest_lat, dest_lon, month)`
   - Returns waypoints avoiding land via global shipping lane graph
   - Returns bounding box for data download
   - Returns STEP-BY-STEP INSTRUCTIONS

2. **DATA**: Download ERA5 climatology for the route region
   - Variables: `u10`, `v10` (10m wind components) → compute wind speed
   - NOTE: `swh` (wave height) is NOT available in this dataset!
   - Period: Target month over LAST 3 YEARS (e.g., July 2021-2023)
   - Why 3 years? To compute climatological statistics, not just a forecast

3. **METHODOLOGY**: Call `get_visualization_guide(viz_type='maritime_risk_assessment')`
   - Returns mathematical formulas for Lagrangian risk analysis
   - Defines hazard thresholds (e.g., wind speed > 15 m/s = DANGER)
   - Explains how to compute route risk score

4. **ANALYSIS**: Execute in `python_repl` following the methodology:
   - Extract data at each waypoint (nearest neighbor)
   - Compute wind speed: `wspd = sqrt(u10² + v10²)`
   - Compute max/mean/p95 statistics
   - Identify danger zones (wind > threshold)
   - Calculate route-level risk score

5. **DECISION**:
   - If danger zones found → Recommend route deviation
   - If route safe → Confirm with confidence level

**Key Formulas (from methodology):**
- Wind speed: `wspd = sqrt(u10² + v10²)`
- Exceedance probability: `P = count(wspd > threshold) / N_total`
- Route risk: `max(wspd_i)` for all waypoints i

## SCIENTIFIC PROTOCOL (For Publication-Grade Analysis)

When the user requests scientific analysis:

1. **ANOMALY ANALYSIS**: Report:
   - Anomalies: "2.5°C above normal"
   - Z-Scores: "+2.5σ (statistically significant)"
   - Use `python_repl` to compute anomalies from downloaded data

2. **MECHANISM**: Explain WHY:
   - Use `python_repl` to look for patterns in the data
   - Consider atmospheric blocking, ENSO teleconnections, etc.

3. **COMPOUND EVENTS**: Look for dangerous combinations with python_repl:
   - High heat + Low wind = "Ocean Oven"
   - Filter data where multiple thresholds are exceeded

4. **STATISTICAL RIGOR**: Always test significance:
   - Use Z > 2σ for "extreme"
   - Use p < 0.05 for trends
   - Report confidence intervals when possible

## VISUALIZATION STANDARDS

**Publication-grade light-theme rcParams are pre-set** — figures get white background,
black text, grid, 300 DPI on save, and a high-contrast color cycle. Do NOT override unless necessary.

### Mandatory Rules
1. **DPI**: Saved at 300 (print-quality) — do not lower it
2. **Figure size**: Default 10×6 for time series, use `figsize=(12, 8)` for map plots
3. **Unit conversions in labels**:
   - Temperature → always show °C (`- 273.15`)
   - Pressure → show hPa (`/ 100`)
   - Precipitation → show mm (`* 1000`)
4. **Colormaps**:
   - SST/Temperature: `'RdYlBu_r'` or `'coolwarm'`
   - Wind speed: `'YlOrRd'`
   - Anomalies: `'RdBu_r'` (diverging, centered at zero via `TwoSlopeNorm`)
   - Precipitation: `'YlGnBu'`
   - Cloud cover: `'Greys'`
   - **NEVER** use `'jet'`
5. **Colorbar**: Always include `label=` with units:
   ```python
   cbar = plt.colorbar(mesh, label='SST (°C)', shrink=0.8)
   ```
6. **Maritime maps**: Call `get_analysis_guide(topic='maritime_visualization')` for the full template

### Available in REPL Namespace
`pd, np, xr, plt, mcolors, cm, datetime, timedelta, PLOTS_DIR`


## RESPONSE STYLE
- Be precise and scientific
- Follow user intent exactly
- Include statistical significance when doing scientific analysis
- Reference specific dates/locations
- Acknowledge limitations and uncertainty
- **NEVER list file paths** of saved plots in your response — plots are displayed automatically in the UI
- Do NOT say "you can view it here" or similar — the user already sees the plot inline
"""
734
+
735
+
736
+ # =============================================================================
737
+ # UTILITY FUNCTIONS
738
+ # =============================================================================
739
+
740
def format_file_size(size_bytes: int) -> str:
    """Format a byte count as a human-readable string (B … PB)."""
    size = size_bytes
    for unit in ('B', 'KB', 'MB', 'GB', 'TB'):
        if size < 1024:
            return f"{size:.2f} {unit}"
        size /= 1024
    # Anything that survives five divisions is in the petabyte range.
    return f"{size:.2f} PB"
747
+
748
+
749
def get_timestamp() -> str:
    """Current local time formatted as 'YYYY-MM-DD HH:MM:SS'."""
    now = datetime.now()
    return now.strftime("%Y-%m-%d %H:%M:%S")
src/eurus/logging_config.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Eurus Logging Configuration
3
+ ============================
4
+ Centralized logging setup for both web and CLI modes.
5
+ Logs are saved to PROJECT_ROOT/logs/ with timestamps.
6
+ """
7
+
8
+ import os
9
+ import sys
10
+ import logging
11
+ from pathlib import Path
12
+ from datetime import datetime
13
+
14
+ # Project root
15
+ PROJECT_ROOT = Path(__file__).parent.parent.parent
16
+
17
+ # Logs directory
18
+ LOGS_DIR = PROJECT_ROOT / "logs"
19
+ LOGS_DIR.mkdir(exist_ok=True)
20
+
21
+
22
def setup_logging(mode: str = "web", level: int = logging.DEBUG) -> logging.Logger:
    """
    Configure logging for Eurus.

    Args:
        mode: 'web' or 'cli' - determines log file prefix
        level: logging level (default: DEBUG for full logs)

    Returns:
        Root logger configured with file and console handlers
    """
    # Create timestamped log filename
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    log_file = LOGS_DIR / f"eurus_{mode}_{timestamp}.log"

    # Create formatters
    detailed_formatter = logging.Formatter(
        fmt="%(asctime)s | %(levelname)-8s | %(name)-30s | %(funcName)-20s | %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S"
    )

    console_formatter = logging.Formatter(
        fmt="%(asctime)s | %(levelname)-5s | %(name)s | %(message)s",
        datefmt="%H:%M:%S"
    )

    # Get root logger
    root_logger = logging.getLogger()
    root_logger.setLevel(level)

    # BUG FIX: close existing handlers before discarding them.
    # handlers.clear() alone leaks open file descriptors when
    # setup_logging() is called more than once in a process.
    for old_handler in list(root_logger.handlers):
        try:
            old_handler.close()
        except Exception:
            pass  # never fail setup because a stale handler won't close
        root_logger.removeHandler(old_handler)

    # File handler - FULL DEBUG logs
    file_handler = logging.FileHandler(log_file, encoding='utf-8')
    file_handler.setLevel(logging.DEBUG)
    file_handler.setFormatter(detailed_formatter)
    root_logger.addHandler(file_handler)

    # Console handler - respects ERA5_LOG_LEVEL env var (default: INFO)
    console_level_name = os.environ.get("ERA5_LOG_LEVEL", "INFO").upper()
    console_level = getattr(logging, console_level_name, logging.INFO)
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setLevel(console_level)
    console_handler.setFormatter(console_formatter)
    root_logger.addHandler(console_handler)

    # Log startup info
    logger = logging.getLogger("eurus.logging")
    logger.info("=" * 80)
    logger.info(f"EURUS {mode.upper()} STARTING")
    logger.info(f"Log file: {log_file}")
    logger.info("=" * 80)

    # Reduce noise from external libraries
    logging.getLogger("httpx").setLevel(logging.WARNING)
    logging.getLogger("httpcore").setLevel(logging.WARNING)
    logging.getLogger("urllib3").setLevel(logging.WARNING)
    logging.getLogger("asyncio").setLevel(logging.WARNING)
    logging.getLogger("uvicorn.access").setLevel(logging.INFO)

    return root_logger
84
+
85
+
86
def get_logger(name: str) -> logging.Logger:
    """Thin convenience wrapper around logging.getLogger."""
    named_logger = logging.getLogger(name)
    return named_logger
89
+
90
+
91
+ # Cleanup old logs (keep last 20)
92
def cleanup_old_logs(keep: int = 20):
    """Remove old log files, keeping at most the `keep` most recent ones.

    Best-effort: any filesystem error is swallowed so log cleanup can
    never break startup.
    """
    try:
        log_files = sorted(LOGS_DIR.glob("eurus_*.log"), key=os.path.getmtime)
        # BUG FIX: slice by a positive excess count. The previous
        # log_files[:-keep] silently deleted nothing when keep == 0,
        # contradicting the "keep the N most recent" contract.
        excess = len(log_files) - keep
        if excess > 0:
            for old_file in log_files[:excess]:
                old_file.unlink()
    except Exception:
        pass  # Deliberate: don't fail on cleanup
src/eurus/memory.py ADDED
@@ -0,0 +1,508 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ERA5 MCP Memory System
3
+ ======================
4
+
5
+ Session-based memory with smart compression for conversation history.
6
+ Dataset cache persists across sessions, but conversations are fresh each session.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import json
12
+ import logging
13
+ import os
14
+ import tiktoken
15
+ from dataclasses import asdict, dataclass, field
16
+ from datetime import datetime
17
+ from pathlib import Path
18
+ from typing import Any, Dict, List, Optional
19
+
20
+ from eurus.config import get_memory_dir, CONFIG
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
+ # ============================================================================
26
+ # CONFIGURATION
27
+ # ============================================================================
28
+
29
# Token limits for smart memory management
MAX_CONTEXT_TOKENS = 8000  # Max tokens to keep in active memory
COMPRESSION_THRESHOLD = 6000  # Start compressing when the running tally exceeds this
SUMMARY_TARGET_TOKENS = 500  # Compressed summary is trimmed toward this size
33
+
34
+
35
+ # ============================================================================
36
+ # DATA STRUCTURES
37
+ # ============================================================================
38
+
39
@dataclass
class DatasetRecord:
    """Record of a downloaded dataset (persisted in the dataset cache)."""

    path: str                       # local path of the downloaded file
    variable: str                   # ERA5 short name
    query_type: str                 # access pattern used for the download
    start_date: str
    end_date: str
    lat_bounds: tuple[float, float]
    lon_bounds: tuple[float, float]
    file_size_bytes: int
    download_timestamp: str
    shape: Optional[tuple[int, ...]] = None  # array shape, when known

    def to_dict(self) -> dict:
        """Serialize to a plain dict (via dataclasses.asdict)."""
        return asdict(self)

    @classmethod
    def from_dict(cls, data: dict) -> "DatasetRecord":
        """Rebuild a record from its dict form, coercing JSON lists to tuples.

        BUG FIX: works on a shallow copy — the previous version mutated the
        caller's dict in place when converting list fields to tuples.
        """
        coerced = dict(data)
        for key in ("lat_bounds", "lon_bounds", "shape"):
            value = coerced.get(key)
            if isinstance(value, list):
                coerced[key] = tuple(value)
        return cls(**coerced)
66
+
67
+
68
@dataclass
class Message:
    """A single conversation message."""

    role: str
    content: str
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
    is_compressed: bool = False  # True for synthesized summary messages

    def to_dict(self) -> dict:
        """Serialize to a plain dict."""
        return asdict(self)

    @classmethod
    def from_dict(cls, data: dict) -> "Message":
        """Build a Message from a dict, silently ignoring unknown keys."""
        allowed = ('role', 'content', 'timestamp', 'is_compressed')
        return cls(**{key: data[key] for key in allowed if key in data})

    def to_langchain(self) -> dict:
        """Convert to LangChain message format."""
        return {"role": self.role, "content": self.content}
89
+
90
+
91
@dataclass
class AnalysisRecord:
    """Record of an analysis performed during a session."""

    description: str  # human-readable summary of the analysis
    code: str         # the code that was executed
    output: str       # captured output of the run
    timestamp: str
    datasets_used: List[str] = field(default_factory=list)
    plots_generated: List[str] = field(default_factory=list)

    def to_dict(self) -> dict:
        """Serialize to a plain dict."""
        return asdict(self)

    @classmethod
    def from_dict(cls, data: dict) -> "AnalysisRecord":
        """Rebuild a record from its dict form."""
        return cls(**data)
108
+
109
+
110
+ # ============================================================================
111
+ # TOKEN COUNTER
112
+ # ============================================================================
113
+
114
class TokenCounter:
    """Token counting via tiktoken, with a rough character-based fallback."""

    _encoder = None  # lazily created, shared by all callers

    @classmethod
    def get_encoder(cls):
        """Return the cached tiktoken encoder, creating it on first use."""
        if cls._encoder is not None:
            return cls._encoder
        try:
            encoder = tiktoken.encoding_for_model("gpt-4")
        except Exception:
            encoder = tiktoken.get_encoding("cl100k_base")
        cls._encoder = encoder
        return encoder

    @classmethod
    def count(cls, text: str) -> int:
        """Count tokens in text (falls back to ~4 chars/token on any error)."""
        try:
            encoder = cls.get_encoder()
            return len(encoder.encode(text))
        except Exception:
            return len(text) // 4
136
+
137
+
138
+ # ============================================================================
139
+ # SMART CONVERSATION MEMORY
140
+ # ============================================================================
141
+
142
+ class SmartConversationMemory:
143
+ """
144
+ Session-based conversation memory with smart compression.
145
+
146
+ Features:
147
+ - Fresh start each session (no persistent history)
148
+ - Automatic compression when context gets too long
149
+ - Preserves recent messages in full, compresses older ones
150
+ - Token-aware memory management
151
+ """
152
+
153
+ def __init__(self):
154
+ self.messages: List[Message] = []
155
+ self.compressed_summary: Optional[str] = None
156
+ self._token_count = 0
157
+ logger.info("SmartConversationMemory initialized (fresh session)")
158
+
159
+ def add_message(self, role: str, content: str) -> Message:
160
+ """Add a message and check if compression is needed."""
161
+ msg = Message(role=role, content=content)
162
+ self.messages.append(msg)
163
+
164
+ # Update token count
165
+ self._token_count += TokenCounter.count(content)
166
+
167
+ # Check if we need to compress
168
+ if self._token_count > COMPRESSION_THRESHOLD:
169
+ self._compress_history()
170
+
171
+ return msg
172
+
173
+ def _compress_history(self) -> None:
174
+ """Compress older messages into a summary."""
175
+ if len(self.messages) < 6:
176
+ return # Not enough messages to compress
177
+
178
+ # Keep the last 4 messages in full
179
+ keep_count = 4
180
+ to_compress = self.messages[:-keep_count]
181
+ to_keep = self.messages[-keep_count:]
182
+
183
+ if not to_compress:
184
+ return
185
+
186
+ # Create a concise summary of compressed messages
187
+ summary_parts = []
188
+ for msg in to_compress:
189
+ role = msg.role.upper()
190
+ # Truncate long content for summary
191
+ content = msg.content[:200] + "..." if len(msg.content) > 200 else msg.content
192
+ summary_parts.append(f"[{role}]: {content}")
193
+
194
+ summary = "[Previous conversation summary]\n" + "\n".join(summary_parts)
195
+
196
+ # Truncate summary to target token size
197
+ while TokenCounter.count(summary) > SUMMARY_TARGET_TOKENS and summary:
198
+ # Trim from the oldest messages in the summary
199
+ lines = summary.split('\n')
200
+ if len(lines) <= 2:
201
+ break
202
+ summary = lines[0] + '\n' + '\n'.join(lines[2:])
203
+
204
+ summary_msg = Message(
205
+ role="system",
206
+ content=summary,
207
+ is_compressed=True
208
+ )
209
+
210
+ self.messages = [summary_msg] + to_keep
211
+
212
+ # Recalculate token count
213
+ self._token_count = sum(
214
+ TokenCounter.count(m.content) for m in self.messages
215
+ )
216
+
217
+ logger.info(f"Compressed {len(to_compress)} messages. Current tokens: {self._token_count}")
218
+
219
+ def get_messages(self, n_messages: Optional[int] = None) -> List[Message]:
220
+ """Get conversation messages."""
221
+ if n_messages is None:
222
+ return list(self.messages)
223
+ return list(self.messages)[-n_messages:]
224
+
225
+ def get_langchain_messages(self, n_messages: Optional[int] = None) -> List[dict]:
226
+ """Get messages in LangChain format."""
227
+ messages = self.get_messages(n_messages)
228
+ return [m.to_langchain() for m in messages]
229
+
230
+ def clear(self) -> None:
231
+ """Clear all messages."""
232
+ self.messages.clear()
233
+ self.compressed_summary = None
234
+ self._token_count = 0
235
+ logger.info("Conversation memory cleared")
236
+
237
+ def get_token_count(self) -> int:
238
+ """Get current token count."""
239
+ return self._token_count
240
+
241
+
242
+ # ============================================================================
243
+ # MEMORY MANAGER
244
+ # ============================================================================
245
+
246
class MemoryManager:
    """
    Manages memory for ERA5 MCP.

    Features:
    - Dataset cache registry (persists across sessions)
    - Session-based conversation history (fresh each restart)
    - Smart compression for long conversations
    - NO persistent conversation history to avoid stale context
    """

    def __init__(self, memory_dir: Optional[Path] = None, persist_conversations: bool = False):
        """Create the manager and load persisted dataset/analysis records.

        Args:
            memory_dir: Directory for persisted JSON state; defaults to
                get_memory_dir(). Created (with parents) if missing.
            persist_conversations: NOTE(review) - stored but never read
                anywhere in this class; confirm whether it is reserved for
                future use or dead.
        """
        self.memory_dir = memory_dir or get_memory_dir()
        self.memory_dir.mkdir(parents=True, exist_ok=True)
        self.persist_conversations = persist_conversations

        # File paths (only datasets persist)
        self.datasets_file = self.memory_dir / "datasets.json"
        self.analyses_file = self.memory_dir / "analyses.json"

        # In-memory storage
        self.datasets: Dict[str, DatasetRecord] = {}
        self.analyses: List[AnalysisRecord] = []

        # Session-based conversation memory (FRESH each time!)
        self.conversation_memory = SmartConversationMemory()

        # Load persistent data (only datasets)
        self._load_datasets()
        self._load_analyses()

        logger.info(
            f"MemoryManager initialized: {len(self.datasets)} datasets, "
            f"FRESH conversation (session-based)"
        )

    # ========================================================================
    # PERSISTENCE (Datasets only)
    # ========================================================================

    def _load_datasets(self) -> None:
        """Load dataset registry from disk.

        Best-effort: a corrupt or unreadable file is logged and ignored so the
        manager always starts in a usable state.
        """
        if self.datasets_file.exists():
            try:
                with open(self.datasets_file, "r") as f:
                    data = json.load(f)
                # Keys are dataset paths; values are serialized DatasetRecords.
                for path, record_data in data.items():
                    self.datasets[path] = DatasetRecord.from_dict(record_data)
            except Exception as e:
                logger.warning(f"Failed to load datasets: {e}")

    def _save_datasets(self) -> None:
        """Save dataset registry to disk (best-effort; errors are logged)."""
        try:
            with open(self.datasets_file, "w") as f:
                json.dump({p: r.to_dict() for p, r in self.datasets.items()}, f, indent=2)
        except Exception as e:
            logger.error(f"Failed to save datasets: {e}")

    def _load_analyses(self) -> None:
        """Load analysis history from disk (best-effort; keeps last 20 only)."""
        if self.analyses_file.exists():
            try:
                with open(self.analyses_file, "r") as f:
                    data = json.load(f)
                self.analyses = [AnalysisRecord.from_dict(r) for r in data[-20:]]  # Keep last 20
            except Exception as e:
                logger.warning(f"Failed to load analyses: {e}")

    def _save_analyses(self) -> None:
        """Save analysis history to disk (best-effort; last 20 records only)."""
        try:
            with open(self.analyses_file, "w") as f:
                json.dump([a.to_dict() for a in self.analyses[-20:]], f, indent=2)
        except Exception as e:
            logger.error(f"Failed to save analyses: {e}")

    # ========================================================================
    # DATASET MANAGEMENT
    # ========================================================================

    def register_dataset(
        self,
        path: str,
        variable: str,
        query_type: str,
        start_date: str,
        end_date: str,
        lat_bounds: tuple[float, float],
        lon_bounds: tuple[float, float],
        file_size_bytes: int = 0,
        shape: Optional[tuple[int, ...]] = None,
    ) -> DatasetRecord:
        """Register a downloaded dataset.

        Overwrites any existing record at the same path and persists the
        registry immediately. The download timestamp is set to now.

        Returns:
            The newly created DatasetRecord.
        """
        record = DatasetRecord(
            path=path,
            variable=variable,
            query_type=query_type,
            start_date=start_date,
            end_date=end_date,
            lat_bounds=lat_bounds,
            lon_bounds=lon_bounds,
            file_size_bytes=file_size_bytes,
            download_timestamp=datetime.now().isoformat(),
            shape=shape,
        )
        self.datasets[path] = record
        self._save_datasets()
        logger.info(f"Registered dataset: {path}")
        return record

    def get_dataset(self, path: str) -> Optional[DatasetRecord]:
        """Get dataset record by path, or None if unregistered."""
        return self.datasets.get(path)

    def list_datasets(self) -> str:
        """Return formatted list of cached datasets.

        Records whose files no longer exist on disk are flagged [MISSING]
        rather than removed (see cleanup_missing_datasets for removal).
        """
        if not self.datasets:
            return "No datasets in cache."

        lines = ["Cached Datasets:", "=" * 70]
        for path, record in self.datasets.items():
            if os.path.exists(path):
                size_str = self._format_size(record.file_size_bytes)
                lines.append(
                    f" {record.variable:5} | {record.start_date} to {record.end_date} | "
                    f"{record.query_type:8} | {size_str:>10}"
                )
                lines.append(f" Path: {path}")
            else:
                lines.append(f" [MISSING] {path}")

        return "\n".join(lines)

    def cleanup_missing_datasets(self) -> int:
        """Remove records for datasets that no longer exist.

        Returns:
            Number of records removed. Registry is saved only if changed.
        """
        missing = [p for p in self.datasets if not os.path.exists(p)]
        for path in missing:
            del self.datasets[path]
            logger.info(f"Removed missing dataset: {path}")
        if missing:
            self._save_datasets()
        return len(missing)

    # ========================================================================
    # CONVERSATION MANAGEMENT (Session-based)
    # ========================================================================

    def add_message(self, role: str, content: str) -> Message:
        """Add a message to conversation history (may trigger compression)."""
        return self.conversation_memory.add_message(role, content)

    def get_conversation_history(self, n_messages: Optional[int] = None) -> List[Message]:
        """Get recent conversation history (all messages when n_messages is None)."""
        return self.conversation_memory.get_messages(n_messages)

    def clear_conversation(self) -> None:
        """Clear conversation history."""
        self.conversation_memory.clear()
        logger.info("Conversation history cleared")

    def get_langchain_messages(self, n_messages: Optional[int] = None) -> List[dict]:
        """Get messages in LangChain format."""
        return self.conversation_memory.get_langchain_messages(n_messages)

    # Legacy property for compatibility
    @property
    def conversations(self) -> List[Message]:
        # Exposes the live message list (not a copy); callers mutate at their
        # own risk.
        return self.conversation_memory.messages

    # ========================================================================
    # ANALYSIS TRACKING
    # ========================================================================

    def record_analysis(
        self,
        description: str,
        code: str,
        output: str,
        datasets_used: Optional[List[str]] = None,
        plots_generated: Optional[List[str]] = None,
    ) -> AnalysisRecord:
        """Record an analysis for history.

        Output is truncated to 2000 characters; the history file keeps only
        the most recent 20 records (see _save_analyses).
        """
        record = AnalysisRecord(
            description=description,
            code=code,
            output=output[:2000],  # Truncate long output
            timestamp=datetime.now().isoformat(),
            datasets_used=datasets_used or [],
            plots_generated=plots_generated or [],
        )
        self.analyses.append(record)
        self._save_analyses()
        return record

    def get_recent_analyses(self, n: int = 10) -> List[AnalysisRecord]:
        """Get recent analyses (newest last)."""
        return self.analyses[-n:]

    # ========================================================================
    # CONTEXT SUMMARY
    # ========================================================================

    def get_context_summary(self) -> str:
        """Get a summary of current context for the agent.

        Includes session token usage, up to 3 recent messages (truncated to
        80 chars), and up to 5 cached datasets that still exist on disk.
        """
        lines = []

        # Token usage
        tokens = self.conversation_memory.get_token_count()
        if tokens > 0:
            lines.append(f"Session tokens: {tokens}/{MAX_CONTEXT_TOKENS}")

        # Recent conversation (brief)
        recent = self.get_conversation_history(3)
        if recent:
            lines.append("\nRecent in this session:")
            for msg in recent:
                preview = msg.content[:80] + "..." if len(msg.content) > 80 else msg.content
                lines.append(f" [{msg.role}]: {preview}")

        # Available datasets
        valid_datasets = {p: r for p, r in self.datasets.items() if os.path.exists(p)}
        if valid_datasets:
            lines.append(f"\nCached Datasets ({len(valid_datasets)}):")
            for path, record in list(valid_datasets.items())[:5]:
                lines.append(f" - {record.variable}: {record.start_date} to {record.end_date}")

        return "\n".join(lines) if lines else "Fresh session - no context yet."

    # ========================================================================
    # UTILITIES
    # ========================================================================

    @staticmethod
    def _format_size(size_bytes: int) -> str:
        """Format file size in human-readable format.

        NOTE(review): uses 1-decimal formatting while retrieval's module-level
        format_file_size uses 2 decimals - confirm the difference is intended.
        """
        for unit in ["B", "KB", "MB", "GB"]:
            if size_bytes < 1024:
                return f"{size_bytes:.1f} {unit}"
            size_bytes /= 1024
        return f"{size_bytes:.1f} TB"
487
+
488
+
489
+ # ============================================================================
490
+ # GLOBAL INSTANCE
491
+ # ============================================================================
492
+
493
+ _memory_instance: Optional[MemoryManager] = None
494
+
495
+
496
def get_memory() -> MemoryManager:
    """Return the process-wide MemoryManager, constructing it lazily."""
    global _memory_instance
    if _memory_instance is not None:
        return _memory_instance
    _memory_instance = MemoryManager()
    return _memory_instance
502
+
503
+
504
def reset_memory() -> None:
    """Reset the global memory instance (new session).

    Only drops the in-process singleton; persisted JSON state on disk is
    untouched and will be reloaded when the next MemoryManager is created.
    """
    global _memory_instance
    _memory_instance = None
    logger.info("Memory reset - next get_memory() will create fresh session")
src/eurus/retrieval.py ADDED
@@ -0,0 +1,536 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ERA5 Data Retrieval
3
+ ===================
4
+
5
+ Cloud-optimized data retrieval from Earthmover's ERA5 archive.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ import logging
12
+ import os
13
+ import shutil
14
+ import threading
15
+ import time
16
+ from datetime import datetime, timedelta
17
+ from pathlib import Path
18
+ from typing import Optional
19
+ from urllib.request import Request, urlopen
20
+
21
+ from eurus.config import (
22
+ CONFIG,
23
+ get_data_dir,
24
+ get_region,
25
+ get_short_name,
26
+ get_variable_info,
27
+ list_available_variables,
28
+ )
29
+ from eurus.memory import get_memory
30
+
31
+ logger = logging.getLogger(__name__)
32
+
33
+
34
+ def _format_coord(value: float) -> str:
35
+ """Format coordinates for stable, filename-safe identifiers."""
36
+ if abs(value) < 0.005:
37
+ value = 0.0
38
+ return f"{value:.2f}"
39
+
40
+
41
def generate_filename(
    variable: str,
    query_type: str,
    start: str,
    end: str,
    min_latitude: float,
    max_latitude: float,
    min_longitude: float,
    max_longitude: float,
    region: Optional[str] = None,
) -> str:
    """Generate a descriptive filename for the dataset.

    The name encodes variable, query type, date range and either a named
    region (lowercased) or the bounding box via _format_coord.
    """
    var_part = variable.replace("_", "")
    start_part = start.replace("-", "")
    end_part = end.replace("-", "")
    if region:
        area_tag = region.lower()
    else:
        lat_part = f"lat{_format_coord(min_latitude)}_{_format_coord(max_latitude)}"
        lon_part = f"lon{_format_coord(min_longitude)}_{_format_coord(max_longitude)}"
        area_tag = f"{lat_part}_{lon_part}"
    return f"era5_{var_part}_{query_type}_{start_part}_{end_part}_{area_tag}.zarr"
64
+
65
+
66
def format_file_size(size_bytes: int) -> str:
    """Format file size in human-readable format (two-decimal fixed point)."""
    remaining = float(size_bytes)
    for unit in ("B", "KB", "MB", "GB"):
        if remaining < 1024:
            return f"{remaining:.2f} {unit}"
        remaining /= 1024
    # Anything past GB is reported in terabytes.
    return f"{remaining:.2f} TB"
73
+
74
+
75
+ _aws_region_lock = threading.Lock()
76
+ _aws_region_set = False
77
+
78
+
79
def _ensure_aws_region(api_key: str, repo_name: Optional[str] = None) -> None:
    """
    Populate AWS S3 region/endpoint env vars from Arraylake repo metadata.

    Some environments fail S3 resolution unless region/endpoint are explicit.

    Args:
        api_key: Arraylake API token used for the metadata request.
        repo_name: Repository to inspect; defaults to CONFIG.data_source.

    The network lookup runs at most once per process regardless of outcome,
    and AWS_* variables the user already set are never overwritten.
    """
    global _aws_region_set
    if _aws_region_set:
        return  # Fast path: already attempted in this process

    with _aws_region_lock:
        if _aws_region_set:
            return  # Double-checked locking: another thread finished first

        try:
            repo = repo_name or CONFIG.data_source
            try:
                req = Request(
                    f"https://api.earthmover.io/repos/{repo}",
                    headers={"Authorization": f"Bearer {api_key}"},
                )
                with urlopen(req, timeout=30) as resp:
                    payload = resp.read().decode("utf-8")
                repo_meta = json.loads(payload)
            except Exception as exc:
                # Best-effort only; don't retry on failure.
                logger.debug("Could not auto-detect AWS region from Arraylake metadata: %s", exc)
                return

            # Defensively walk the metadata; bail out quietly on any
            # unexpected shape.
            if not isinstance(repo_meta, dict):
                return
            bucket = repo_meta.get("bucket")
            if not isinstance(bucket, dict):
                return
            extra_cfg = bucket.get("extra_config")
            if not isinstance(extra_cfg, dict):
                return
            region_name = extra_cfg.get("region_name")
            if not isinstance(region_name, str) or not region_name:
                return

            endpoint = f"https://s3.{region_name}.amazonaws.com"
            desired_values = {
                "AWS_REGION": region_name,
                "AWS_DEFAULT_REGION": region_name,
                "AWS_ENDPOINT_URL": endpoint,
                "AWS_S3_ENDPOINT": endpoint,
            }
            updated = False
            for key, value in desired_values.items():
                # Respect any value the user configured explicitly.
                if not os.environ.get(key):
                    os.environ[key] = value
                    updated = True

            if updated:
                logger.info(
                    "Auto-set AWS region/endpoint for Arraylake: region=%s endpoint=%s",
                    region_name,
                    endpoint,
                )
        finally:
            # Bug fix: previously the early returns for malformed metadata
            # left the flag unset, so the HTTP lookup was re-issued on every
            # call despite the "only run once per process" intent. Mark the
            # attempt done on every exit path.
            _aws_region_set = True
142
+
143
+
144
def retrieve_era5_data(
    query_type: str,
    variable_id: str,
    start_date: str,
    end_date: str,
    min_latitude: float = -90.0,
    max_latitude: float = 90.0,
    min_longitude: float = 0.0,
    max_longitude: float = 359.75,
    region: Optional[str] = None,
) -> str:
    """
    Retrieve ERA5 reanalysis data from Earthmover's cloud-optimized archive.

    Args:
        query_type: Either "temporal" (time series) or "spatial" (maps)
        variable_id: ERA5 variable name (e.g., "sst", "t2", "u10")
        start_date: Start date in YYYY-MM-DD format
        end_date: End date in YYYY-MM-DD format
        min_latitude: Southern bound (-90 to 90)
        max_latitude: Northern bound (-90 to 90)
        min_longitude: Western bound (0 to 360; negative -180..180 values
            are converted internally)
        max_longitude: Eastern bound (0 to 360)
        region: Optional predefined region name (overrides lat/lon)

    Returns:
        Success message with file path, or error message.

    Raises:
        No exceptions raised - errors returned as strings.
        NOTE(review): datetime.strptime below will still raise ValueError on
        a malformed start_date/end_date - confirm callers pre-validate.
    """
    memory = get_memory()

    # Get API key
    api_key = os.environ.get("ARRAYLAKE_API_KEY")
    if not api_key:
        return (
            "Error: ARRAYLAKE_API_KEY not found in environment.\n"
            "Please set it via environment variable or .env file."
        )
    _ensure_aws_region(api_key)

    # Check dependencies
    try:
        import icechunk  # noqa: F401
    except ImportError:
        return (
            "Error: The 'icechunk' library is required.\n"
            "Install with: pip install icechunk"
        )

    try:
        import xarray as xr
    except ImportError:
        return (
            "Error: The 'xarray' library is required.\n"
            "Install with: pip install xarray"
        )

    # Apply region bounds if specified
    region_tag = None
    if region:
        region_info = get_region(region)
        if region_info:
            min_latitude = region_info.min_lat
            max_latitude = region_info.max_lat
            min_longitude = region_info.min_lon
            max_longitude = region_info.max_lon
            region_tag = region.lower()
            logger.info(f"Using region '{region}'")
        else:
            # Unknown region name falls back to the caller's explicit bounds.
            logger.warning(f"Unknown region '{region}', using provided coordinates")

    # Resolve variable name
    short_var = get_short_name(variable_id)
    var_info = get_variable_info(variable_id)

    # Check for future / too-recent dates (ERA5T has a ~5-day processing lag)
    req_start = datetime.strptime(start_date, '%Y-%m-%d')
    if req_start > datetime.now() - timedelta(days=5):
        return (
            f"Error: Requested start date ({start_date}) is too recent or in the future.\n"
            f"ERA5 data has a ~5-day processing lag. Please request dates at least 5 days ago."
        )

    # Setup paths
    output_dir = get_data_dir()
    filename = generate_filename(
        short_var,
        query_type,
        start_date,
        end_date,
        min_latitude,
        max_latitude,
        min_longitude,
        max_longitude,
        region_tag,
    )
    local_path = str(output_dir / filename)

    # Check cache first
    if os.path.exists(local_path):
        existing = memory.get_dataset(local_path)
        if existing:
            logger.info(f"Cache hit: {local_path}")
            var_name = f"{short_var} ({var_info.long_name})" if var_info else short_var
            return (
                f"CACHE HIT - Data already downloaded\n"
                f" Variable: {var_name}\n"
                f" Period: {existing.start_date} to {existing.end_date}\n"
                f" Path: {local_path}\n\n"
                f"Load with: ds = xr.open_dataset('{local_path}', engine='zarr')"
            )
        else:
            # File exists but not registered - register it
            try:
                file_size = sum(f.stat().st_size for f in Path(local_path).rglob("*") if f.is_file())
                memory.register_dataset(
                    path=local_path,
                    variable=short_var,
                    query_type=query_type,
                    start_date=start_date,
                    end_date=end_date,
                    lat_bounds=(min_latitude, max_latitude),
                    lon_bounds=(min_longitude, max_longitude),
                    file_size_bytes=file_size,
                )
            except Exception as e:
                # Registration is best-effort; the cached file is still usable.
                logger.warning(f"Could not register existing dataset: {e}")

            return (
                f"CACHE HIT - Found existing data\n"
                f" Variable: {short_var}\n"
                f" Path: {local_path}\n\n"
                f"Load with: ds = xr.open_dataset('{local_path}', engine='zarr')"
            )

    # Guard: spatial queries are chunked for map access — multi-year ranges
    # cause thousands of S3 chunk fetches and streaming errors.
    # Limit spatial queries to 1 year max; suggest splitting or using temporal mode.
    req_end = datetime.strptime(end_date, '%Y-%m-%d')
    date_span_days = (req_end - req_start).days
    if query_type == "spatial" and date_span_days > 366:
        return (
            f"Error: Spatial queries are limited to 1 year max ({date_span_days} days requested).\n"
            f"The spatial dataset is optimised for maps, not long time series.\n\n"
            f"Options:\n"
            f"1. Split into yearly requests (e.g. one call per year)\n"
            f"2. Use query_type='temporal' for multi-year time-series analysis\n"
            f"3. Narrow the date range to ≤ 366 days"
        )

    # Download with retry logic (exponential backoff below on failure)
    for attempt in range(CONFIG.max_retries):
        try:
            from arraylake import Client

            logger.info(f"Connecting to Earthmover (attempt {attempt + 1})...")

            client = Client(token=api_key)
            repo = client.get_repo(CONFIG.data_source)
            session = repo.readonly_session("main")

            logger.info(f"Opening {query_type} dataset...")
            # query_type doubles as the zarr group name in the store.
            ds = xr.open_dataset(
                session.store,
                engine="zarr",
                consolidated=False,
                zarr_format=3,
                chunks=None,
                group=query_type,
            )

            # Validate variable exists
            # Auto-compute tp = cp + lsp if tp is not directly available
            compute_tp = False
            if short_var not in ds:
                if short_var == "tp" and "cp" in ds and "lsp" in ds:
                    logger.info("Variable 'tp' not in store — will compute tp = cp + lsp")
                    compute_tp = True
                else:
                    available = list(ds.data_vars)
                    return (
                        f"Error: Variable '{short_var}' not found in dataset.\n"
                        f"Available variables: {', '.join(available)}\n\n"
                        f"Variable reference:\n{list_available_variables()}"
                    )

            # ERA5 latitude is stored 90 -> -90 (descending)
            lat_slice = slice(max_latitude, min_latitude)

            # Handle longitude - ERA5 uses 0-360 but we accept -180 to 180
            # CRITICAL: If coordinates are in Europe (-10 to 30), we need to
            # convert to 0-360 for ERA5's coordinate system

            # Special case: Full world range (-180 to 180)
            # Both become 180 after % 360, which creates empty slice!
            if min_longitude == -180 and max_longitude == 180:
                req_min = 0.0
                req_max = 360.0
            elif min_longitude > max_longitude and min_longitude >= 0 and max_longitude >= 0:
                # Already in 0-360 format but wraps around 0° (e.g., Mediterranean: 354 to 42)
                # This comes from predefined regions — go directly to two-slice logic
                req_min = min_longitude
                req_max = max_longitude
            elif min_longitude < 0:
                # Convert -180/+180 to 0-360 for ERA5
                # e.g., -0.9 becomes 359.1
                req_min = min_longitude % 360
                req_max = max_longitude if max_longitude >= 0 else max_longitude % 360
            else:
                req_min = min_longitude
                req_max = max_longitude if max_longitude >= 0 else max_longitude % 360

            # Now handle the actual slicing
            # If min > max after conversion, it means we span the prime meridian (0°)
            # e.g., req_min=359.1 (was -0.9) and req_max=25.9 means we need 359.1->360 + 0->25.9
            if req_min > req_max:
                # Crosses prime meridian in ERA5's 0-360 system
                # We need to get two slices and concatenate
                logger.info(f"Region spans prime meridian: {req_min:.1f}° to {req_max:.1f}° (ERA5 coords)")

                # Get western portion (from req_min to 360)
                west_slice = slice(req_min, 360.0)
                # Get eastern portion (from 0 to req_max)
                east_slice = slice(0.0, req_max)

                # Subset both portions
                logger.info("Subsetting data (two-part: west + east of prime meridian)...")
                # For computed tp we must fetch both source variables.
                fetch_vars = ["cp", "lsp"] if compute_tp else [short_var]
                subsets_all = []
                for fv in fetch_vars:
                    subset_west = ds[fv].sel(
                        time=slice(start_date, end_date),
                        latitude=lat_slice,
                        longitude=west_slice,
                    )
                    subset_east = ds[fv].sel(
                        time=slice(start_date, end_date),
                        latitude=lat_slice,
                        longitude=east_slice,
                    )

                    # Convert western longitudes from 360+ to negative (for -180/+180 output)
                    # e.g., 359.1 -> -0.9
                    subset_west = subset_west.assign_coords(
                        longitude=subset_west.longitude - 360
                    )

                    # Concatenate along longitude
                    subsets_all.append(xr.concat([subset_west, subset_east], dim='longitude'))

                if compute_tp:
                    # Total precipitation = convective + large-scale.
                    subset = (subsets_all[0] + subsets_all[1]).rename("tp")
                else:
                    subset = subsets_all[0]
            else:
                # Normal case - no prime meridian crossing
                lon_slice = slice(req_min, req_max)

                # Subset the data
                logger.info("Subsetting data...")
                fetch_vars = ["cp", "lsp"] if compute_tp else [short_var]
                subsets_all = []
                for fv in fetch_vars:
                    subsets_all.append(ds[fv].sel(
                        time=slice(start_date, end_date),
                        latitude=lat_slice,
                        longitude=lon_slice,
                    ))

                if compute_tp:
                    subset = (subsets_all[0] + subsets_all[1]).rename("tp")
                else:
                    subset = subsets_all[0]

            # Convert to dataset
            ds_out = subset.to_dataset(name=short_var)

            # Check for empty time dimension (no data in requested range)
            # NOTE(review): relies on Dataset.dims behaving as a mapping with
            # .get - confirm against the pinned xarray version.
            if ds_out.dims.get('time', 0) == 0:
                # Get actual data availability
                time_max = ds['time'].max().values
                import numpy as np
                last_available = str(np.datetime_as_string(time_max, unit='D'))
                return (
                    f"Error: No data available for the requested time range.\n"
                    f"Requested: {start_date} to {end_date}\n"
                    f"ERA5 data on Arraylake is available until {last_available}.\n\n"
                    f"Please request dates up to {last_available}."
                )

            # Check for empty data (all NaNs) — only check 1st timestep
            # Guard: skip the check for very large spatial slices to prevent OOM
            first_step = ds_out[short_var].isel(time=0)
            if first_step.size < 500_000 and first_step.isnull().all().compute():
                return (
                    f"Error: The downloaded data for '{short_var}' is entirely empty (NaNs).\n"
                    f"Possible causes:\n"
                    f"1. The requested date/region has no data (e.g., SST over land).\n"
                    f"2. The request is too recent (ERA5T has a 5-day delay).\n"
                    f"3. Region bounds might be invalid or cross the prime meridian incorrectly."
                )

            # Size guard — prevent downloading datasets larger than the configured limit
            estimated_gb = ds_out.nbytes / (1024 ** 3)
            if estimated_gb > CONFIG.max_download_size_gb:
                return (
                    f"Error: Estimated download size ({estimated_gb:.1f} GB) exceeds the "
                    f"{CONFIG.max_download_size_gb} GB limit.\n"
                    f"Try narrowing the time range or spatial area."
                )

            # Clear encoding for clean serialization
            for var in ds_out.variables:
                ds_out[var].encoding = {}

            # Add metadata
            ds_out.attrs["source"] = "ERA5 Reanalysis via Earthmover Arraylake"
            ds_out.attrs["download_date"] = datetime.now().isoformat()
            ds_out.attrs["query_type"] = query_type
            if var_info:
                ds_out[short_var].attrs["long_name"] = var_info.long_name
                ds_out[short_var].attrs["units"] = var_info.units

            # Clean up existing file
            if os.path.exists(local_path):
                shutil.rmtree(local_path)

            # Save to Zarr
            logger.info(f"Saving to {local_path}...")
            start_time = time.time()
            ds_out.to_zarr(local_path, mode="w", consolidated=True, compute=True)
            download_time = time.time() - start_time

            # Get actual file size
            file_size = sum(f.stat().st_size for f in Path(local_path).rglob("*") if f.is_file())
            shape = tuple(ds_out[short_var].shape)

            # Register in memory
            memory.register_dataset(
                path=local_path,
                variable=short_var,
                query_type=query_type,
                start_date=start_date,
                end_date=end_date,
                lat_bounds=(min_latitude, max_latitude),
                lon_bounds=(min_longitude, max_longitude),
                file_size_bytes=file_size,
                shape=shape,
            )

            # Build success message
            result = f"SUCCESS - Data downloaded\n{'='*50}\n Variable: {short_var}"
            if var_info:
                result += f" ({var_info.long_name})"
            result += (
                f"\n Units: {var_info.units if var_info else 'Unknown'}\n"
                f" Period: {start_date} to {end_date}\n"
                f" Shape: {shape}\n"
                f" Size: {format_file_size(file_size)}\n"
                f" Time: {download_time:.1f}s\n"
                f" Path: {local_path}\n"
                f"{'='*50}\n\n"
                f"Load with:\n"
                f" ds = xr.open_dataset('{local_path}', engine='zarr')"
            )
            return result

        except Exception as e:
            error_msg = str(e)
            logger.error(f"Attempt {attempt + 1} failed: {error_msg}")

            # Clean up partial download
            if os.path.exists(local_path):
                shutil.rmtree(local_path, ignore_errors=True)

            if attempt < CONFIG.max_retries - 1:
                # Exponential backoff: retry_delay * 2^attempt seconds.
                wait_time = CONFIG.retry_delay * (2**attempt)
                logger.info(f"Retrying in {wait_time:.1f}s...")
                time.sleep(wait_time)
            else:
                return (
                    f"Error: Failed after {CONFIG.max_retries} attempts.\n"
                    f"Last error: {error_msg}\n\n"
                    f"Troubleshooting:\n"
                    f"1. Check your ARRAYLAKE_API_KEY\n"
                    f"2. Verify internet connection\n"
                    f"3. Try a smaller date range or region\n"
                    f"4. Check if variable '{short_var}' is available"
                )

    # Defensive: should be unreachable (the loop always returns), kept as a
    # safety net.
    return "Error: Unexpected failure in retrieval logic."
src/eurus/server.py ADDED
@@ -0,0 +1,258 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env python3
"""
ERA5 MCP Server
===============

Model Context Protocol server for ERA5 climate data retrieval.

Usage:
    eurus-mcp              # If installed as package
    python -m eurus.server # Direct execution

Configuration via environment variables:
    ARRAYLAKE_API_KEY  - Required for data access
    ERA5_DATA_DIR      - Data storage directory (default: ./data)
    ERA5_MEMORY_DIR    - Memory storage directory (default: ./.memory)
    ERA5_MAX_RETRIES   - Download retry attempts (default: 3)
    ERA5_LOG_LEVEL     - Logging level (default: INFO)
"""

from __future__ import annotations

import asyncio
import logging
import os
import sys
from typing import Any

from dotenv import load_dotenv

# Load environment variables early (before reading ERA5_LOG_LEVEL below
# and before any eurus imports that may read the environment).
load_dotenv()

# Configure logging
# NOTE(review): an unrecognized ERA5_LOG_LEVEL value makes getattr() raise
# AttributeError at import time — confirm that fail-fast is intended.
log_level = os.environ.get("ERA5_LOG_LEVEL", "INFO").upper()
logging.basicConfig(
    level=getattr(logging, log_level),
    format="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
    datefmt="%H:%M:%S",
)
logger = logging.getLogger(__name__)

# Import MCP components; exit with a clear message if the SDK is missing,
# since nothing below can work without it.
try:
    from mcp.server import Server
    from mcp.server.stdio import stdio_server
    from mcp.types import (
        CallToolResult,
        TextContent,
        Tool,
    )
except ImportError:
    logger.error("MCP library not found. Install with: pip install mcp")
    sys.exit(1)

# Import ERA5 components
from eurus.config import (
    list_available_variables,
)
from eurus.memory import get_memory
from eurus.tools.era5 import retrieve_era5_data, ERA5RetrievalArgs

# Import Maritime Routing tool (HAS_ROUTING_DEPS flags optional deps).
from eurus.tools.routing import (
    calculate_maritime_route,
    RouteArgs,
    HAS_ROUTING_DEPS,
)

# Create MCP server
server = Server("era5-climate-data")

# Alias for compatibility
app = server
@server.list_tools()
async def list_tools() -> list[Tool]:
    """List available MCP tools.

    Always advertises the three core ERA5 tools; adds the maritime
    routing tool only when its optional dependencies are installed.
    """
    tools = [
        Tool(
            name="retrieve_era5_data",
            description=(
                "Retrieve ERA5 climate reanalysis data from Earthmover's cloud archive.\n\n"
                "⚠️ QUERY TYPE is AUTO-DETECTED based on time/area:\n"
                "- 'temporal': time > 1 day AND region < 30°×30° (time series, small area)\n"
                "- 'spatial': time ≤ 1 day OR region ≥ 30°×30° (maps, snapshots, large area)\n\n"
                "VARIABLES: sst, t2, u10, v10, mslp, tcc, tp\n"
                "NOTE: swh (waves) is NOT available in this dataset!\n\n"
                "COORDINATES: Always specify lat/lon bounds explicitly.\n"
                "Longitude: Use 0-360 format (e.g., -74°W = 286°E)\n\n"
                "Returns file path. Load: xr.open_dataset('PATH', engine='zarr')"
            ),
            # Input schema is generated from the pydantic args model so it
            # stays in sync with the tool implementation.
            inputSchema=ERA5RetrievalArgs.model_json_schema()
        ),
        Tool(
            name="list_era5_variables",
            description=(
                "List all available ERA5 variables with their descriptions, units, "
                "and short names for use with retrieve_era5_data."
            ),
            # No arguments accepted.
            inputSchema={
                "type": "object",
                "properties": {},
                "additionalProperties": False
            }
        ),
        Tool(
            name="list_cached_datasets",
            description=(
                "List all ERA5 datasets that have been downloaded and cached locally. "
                "Shows variable, date range, file path, and size."
            ),
            # No arguments accepted.
            inputSchema={
                "type": "object",
                "properties": {},
                "additionalProperties": False
            }
        ),
    ]

    # ========== MARITIME ROUTING TOOL (if dependencies available) ==========
    if HAS_ROUTING_DEPS:
        tools.append(
            Tool(
                name="calculate_maritime_route",
                description=(
                    "Calculate a realistic maritime shipping route between two ports. "
                    "Uses global shipping lane graph to avoid land and find optimal path.\n\n"
                    "RETURNS: Waypoint coordinates, bounding box, and INSTRUCTIONS for "
                    "climatological risk assessment protocol.\n\n"
                    "DOES NOT: Check weather itself. The Agent must follow the returned "
                    "protocol to assess route safety using ERA5 data.\n\n"
                    "WORKFLOW:\n"
                    "1. Call this tool → get waypoints + instructions\n"
                    "2. Download ERA5 wind data (u10, v10) for the region\n"
                    "3. Call get_visualization_guide(viz_type='maritime_risk_assessment')\n"
                    "4. Execute analysis in python_repl"
                ),
                inputSchema=RouteArgs.model_json_schema()
            )
        )

    return tools
148
+
149
+
150
+ # ============================================================================
151
+ # TOOL HANDLERS
152
+ # ============================================================================
153
+
154
@server.call_tool()
async def call_tool(name: str, arguments: dict[str, Any]) -> CallToolResult:
    """Dispatch an MCP tool call to the matching implementation.

    Args:
        name: Tool name, as advertised by list_tools().
        arguments: JSON-decoded arguments supplied by the client.

    Returns:
        CallToolResult with a single plain-text content item;
        isError=True for unknown tools, missing deps, or exceptions.
    """

    def _text(msg: str) -> list[TextContent]:
        # Every tool in this server produces one plain-text payload.
        return [TextContent(type="text", text=msg)]

    try:
        if name == "retrieve_era5_data":
            # The download is synchronous/blocking, so off-load it to the
            # default thread pool to keep the event loop responsive.
            # get_running_loop() replaces the get_event_loop() pattern,
            # which is deprecated inside coroutines since Python 3.10.
            # query_type is auto-detected by retrieve_era5_data itself.
            loop = asyncio.get_running_loop()
            result = await loop.run_in_executor(
                None,
                lambda: retrieve_era5_data(
                    variable_id=arguments["variable_id"],
                    start_date=arguments["start_date"],
                    end_date=arguments["end_date"],
                    min_latitude=arguments["min_latitude"],
                    max_latitude=arguments["max_latitude"],
                    min_longitude=arguments["min_longitude"],
                    max_longitude=arguments["max_longitude"],
                ),
            )
            return CallToolResult(content=_text(result))

        elif name == "list_era5_variables":
            result = list_available_variables()
            return CallToolResult(content=_text(result))

        elif name == "list_cached_datasets":
            memory = get_memory()
            result = memory.list_datasets()
            return CallToolResult(content=_text(result))

        # ========== MARITIME ROUTING HANDLER ==========
        elif name == "calculate_maritime_route":
            # Guard again at call time: the tool may be invoked even if it
            # was not advertised (e.g. by a stale client).
            if not HAS_ROUTING_DEPS:
                return CallToolResult(
                    content=_text(
                        "Error: Maritime routing dependencies not installed.\n"
                        "Install with: pip install scgraph geopy"
                    ),
                    isError=True,
                )
            loop = asyncio.get_running_loop()
            result = await loop.run_in_executor(
                None,
                lambda: calculate_maritime_route(
                    origin_lat=arguments["origin_lat"],
                    origin_lon=arguments["origin_lon"],
                    dest_lat=arguments["dest_lat"],
                    dest_lon=arguments["dest_lon"],
                    month=arguments["month"],
                    year=arguments.get("year"),
                    speed_knots=arguments.get("speed_knots", 14.0),
                ),
            )
            return CallToolResult(content=_text(result))

        else:
            return CallToolResult(
                content=_text(f"Unknown tool: {name}"),
                isError=True,
            )

    except Exception as e:
        # Catch-all so a tool bug surfaces as a structured MCP error
        # instead of killing the server process.
        logger.exception(f"Error executing tool {name}")
        return CallToolResult(
            content=_text(f"Error: {str(e)}"),
            isError=True,
        )
+ )
221
+
222
+
223
+ # ============================================================================
224
+ # SERVER STARTUP
225
+ # ============================================================================
226
+
227
async def run_server() -> None:
    """Serve MCP requests over stdio until the client disconnects."""
    logger.info("Starting ERA5 MCP Server...")

    # Warn (but do not abort) when the data-access credential is absent;
    # listing tools still works, only downloads will fail.
    api_key = os.environ.get("ARRAYLAKE_API_KEY")
    if not api_key:
        logger.warning(
            "ARRAYLAKE_API_KEY not set. Data retrieval will fail. "
            "Set it via environment variable or .env file."
        )

    async with stdio_server() as (reader, writer):
        init_options = server.create_initialization_options()
        await server.run(reader, writer, init_options)
244
+
245
+
246
def main() -> None:
    """Console entry point: run the stdio server and translate exits."""
    try:
        asyncio.run(run_server())
    except KeyboardInterrupt:
        # Ctrl-C is a normal shutdown path, not a failure.
        logger.info("Server shutdown requested")
        return
    except Exception as e:
        logger.exception(f"Server error: {e}")
        sys.exit(1)
255
+
256
+
257
# Support direct execution: python -m eurus.server
if __name__ == "__main__":
    main()
src/eurus/tools/__init__.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Eurus Tools Registry
3
+ =====================
4
+ Central hub for all agent tools.
5
+
6
+ Tools:
7
+ - Data Retrieval: ERA5 data access
8
+ - Analysis: Python REPL for custom analysis
9
+ - Guides: Methodology and visualization guidance
10
+ - Routing: Maritime navigation (optional)
11
+ """
12
+
13
+ from typing import List
14
+ from langchain_core.tools import BaseTool
15
+
16
+ # Import core tools
17
+ from .era5 import era5_tool
18
+ from .repl import PythonREPLTool
19
+ from .routing import routing_tool
20
+ from .analysis_guide import analysis_guide_tool, visualization_guide_tool
21
+
22
+ # Optional dependency check for routing
23
+ try:
24
+ import scgraph
25
+ HAS_ROUTING_DEPS = True
26
+ except ImportError:
27
+ HAS_ROUTING_DEPS = False
28
+
29
+
30
def get_all_tools(
    enable_routing: bool = True,
    enable_guide: bool = True
) -> List[BaseTool]:
    """
    Return a list of all available tools for the agent.

    Args:
        enable_routing: If True, includes the maritime routing tool (default: True).
        enable_guide: If True, includes the guide tools (default: True).

    Returns:
        List of LangChain tools for the agent.
    """
    # Local import so the module's import surface is unchanged.
    import logging

    # Core tools: data retrieval + Python analysis are always available.
    tools: List[BaseTool] = [
        era5_tool,
        PythonREPLTool(working_dir=".")
    ]

    # Guide tools: methodology and visualization guidance.
    if enable_guide:
        tools.append(analysis_guide_tool)
        tools.append(visualization_guide_tool)

    # Routing tool: maritime navigation (requires the optional scgraph dep).
    if enable_routing:
        if HAS_ROUTING_DEPS:
            tools.append(routing_tool)
        else:
            # Use logging rather than print(): the rest of the project logs
            # via the logging module, and writing to stdout can corrupt
            # stdio-based transports (e.g. the MCP server).
            logging.getLogger(__name__).warning(
                "Routing tools requested but dependencies (scgraph) are missing."
            )

    return tools


# Alias for backward compatibility
get_tools = get_all_tools
src/eurus/tools/analysis_guide.py ADDED
@@ -0,0 +1,1191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Analysis Guide Tool
3
+ ====================
4
+ Provides methodological guidance for climate data analysis using python_repl.
5
+
6
+ This tool returns TEXT INSTRUCTIONS (not executable code!) for:
7
+ - What approach to take
8
+ - How to structure the analysis
9
+ - Quality checks and pitfalls
10
+ - Best practices for visualization
11
+
12
+ The agent uses python_repl to execute the actual analysis.
13
+ """
14
+
15
+ from typing import Literal
16
+ from pydantic import BaseModel, Field
17
+ from langchain_core.tools import StructuredTool
18
+
19
+
20
+ # =============================================================================
21
+ # ANALYSIS GUIDES
22
+ # =============================================================================
23
+
24
+ ANALYSIS_GUIDES = {
25
+ # -------------------------------------------------------------------------
26
+ # DATA OPERATIONS
27
+ # -------------------------------------------------------------------------
28
+ "load_data": """
29
+ ## Loading ERA5 Data
30
+
31
+ ### When to use
32
+ - Initializing any analysis
33
+ - Loading downloaded Zarr data
34
+
35
+ ### Workflow
36
+ 1. **Load data** — Use `xr.open_dataset('path', engine='zarr')` or `xr.open_zarr('path')`.
37
+ 2. **Inspect dataset** — Check coordinates and available variables.
38
+ 3. **Convert units** before any analysis:
39
+ - Temp (`t2`, `d2`, `skt`, `sst`, `stl1`): subtract 273.15 → °C
40
+ - Precip (`tp`, `cp`, `lsp`): multiply by 1000 → mm
41
+ - Pressure (`sp`, `mslp`): divide by 100 → hPa
42
+
43
+ ### Quality Checklist
44
+ - [ ] Data loaded lazily (avoid `.load()` on large datasets)
45
+ - [ ] Units converted before aggregations
46
+ - [ ] Coordinate names verified (latitude vs lat, etc.)
47
+
48
+ ### Common Pitfalls
49
+ - ⚠️ Loading multi-year global data into memory causes OOM. Keep operations lazy until subsetted.
50
+ - ⚠️ Some Zarr stores have `valid_time` instead of `time` — check with `.coords`.
51
+ """,
52
+
53
+ "spatial_subset": """
54
+ ## Spatial Subsetting
55
+
56
+ ### When to use
57
+ - Focusing on a specific region, country, or routing bounding box
58
+ - Reducing data size before heavy analysis
59
+
60
+ ### Workflow
61
+ 1. **Determine bounds** — Find min/max latitude and longitude.
62
+ 2. **Check coordinate orientation** — ERA5 latitude is often descending (90 to -90).
63
+ 3. **Slice data** — `.sel(latitude=slice(north, south), longitude=slice(west, east))`.
64
+
65
+ ### Quality Checklist
66
+ - [ ] Latitude sliced from North to South (max to min) for descending coords
67
+ - [ ] Longitudes match dataset format (convert -180/180 ↔ 0/360 if needed)
68
+ - [ ] Result is not empty — verify with `.shape`
69
+
70
+ ### Common Pitfalls
71
+ - ⚠️ Slicing `slice(south, north)` on descending coords → empty result.
72
+ - ⚠️ Crossing the prime meridian in 0-360 coords requires concatenating two slices.
73
+ - ⚠️ Use `.sel(method='nearest')` for point extraction, not exact matching.
74
+ """,
75
+
76
+ "temporal_subset": """
77
+ ## Temporal Subsetting & Aggregation
78
+
79
+ ### When to use
80
+ - Isolating specific events, months, or seasons
81
+ - Downsampling hourly data to daily/monthly
82
+
83
+ ### Workflow
84
+ 1. **Time slice** — `.sel(time=slice('2023-01-01', '2023-12-31'))`.
85
+ 2. **Filter** — Seasons: `.sel(time=ds.time.dt.season == 'DJF')`.
86
+ 3. **Resample** — `.resample(time='1D').mean()` for daily means.
87
+
88
+ ### Quality Checklist
89
+ - [ ] Aggregation matches variable: `.mean()` for T/wind, `.sum()` for precip
90
+ - [ ] Leap years handled if using day-of-year grouping
91
+
92
+ ### Common Pitfalls
93
+ - ⚠️ DJF wraps across years — verify start/end boundaries.
94
+ - ⚠️ `.resample()` (continuous) ≠ `.groupby()` (climatological). Don't mix them up.
95
+ - ⚠️ Radiation variables (`ssr`, `ssrd`) are accumulated — need differencing, not averaging.
96
+ """,
97
+
98
+ # -------------------------------------------------------------------------
99
+ # STATISTICAL ANALYSIS
100
+ # -------------------------------------------------------------------------
101
+ "anomalies": """
102
+ ## Anomaly Analysis
103
+
104
+ ### When to use
105
+ - "How unusual was this period?"
106
+ - Comparing current conditions to "normal"
107
+ - Any "above/below average" question
108
+
109
+ ### Workflow
110
+ 1. **Define baseline** — ≥10 years (30 ideal). E.g. 1991-2020.
111
+ 2. **Compute climatology** — `clim = ds.groupby('time.month').mean('time')`.
112
+ 3. **Subtract** — `anomaly = ds.groupby('time.month') - clim`.
113
+ 4. **Convert units** — Report in °C, mm, m/s (not K, m, Pa).
114
+ 5. **Assess magnitude** — Compare to σ of the baseline period.
115
+
116
+ ### Quality Checklist
117
+ - [ ] Baseline ≥10 years
118
+ - [ ] Same calendar grouping for clim and analysis
119
+ - [ ] Units converted for readability
120
+ - [ ] Spatial context: is anomaly regional or localized?
121
+
122
+ ### Common Pitfalls
123
+ - ⚠️ Short baselines amplify noise.
124
+ - ⚠️ Daily climatologies with <30yr baseline are noisy → use monthly grouping.
125
+ - ⚠️ Be explicit: spatial anomaly vs temporal anomaly.
126
+
127
+ ### Interpretation
128
+ - Positive = warmer/wetter/windier than normal.
129
+ - ±1σ = common, ±2σ = unusual (5%), ±3σ = extreme (0.3%).
130
+ - Maps: MUST use `RdBu_r` centered at zero via `TwoSlopeNorm`.
131
+ """,
132
+
133
+ "zscore": """
134
+ ## Z-Score Analysis (Standardized Anomalies)
135
+
136
+ ### When to use
137
+ - Comparing extremity across different variables
138
+ - Standardizing for regions with different variability
139
+ - Identifying statistically significant departures
140
+
141
+ ### Workflow
142
+ 1. **Compute baseline mean** — Grouped by month for seasonality.
143
+ 2. **Compute baseline std** — Same period, same grouping.
144
+ 3. **Standardize** — `z = (value - mean) / std`.
145
+
146
+ ### Quality Checklist
147
+ - [ ] Standard deviation is non-zero everywhere
148
+ - [ ] Baseline period matches for mean and std
149
+
150
+ ### Common Pitfalls
151
+ - ⚠️ Precipitation is NOT normally distributed — use SPI or percentiles instead of raw Z-scores.
152
+ - ⚠️ Z-scores near coastlines can be extreme due to mixed land/ocean std.
153
+
154
+ ### Interpretation
155
+ - Z = 0: average. ±1: normal (68%). ±2: unusual (5%). ±3: extreme (0.3%).
156
+ """,
157
+
158
+ "trend_analysis": """
159
+ ## Linear Trend Analysis
160
+
161
+ ### When to use
162
+ - "Is it getting warmer/wetter over time?"
163
+ - Detecting long-term climate change signals
164
+
165
+ ### Workflow
166
+ 1. **Downsample** — Convert to annual/seasonal means first.
167
+ 2. **Regress** — `scipy.stats.linregress` or `np.polyfit(degree=1)`.
168
+ 3. **Significance** — Extract p-value for the slope.
169
+ 4. **Scale** — Multiply annual slope by 10 → "per decade".
170
+
171
+ ### Quality Checklist
172
+ - [ ] Period ≥20-30 years for meaningful trends
173
+ - [ ] Seasonal cycle removed before fitting
174
+ - [ ] Significance tested (p < 0.05)
175
+ - [ ] Report trend as units/decade
176
+
177
+ ### Common Pitfalls
178
+ - ⚠️ Trend on daily data without removing seasonality → dominated by summer/winter swings.
179
+ - ⚠️ Short series have uncertain trends — report confidence intervals.
180
+ - ⚠️ Autocorrelation can inflate significance — consider using Mann-Kendall test.
181
+
182
+ ### Interpretation
183
+ - Report as °C/decade. Use stippling on maps for significant areas.
184
+ """,
185
+
186
+ "eof_analysis": """
187
+ ## EOF/PCA Analysis
188
+
189
+ ### When to use
190
+ - Finding dominant spatial patterns (ENSO, NAO, PDO)
191
+ - Dimensionality reduction of spatiotemporal data
192
+
193
+ ### Workflow
194
+ 1. **Deseasonalize** — Compute anomalies to remove the seasonal cycle.
195
+ 2. **Latitude weighting** — Multiply by `np.sqrt(np.cos(np.deg2rad(lat)))`.
196
+ 3. **Decompose** — PCA on flattened space dimensions.
197
+ 4. **Reconstruct** — Map PCs back to spatial grid (EOFs).
198
+
199
+ ### Quality Checklist
200
+ - [ ] Seasonal cycle removed
201
+ - [ ] Latitude weighting applied
202
+ - [ ] Variance explained (%) calculated per mode
203
+ - [ ] Physical interpretation attempted for leading modes
204
+
205
+ ### Common Pitfalls
206
+ - ⚠️ Unweighted EOFs inflate polar regions artificially.
207
+ - ⚠️ EOFs are mathematical constructs — not guaranteed to correspond to physical modes.
208
+
209
+ ### Interpretation
210
+ - EOF1: dominant spatial pattern. PC1: its temporal evolution.
211
+ - If EOF1 explains >20% variance, it's highly dominant.
212
+ """,
213
+
214
+ "correlation_analysis": """
215
+ ## Correlation Analysis
216
+
217
+ ### When to use
218
+ - Spatial/temporal correlation mapping
219
+ - Lead-lag analysis (e.g., SST vs downstream precipitation)
220
+ - Teleconnection exploration
221
+
222
+ ### Workflow
223
+ 1. **Deseasonalize both variables** — Remove seasonal cycle from both.
224
+ 2. **Align time coordinates** — Ensure identical time axes.
225
+ 3. **Correlate** — `xr.corr(var1, var2, dim='time')`.
226
+ 4. **Lead-lag** — Use `.shift(time=N)` month offsets to test delayed responses.
227
+ 5. **Significance** — Compute p-values, mask insignificant areas.
228
+
229
+ ### Quality Checklist
230
+ - [ ] Both variables deseasonalized
231
+ - [ ] p-values computed (p < 0.05 for significance)
232
+ - [ ] Sample size adequate (≥30 time points)
233
+
234
+ ### Common Pitfalls
235
+ - ⚠️ Correlating raw data captures the seasonal cycle — everything correlates with summer.
236
+ - ⚠️ Spatial autocorrelation inflates field significance — apply Bonferroni or FDR correction.
237
+
238
+ ### Interpretation
239
+ - R² gives variance explained. Lead-lag peak indicates response time.
240
+ - Plot spatial R maps with `RdBu_r`, stipple significant areas.
241
+ """,
242
+
243
+ "composite_analysis": """
244
+ ## Composite Analysis
245
+
246
+ ### When to use
247
+ - Average conditions during El Niño vs La Niña years
248
+ - Spatial fingerprint of specific extreme events
249
+ - "What does the atmosphere look like when X happens?"
250
+
251
+ ### Workflow
252
+ 1. **Define events** — Boolean mask of times exceeding a threshold (e.g., Niño3.4 > 0.5°C).
253
+ 2. **Subset data** — `.where(mask, drop=True)`.
254
+ 3. **Average** — Time mean of the subset = composite.
255
+ 4. **Compare** — Subtract climatological mean → composite anomaly.
256
+
257
+ ### Quality Checklist
258
+ - [ ] Sample size ≥10 events for robustness
259
+ - [ ] Baseline climatology matches the season of the events
260
+ - [ ] Significance tested via bootstrap or t-test
261
+
262
+ ### Common Pitfalls
263
+ - ⚠️ Compositing n=2 events → noise, not a physical signal.
264
+ - ⚠️ Mixing seasons in composite (El Niño in DJF vs JJA) obscures the signal.
265
+
266
+ ### Interpretation
267
+ - Shows the typical anomaly expected when event occurs.
268
+ - Plot with `RdBu_r` diverging colormap. Stipple significant areas.
269
+ """,
270
+
271
+ "diurnal_cycle": """
272
+ ## Diurnal Cycle Analysis
273
+
274
+ ### When to use
275
+ - Hourly variability within days (afternoon convection, nighttime cooling)
276
+ - Solar radiation patterns
277
+
278
+ ### Workflow
279
+ 1. **Group by hour** — `ds.groupby('time.hour').mean('time')`.
280
+ 2. **Convert to local time** — ERA5 is UTC. `Local = UTC + Longitude/15`.
281
+ 3. **Calculate amplitude** — `diurnal_range = max('hour') - min('hour')`.
282
+
283
+ ### Quality Checklist
284
+ - [ ] Input data is hourly (not daily/monthly)
285
+ - [ ] UTC → local time conversion applied before labeling "afternoon"/"morning"
286
+
287
+ ### Common Pitfalls
288
+ - ⚠️ Averaging global data by UTC hour mixes day and night across longitudes.
289
+ - ⚠️ Cloud cover (`tcc`) and radiation (`ssrd`) have strong diurnal signals — always check.
290
+
291
+ ### Interpretation
292
+ - `blh` and `t2` peak mid-afternoon. Convective precip (`cp`) peaks late afternoon over land, early morning over oceans.
293
+ """,
294
+
295
+ "seasonal_decomposition": """
296
+ ## Seasonal Decomposition
297
+
298
+ ### When to use
299
+ - Separating the seasonal cycle from interannual variability
300
+ - Visualizing how a specific year deviates from the normal curve
301
+
302
+ ### Workflow
303
+ 1. **Compute climatology** — `.groupby('time.month').mean('time')`.
304
+ 2. **Extract anomalies** — Subtract climatology from raw data.
305
+ 3. **Smooth trend** — Apply 12-month rolling mean to extract multi-year trends.
306
+
307
+ ### Quality Checklist
308
+ - [ ] Baseline robust (≥10 years)
309
+ - [ ] Residual = raw - seasonal - trend (should be ~white noise)
310
+
311
+ ### Common Pitfalls
312
+ - ⚠️ Day-of-year climatologies over short baselines are noisy — smooth with 15-day window.
313
+
314
+ ### Interpretation
315
+ - Separates variance into: seasonal (predictable), trend (long-term), residual (weather noise).
316
+ """,
317
+
318
+ "spectral_analysis": """
319
+ ## Spectral Analysis
320
+
321
+ ### When to use
322
+ - Periodicity detection (ENSO 3-7yr, MJO 30-60d, annual/semi-annual)
323
+ - Confirming suspected oscillatory behavior
324
+
325
+ ### Workflow
326
+ 1. **Prepare 1D series** — Spatial average or single point.
327
+ 2. **Detrend** — Remove linear trend AND seasonal cycle.
328
+ 3. **Compute spectrum** — `scipy.signal.welch` or `periodogram`.
329
+ 4. **Plot as Period** — X-axis = 1/frequency (years or days), not raw frequency.
330
+
331
+ ### Quality Checklist
332
+ - [ ] No NaNs in time series (interpolate or drop)
333
+ - [ ] Time coordinate evenly spaced
334
+ - [ ] Seasonal cycle removed
335
+
336
+ ### Common Pitfalls
337
+ - ⚠️ Seasonal cycle dominates spectrum if not removed — drowns everything else.
338
+ - ⚠️ Short records can't resolve low-frequency oscillations (need ≥3× the period).
339
+
340
+ ### Interpretation
341
+ - Peaks = dominant cycles. ENSO: 3-7yr. QBO: ~28mo. MJO: 30-60d. Annual: 12mo.
342
+ """,
343
+
344
+ "spatial_statistics": """
345
+ ## Spatial Statistics & Area Averaging
346
+
347
+ ### When to use
348
+ - Computing a single time series for a geographic region
349
+ - Area-weighted means for reporting
350
+ - Field significance testing
351
+
352
+ ### Workflow
353
+ 1. **Latitude weights** — `weights = np.cos(np.deg2rad(ds.latitude))`.
354
+ 2. **Apply** — `ds.weighted(weights).mean(dim=['latitude', 'longitude'])`.
355
+ 3. **Land/sea mask** — Apply if needed (e.g., ocean-only SST average).
356
+
357
+ ### Quality Checklist
358
+ - [ ] Latitude weighting applied BEFORE spatial averaging
359
+ - [ ] Land-sea mask applied where relevant
360
+ - [ ] Units preserved correctly
361
+
362
+ ### Common Pitfalls
363
+ - ⚠️ Unweighted averages bias toward poles (smaller grid cells over-counted).
364
+ - ⚠️ Global mean SST must exclude land points.
365
+
366
+ ### Interpretation
367
+ - Produces physically accurate area-averaged time series.
368
+ """,
369
+
370
+ "multi_variable": """
371
+ ## Multi-Variable Derived Quantities
372
+
373
+ ### When to use
374
+ - Combining ERA5 variables for derived metrics
375
+
376
+ ### Common Derivations
377
+ 1. **Wind speed** — `wspd = np.sqrt(u10**2 + v10**2)` (or u100/v100 for hub-height).
378
+ 2. **Wind direction** — `wdir = (270 - np.degrees(np.arctan2(v10, u10))) % 360`.
379
+ 3. **Relative humidity** — From `t2` and `d2` using Magnus formula.
380
+ 4. **Heat index** — Combine `t2` and `d2` (Steadman formula).
381
+ 5. **Vapour transport** — `IVT ≈ tcwv * wspd` (surface proxy).
382
+ 6. **Total precip check** — `tp ≈ cp + lsp`.
383
+
384
+ ### Quality Checklist
385
+ - [ ] Variables share identical grids (time, lat, lon)
386
+ - [ ] Units matched before combining (both in °C, both in m/s, etc.)
387
+
388
+ ### Common Pitfalls
389
+ - ⚠️ `mean(speed) ≠ speed_of_means` — always compute speed FIRST, then average.
390
+ - ⚠️ Wind direction requires proper 4-quadrant atan2, not naive arctan.
391
+
392
+ ### Interpretation
393
+ - Derived metrics often better represent human/environmental impact than raw fields.
394
+ """,
395
+
396
+ "climatology_normals": """
397
+ ## Climatology Normals (WMO Standard)
398
+
399
+ ### When to use
400
+ - Computing 30-year normals
401
+ - Calculating "departure from normal"
402
+
403
+ ### Workflow
404
+ 1. **Select base period** — Standard WMO epoch: 1991-2020 (or 1981-2010).
405
+ 2. **Compute monthly averages** — `normals = baseline.groupby('time.month').mean('time')`.
406
+ 3. **Departure** — `departure = current.groupby('time.month') - normals`.
407
+
408
+ ### Quality Checklist
409
+ - [ ] Exactly 30 years used
410
+ - [ ] Same months compared (don't mix Feb normals with March data)
411
+
412
+ ### Common Pitfalls
413
+ - ⚠️ Moving baselines make comparisons with WMO climate reports inconsistent.
414
+
415
+ ### Interpretation
416
+ - "Normal" = statistical baseline. Departures express how much current conditions deviate.
417
+ """,
418
+
419
+ # -------------------------------------------------------------------------
420
+ # CLIMATE INDICES & EXTREMES
421
+ # -------------------------------------------------------------------------
422
+ "climate_indices": """
423
+ ## Climate Indices
424
+
425
+ ### When to use
426
+ - Assessing ENSO, NAO, PDO, AMO teleconnections
427
+ - Correlating local weather with large-scale modes
428
+
429
+ ### Key Indices
430
+ - **ENSO (Niño 3.4)**: `sst` anomaly, 5°S-5°N, 170°W-120°W. El Niño > +0.5°C, La Niña < -0.5°C.
431
+ - **NAO**: `mslp` difference, Azores High minus Icelandic Low. Positive → mild European winters.
432
+ - **PDO**: Leading EOF of North Pacific `sst` (north of 20°N). 20-30yr phases.
433
+ - **AMO**: Detrended North Atlantic `sst` average. ~60-70yr cycle.
434
+
435
+ ### Workflow
436
+ 1. **Extract region** — Use standard geographic bounds.
437
+ 2. **Compute anomaly** — Area-averaged, against 30yr baseline.
438
+ 3. **Smooth** — 3-to-5 month rolling mean.
439
+
440
+ ### Quality Checklist
441
+ - [ ] Standard geographic bounds strictly followed
442
+ - [ ] Rolling mean applied to filter weather noise
443
+ - [ ] Latitude-weighted area average
444
+
445
+ ### Common Pitfalls
446
+ - ⚠️ Without rolling mean, the index is too noisy for classification.
447
+ - ⚠️ Using incorrect region bounds produces a different (invalid) index.
448
+ """,
449
+
450
+ "extremes": """
451
+ ## Extreme Event Analysis
452
+
453
+ ### When to use
454
+ - Heat/cold extremes, heavy precipitation, tail-risk assessment
455
+ - Threshold exceedance frequency
456
+
457
+ ### Workflow
458
+ 1. **Define threshold** — Absolute (e.g., T > 35°C) or percentile-based (> 95th pctl of baseline).
459
+ 2. **Create mask** — Boolean where condition is met.
460
+ 3. **Count** — Sum over time for extreme days per year/month.
461
+ 4. **Trend** — Check if frequency is increasing over time.
462
+
463
+ ### Quality Checklist
464
+ - [ ] Percentiles from robust baseline (≥30 years)
465
+ - [ ] Use daily data, not monthly averages
466
+ - [ ] Units converted before applying thresholds
467
+
468
+ ### Common Pitfalls
469
+ - ⚠️ 99th percentile on monthly averages misses true daily extremes entirely.
470
+ - ⚠️ Absolute thresholds (e.g., 35°C) are region-dependent — 35°C is normal in Sahara, extreme in London.
471
+
472
+ ### Interpretation
473
+ - Increasing frequency of extremes = non-linear climate change impact.
474
+ - Report as "N days/year exceeding threshold" or "return period shortened from X to Y years".
475
+ """,
476
+
477
+ "drought_analysis": """
478
+ ## Drought Analysis
479
+
480
+ ### When to use
481
+ - Prolonged precipitation deficits
482
+ - Agricultural/hydrological impact assessment
483
+ - SPI (Standardized Precipitation Index) proxy
484
+
485
+ ### Workflow
486
+ 1. **Extract precip** — Use `tp` in mm (×1000 from meters).
487
+ 2. **Accumulate** — Rolling sums: `tp.rolling(time=3).sum()` for 3-month SPI.
488
+ 3. **Standardize** — `(accumulated - mean) / std` → SPI proxy.
489
+ 4. **Cross-check** — Verify with `swvl1` (soil moisture) for ground-truth.
490
+
491
+ ### Quality Checklist
492
+ - [ ] Monthly data used (not hourly)
493
+ - [ ] Baseline ≥30 years for stable statistics
494
+ - [ ] Multiple accumulation periods tested (1, 3, 6, 12 months)
495
+
496
+ ### Common Pitfalls
497
+ - ⚠️ Absolute precipitation deficits are meaningless in deserts — always standardize.
498
+ - ⚠️ Gamma distribution fit (proper SPI) is better than raw Z-score for precip.
499
+
500
+ ### Interpretation
501
+ - SPI < -1.0: Moderate drought. < -1.5: Severe. < -2.0: Extreme.
502
+ """,
503
+
504
+ "heatwave_detection": """
505
+ ## Heatwave Detection
506
+
507
+ ### When to use
508
+ - Identifying heatwave events using standard definitions
509
+ - Assessing heat-related risk periods
510
+
511
+ ### Workflow
512
+ 1. **Daily data** — Must be daily resolution (resample hourly if needed).
513
+ 2. **Threshold** — 90th percentile of `t2` per calendar day from baseline.
514
+ 3. **Exceedance mask** — `is_hot = t2_daily > threshold_90`.
515
+ 4. **Streak detection** — Find ≥3 consecutive hot days using rolling sum ≥ 3.
516
+
517
+ ### Quality Checklist
518
+ - [ ] Daily data (not monthly!)
519
+ - [ ] `t2` converted to °C
520
+ - [ ] Threshold is per-calendar-day (not a single annual value)
521
+ - [ ] Duration criterion applied (≥3 days)
522
+
523
+ ### Common Pitfalls
524
+ - ⚠️ Monthly data — physically impossible to detect heatwaves.
525
+ - ⚠️ A single hot day is not a heatwave — duration matters.
526
+ - ⚠️ Nighttime temperatures (`t2` at 00/06 UTC) also matter for health impact.
527
+
528
+ ### Interpretation
529
+ - Heatwaves require BOTH intensity (high T) AND duration (consecutive days).
530
+ - Report: number of events per year, mean duration, max intensity.
531
+ """,
532
+
533
+ "atmospheric_rivers": """
534
+ ## Atmospheric Rivers Detection
535
+
536
+ ### When to use
537
+ - Detecting AR events from integrated vapour transport proxy
538
+ - Extreme precipitation risk at landfall
539
+
540
+ ### Workflow
541
+ 1. **Extract** — `tcwv` + `u10`, `v10`.
542
+ 2. **Compute IVT proxy** — `ivt = tcwv * np.sqrt(u10**2 + v10**2)`.
543
+ 3. **Threshold** — IVT proxy > 250 kg/m/s (approximate).
544
+ 4. **Shape check** — Feature should be elongated (>2000km long, <1000km wide).
545
+
546
+ ### Quality Checklist
547
+ - [ ] Acknowledge this is surface-wind proxy (true IVT needs pressure-level data)
548
+ - [ ] Cross-validate with heavy `tp` at landfall
549
+ - [ ] Check for persistent (≥24h) plume features
550
+
551
+ ### Common Pitfalls
552
+ - ⚠️ Tropical moisture pools are NOT ARs — wind-speed multiplier is essential to distinguish.
553
+ - ⚠️ This surface proxy underestimates true IVT — use conservative thresholds.
554
+
555
+ ### Interpretation
556
+ - High `tcwv` + strong directed wind at coast = extreme flood risk.
557
+ - Map with `YlGnBu` for moisture intensity.
558
+ """,
559
+
560
+ "blocking_events": """
561
+ ## Atmospheric Blocking Detection
562
+
563
+ ### When to use
564
+ - Identifying persistent high-pressure blocks from MSLP
565
+ - Explaining prolonged heatwaves, droughts, or cold spells
566
+
567
+ ### Workflow
568
+ 1. **Extract** — `mslp` in hPa (÷100 from Pa).
569
+ 2. **Compute anomalies** — Daily anomalies from climatology.
570
+ 3. **Detect** — Find positive anomalies > 1.5σ persisting ≥5 days.
571
+ 4. **Location** — Focus on mid-to-high latitudes (40-70°N typically).
572
+
573
+ ### Quality Checklist
574
+ - [ ] 3-5 day rolling mean applied to filter transient ridges
575
+ - [ ] Persistence criterion enforced (≥5 days)
576
+ - [ ] Mid-latitude focus
577
+
578
+ ### Common Pitfalls
579
+ - ⚠️ Fast-moving ridges are NOT blocks — persistence is key.
580
+ - ⚠️ Blocks in the Southern Hemisphere are rarer and weaker.
581
+
582
+ ### Interpretation
583
+ - Blocks force storms to detour, causing prolonged rain on flanks and drought/heat underneath.
584
+ """,
585
+
586
+ "energy_budget": """
587
+ ## Surface Energy Budget
588
+
589
+ ### When to use
590
+ - Analyzing radiation balance and surface heating
591
+ - Solar energy potential assessment
592
+
593
+ ### Workflow
594
+ 1. **Extract radiation** — `ssrd` (incoming solar), `ssr` (net solar after reflection).
595
+ 2. **Convert units** — J/m² to W/m² by dividing by accumulation period (3600s for hourly).
596
+ 3. **Compute albedo proxy** — `albedo ≈ 1 - (ssr / ssrd)` where ssrd > 0.
597
+ 4. **Seasonal patterns** — Group by month to see radiation cycle.
598
+
599
+ ### Quality Checklist
600
+ - [ ] Accumulation period properly accounted for (hourly vs daily sums)
601
+ - [ ] Division by zero protected (nighttime ssrd = 0)
602
+ - [ ] Units clearly stated: W/m² or MJ/m²/day
603
+
604
+ ### Common Pitfalls
605
+ - ⚠️ ERA5 radiation is ACCUMULATED over the forecast step — must difference consecutive steps for instantaneous values.
606
+ - ⚠️ `ssr` already accounts for clouds and albedo — don't double-correct.
607
+
608
+ ### Interpretation
609
+ - Higher `ssrd` → higher solar potential. Low `ssr/ssrd` ratio → high cloudiness or reflective surface (snow/ice).
610
+ """,
611
+
612
+ "wind_energy": """
613
+ ## Wind Energy Assessment
614
+
615
+ ### When to use
616
+ - Wind power density analysis
617
+ - Turbine hub-height wind resource mapping
618
+
619
+ ### Workflow
620
+ 1. **Use hub-height winds** — `u100`, `v100` (100m, not 10m surface winds).
621
+ 2. **Compute speed** — `wspd100 = np.sqrt(u100**2 + v100**2)`.
622
+ 3. **Power density** — `P = 0.5 * rho * wspd100**3` where rho ≈ 1.225 kg/m³.
623
+ 4. **Capacity factor** — Fraction of time wind exceeds cut-in speed (~3 m/s) and stays below cut-out (~25 m/s).
624
+ 5. **Weibull fit** — Fit shape (k) and scale (A) parameters to the wind speed distribution.
625
+
626
+ ### Quality Checklist
627
+ - [ ] Using 100m winds, NOT 10m (turbines don't operate at surface)
628
+ - [ ] Power density in W/m²
629
+ - [ ] Seasonal variation checked (winter vs summer)
630
+
631
+ ### Common Pitfalls
632
+ - ⚠️ Using 10m winds severely underestimates wind energy potential.
633
+ - ⚠️ Mean wind speed misleads — power depends on speed CUBED, so variability matters enormously.
634
+
635
+ ### Interpretation
636
+ - Power density >400 W/m² = excellent wind resource.
637
+ - Report Weibull k parameter: k < 2 = gusty/variable, k > 3 = steady flow.
638
+ """,
639
+
640
+ "moisture_budget": """
641
+ ## Moisture Budget Analysis
642
+
643
+ ### When to use
644
+ - Understanding precipitation sources
645
+ - Tracking moisture plumes and convergence zones
646
+
647
+ ### Workflow
648
+ 1. **Extract** — `tcwv` (precipitable water), `tcw` (total column water incl. liquid/ice).
649
+ 2. **Temporal evolution** — Track `tcwv` changes to infer moisture convergence.
650
+ 3. **Relate to precip** — Compare `tcwv` peaks with `tp` to see conversion efficiency.
651
+ 4. **Spatial patterns** — Map `tcwv` to identify moisture corridors.
652
+
653
+ ### Quality Checklist
654
+ - [ ] Distinguish `tcwv` (vapour only) from `tcw` (vapour + liquid + ice)
655
+ - [ ] Units: kg/m² (equivalent to mm of water)
656
+
657
+ ### Common Pitfalls
658
+ - ⚠️ High `tcwv` doesn't guarantee rain — need a lifting mechanism.
659
+ - ⚠️ `tcw - tcwv` gives cloud water + ice content (proxy for cloud thickness).
660
+
661
+ ### Interpretation
662
+ - `tcwv` > 50 kg/m² in tropics = moisture-laden atmosphere primed for heavy precip.
663
+ """,
664
+
665
+ "convective_potential": """
666
+ ## Convective Potential (Thunderstorm Risk)
667
+
668
+ ### When to use
669
+ - Thunderstorm forecasting and climatology
670
+ - Severe weather risk assessment
671
+
672
+ ### Workflow
673
+ 1. **Extract CAPE** — Already available as `cape` variable (J/kg).
674
+ 2. **Classify risk** — Low (<300), Moderate (300-1000), High (1000-2500), Extreme (>2500 J/kg).
675
+ 3. **Combine with moisture** — High CAPE + high `tcwv` → heavy convective storms.
676
+ 4. **Check trigger** — Fronts, orography, or strong daytime heating (`t2` diurnal cycle).
677
+
678
+ ### Quality Checklist
679
+ - [ ] CAPE alone is insufficient — need a trigger mechanism
680
+ - [ ] Check `blh` (boundary layer height) — deep BLH aids convective initiation
681
+
682
+ ### Common Pitfalls
683
+ - ⚠️ CAPE = potential energy, not a guarantee. High CAPE + strong capping inversion = no storms.
684
+ - ⚠️ CAPE is most meaningful in afternoon hours — avoid pre-dawn values.
685
+
686
+ ### Interpretation
687
+ - CAPE > 1000 J/kg with deep BLH (>2km) and high `tcwv` = significant thunderstorm risk.
688
+ """,
689
+
690
+ "snow_cover": """
691
+ ## Snow Cover & Melt Analysis
692
+
693
+ ### When to use
694
+ - Tracking snow accumulation and melt timing
695
+ - Climate change impacts on snowpack
696
+
697
+ ### Workflow
698
+ 1. **Extract** — `sd` (Snow Depth in m water equivalent).
699
+ 2. **Seasonal cycle** — Track start/end of snow season per grid point.
700
+ 3. **Melt timing** — Find the date when `sd` drops below threshold.
701
+ 4. **Trend** — Check if snow season is shortening over decades.
702
+ 5. **Compare with `stl1`/`t2`** — Warming soil accelerates melt.
703
+
704
+ ### Quality Checklist
705
+ - [ ] Units: meters of water equivalent
706
+ - [ ] Focus on mid/high latitudes and mountain regions
707
+ - [ ] Inter-annual variability large — use multi-year analysis
708
+
709
+ ### Common Pitfalls
710
+ - ⚠️ ERA5 snow depth is modeled, not observed — cross-reference with station data.
711
+ - ⚠️ Rain-on-snow events can cause rapid melt not captured well in reanalysis.
712
+
713
+ ### Interpretation
714
+ - Earlier melt = less summer water supply. Map with `Blues`, reversed for snowless areas.
715
+ """,
716
+
717
+ # -------------------------------------------------------------------------
718
+ # VISUALIZATION
719
+ # -------------------------------------------------------------------------
720
+ "visualization_spatial": """
721
+ ## Spatial Map Visualization
722
+
723
+ ### When to use
724
+ - Mapping absolute climate fields (Temp, Wind, Precip, Pressure)
725
+
726
+ ### Workflow
727
+ 1. **Figure** — `fig, ax = plt.subplots(figsize=(12, 8))`.
728
+ 2. **Meshgrid** — `lons, lats = np.meshgrid(data.longitude, data.latitude)`.
729
+ 3. **Plot** — `ax.pcolormesh(lons, lats, data, cmap=..., shading='auto')`.
730
+ 4. **Colorbar** — ALWAYS: `plt.colorbar(mesh, ax=ax, label='Units', shrink=0.8)`.
731
+ 5. **Cartopy** — Optional: add coastlines, land fill. Graceful fallback if not installed.
732
+
733
+ ### Quality Checklist
734
+ - [ ] Figure 12×8 for maps
735
+ - [ ] Colormap matches variable:
736
+ - Temp: `RdYlBu_r` | Wind: `YlOrRd` | Precip: `YlGnBu`
737
+ - Pressure: `viridis` | Cloud: `Greys` | Anomalies: `RdBu_r`
738
+ - [ ] NEVER use `jet`
739
+ - [ ] Colorbar has label with units
740
+
741
+ ### Common Pitfalls
742
+ - ⚠️ Diverging cmap on absolute data is misleading — diverging only for anomalies.
743
+ - ⚠️ Missing `shading='auto'` triggers deprecation warning.
744
+ """,
745
+
746
+ "visualization_timeseries": """
747
+ ## Time Series Visualization
748
+
749
+ ### When to use
750
+ - Temporal evolution of a variable at a point or region
751
+
752
+ ### Workflow
753
+ 1. **Area average** — `ts = data.mean(dim=['latitude', 'longitude'])` (with lat weighting!).
754
+ 2. **Figure** — `fig, ax = plt.subplots(figsize=(10, 6))`.
755
+ 3. **Raw line** — `ax.plot(ts.time, ts, linewidth=1.5)`.
756
+ 4. **Smoothing** — Add rolling mean overlay with contrasting color.
757
+ 5. **Date formatting** — `fig.autofmt_xdate(rotation=30)`.
758
+
759
+ ### Quality Checklist
760
+ - [ ] Figure 10×6
761
+ - [ ] Y-axis has explicit units
762
+ - [ ] Legend included if multiple lines
763
+ - [ ] Trend line if requested: dashed with slope annotation
764
+
765
+ ### Enhancements
766
+ - **Uncertainty band**: `ax.fill_between(time, mean-std, mean+std, alpha=0.2)`
767
+ - **Event markers**: `ax.axvline(date, color='red', ls='--')`
768
+ - **Twin axis**: `ax2 = ax.twinx()` for second variable
769
+
770
+ ### Common Pitfalls
771
+ - ⚠️ Hourly data over 10+ years → unreadable block of ink. Resample to daily first.
772
+ """,
773
+
774
+ "visualization_anomaly_map": """
775
+ ## Anomaly Map Visualization
776
+
777
+ ### When to use
778
+ - Diverging data: departures, trends, z-scores
779
+ - Any map that has positive AND negative values
780
+
781
+ ### Workflow
782
+ 1. **Center at zero** — `from matplotlib.colors import TwoSlopeNorm`.
783
+ 2. **Norm** — `norm = TwoSlopeNorm(vmin=data.min(), vcenter=0, vmax=data.max())`.
784
+ 3. **Plot** — `pcolormesh(..., cmap='RdBu_r', norm=norm)`.
785
+ 4. **Stippling** — Overlay significance: `contourf(..., levels=[0, 0.05], hatches=['...'], colors='none')`.
786
+
787
+ ### Quality Checklist
788
+ - [ ] Zero is EXACTLY white/neutral in the colorbar
789
+ - [ ] Warm/dry = Red; Cool/wet = Blue
790
+ - [ ] Precip anomalies: consider `BrBG` instead of `RdBu_r`
791
+
792
+ ### Common Pitfalls
793
+ - ⚠️ Without `TwoSlopeNorm`, skewed data makes 0 appear colored → reader is misled.
794
+ - ⚠️ Symmetric vmin/vmax (`vmax = max(abs(data))`) can also work but wastes color range.
795
+ """,
796
+
797
+ "visualization_wind": """
798
+ ## Wind & Vector Visualization
799
+
800
+ ### When to use
801
+ - Circulation patterns, wind fields, quiver/streamline plots
802
+
803
+ ### Workflow
804
+ 1. **Speed background** — `wspd` with `pcolormesh` + `YlOrRd`.
805
+ 2. **Subsample vectors** — `skip = (slice(None, None, 5), slice(None, None, 5))` to avoid solid black.
806
+ 3. **Quiver** — `ax.quiver(lons[skip], lats[skip], u[skip], v[skip], color='black')`.
807
+ 4. **Alternative** — `ax.streamplot()` for flow visualization (less cluttered).
808
+
809
+ ### Quality Checklist
810
+ - [ ] Background heatmap shows magnitude
811
+ - [ ] Vectors sparse enough to be readable
812
+ - [ ] Wind barbs: `ax.barbs()` for meteorological display
813
+
814
+ ### Common Pitfalls
815
+ - ⚠️ Full-resolution quiver = completely black, unreadable mess.
816
+ - ⚠️ Check arrow scaling — default autoscale can make light winds invisible.
817
+
818
+ ### Interpretation
819
+ - Arrows = direction, background color = magnitude. Cyclonic rotation = storm.
820
+ """,
821
+
822
+ "visualization_comparison": """
823
+ ## Multi-Panel Comparison
824
+
825
+ ### When to use
826
+ - Before/after, two periods, difference maps
827
+ - Multi-variable side-by-side
828
+
829
+ ### Workflow
830
+ 1. **Grid** — `fig, axes = plt.subplots(1, 3, figsize=(18, 6))`.
831
+ 2. **Panels 1 & 2** — Absolute values with SHARED `vmin`/`vmax`.
832
+ 3. **Panel 3** — Difference (A-B) with diverging cmap centered at zero.
833
+
834
+ ### Quality Checklist
835
+ - [ ] Panels 1 & 2 share EXACT same vmin/vmax (otherwise visual comparison is invalid)
836
+ - [ ] Panel 3 has its own divergent colorbar centered at zero
837
+ - [ ] Titles clearly label what each panel shows
838
+
839
+ ### Common Pitfalls
840
+ - ⚠️ Auto-scaled panels = impossible to compare visually. Always lock limits.
841
+ """,
842
+
843
+ "visualization_profile": """
844
+ ## Hovmöller Diagrams
845
+
846
+ ### When to use
847
+ - Lat-time or lon-time cross-sections
848
+ - Tracking wave propagation, ITCZ migration, monsoon onset
849
+
850
+ ### Workflow
851
+ 1. **Average out one dimension** — e.g., average across latitudes to get (lon, time).
852
+ 2. **Transpose** — X=Time, Y=Lon/Lat.
853
+ 3. **Plot** — `contourf` or `pcolormesh`, figure 12×6.
854
+
855
+ ### Quality Checklist
856
+ - [ ] X-axis uses date formatting
857
+ - [ ] Y-axis labels state the averaged geographic slice
858
+ - [ ] Colormap matches variable type
859
+
860
+ ### Common Pitfalls
861
+ - ⚠️ Swapping axes makes the diagram unintuitive. Time → X-axis convention.
862
+
863
+ ### Interpretation
864
+ - Diagonal banding = propagating waves/systems. Vertical banding = stationary patterns.
865
+ """,
866
+
867
+ "visualization_distribution": """
868
+ ## Distribution Visualization
869
+
870
+ ### When to use
871
+ - Histograms, PDFs, box plots
872
+ - Comparing two time periods or regions
873
+
874
+ ### Workflow
875
+ 1. **Flatten** — `.values.flatten()`, drop NaNs.
876
+ 2. **Shared bins** — `np.linspace(min, max, 50)`.
877
+ 3. **Plot** — `ax.hist(data, bins=bins, alpha=0.5, density=True, label='Period')`.
878
+ 4. **Median/mean markers** — Vertical lines with annotation.
879
+
880
+ ### Quality Checklist
881
+ - [ ] `density=True` for comparing different-sized samples
882
+ - [ ] `alpha=0.5` for overlapping distributions
883
+ - [ ] Legend when comparing multiple distributions
884
+
885
+ ### Common Pitfalls
886
+ - ⚠️ Raw counts (not density) skew comparison between periods with different sample sizes.
887
+ - ⚠️ Too few bins = lost detail. Too many = noisy. 30-50 bins is usually good.
888
+
889
+ ### Interpretation
890
+ - Rightward shift = warming. Flatter + wider = more variability = more extremes.
891
+ """,
892
+
893
+ "visualization_animation": """
894
+ ## Animated/Sequential Maps
895
+
896
+ ### When to use
897
+ - Monthly/seasonal evolution of a field
898
+ - Event lifecycle (genesis → peak → decay)
899
+
900
+ ### Workflow
901
+ 1. **Global limits** — Find absolute vmin/vmax across ALL timesteps.
902
+ 2. **Multi-panel grid** — `fig, axes = plt.subplots(2, 3, figsize=(18, 12))` for 6 timesteps.
903
+ 3. **Lock colorbars** — Same vmin/vmax on every panel.
904
+ 4. **Shared colorbar** — Remove per-panel colorbars, add one at the bottom.
905
+
906
+ ### Quality Checklist
907
+ - [ ] Colorbar limits LOCKED across all panels (no jumping colors)
908
+ - [ ] Timestamps clearly labeled on each panel
909
+ - [ ] Static grid preferred over video (headless environment)
910
+
911
+ ### Common Pitfalls
912
+ - ⚠️ Auto-scaled panels flash/jump between frames — always lock limits.
913
+ - ⚠️ MP4/GIF generation may fail in headless — use PNG grids instead.
914
+ """,
915
+
916
+ "visualization_dashboard": """
917
+ ## Summary Dashboard
918
+
919
+ ### When to use
920
+ - Comprehensive overview: map + time series + statistics in one figure
921
+ - Publication-ready event summaries
922
+
923
+ ### Workflow
924
+ 1. **Layout** — `fig = plt.figure(figsize=(16, 10))` + `matplotlib.gridspec`.
925
+ 2. **Top row** — Large spatial map (anomaly or mean field).
926
+ 3. **Bottom left** — Time series of regional mean.
927
+ 4. **Bottom right** — Distribution histogram or box plot.
928
+
929
+ ### Quality Checklist
930
+ - [ ] `plt.tight_layout()` or `constrained_layout=True` to prevent overlap
931
+ - [ ] Consistent color theme across all panels
932
+ - [ ] Clear panel labels (a, b, c)
933
+
934
+ ### Common Pitfalls
935
+ - ⚠️ Cramming too much into small figure → illegible text. Scale figure size up.
936
+ - ⚠️ Different aspect ratios between map and time series need explicit gridspec ratios.
937
+ """,
938
+
939
+ "visualization_contour": """
940
+ ## Contour & Isobar Plots
941
+
942
+ ### When to use
943
+ - Pressure maps with isobars
944
+ - Temperature isotherms
945
+ - Any smoothly varying field where specific levels matter
946
+
947
+ ### Workflow
948
+ 1. **Define levels** — `levels = np.arange(990, 1040, 4)` for MSLP isobars.
949
+ 2. **Filled contour** — `ax.contourf(lons, lats, data, levels=levels, cmap=...)`.
950
+ 3. **Contour lines** — `cs = ax.contour(lons, lats, data, levels=levels, colors='black', linewidths=0.5)`.
951
+ 4. **Labels** — `ax.clabel(cs, inline=True, fontsize=8)`.
952
+
953
+ ### Quality Checklist
954
+ - [ ] Level spacing is physically meaningful (e.g., 4 hPa for MSLP)
955
+ - [ ] Contour labels don't overlap
956
+ - [ ] Filled + line contours combined for best readability
957
+
958
+ ### Common Pitfalls
959
+ - ⚠️ Too many levels → cluttered, unreadable. 10-15 levels max.
960
+ - ⚠️ Non-uniform level spacing requires manual colorbar ticks.
961
+
962
+ ### Interpretation
963
+ - Tightly packed isobars = strong pressure gradient = high winds.
964
+ """,
965
+
966
+ "visualization_correlation_map": """
967
+ ## Spatial Correlation Maps
968
+
969
+ ### When to use
970
+ - Showing where a variable correlates with an index (e.g., ENSO vs global precip)
971
+ - Teleconnection mapping
972
+
973
+ ### Workflow
974
+ 1. **Compute index** — 1D time series (e.g., Niño3.4 SST anomaly).
975
+ 2. **Correlate** — `xr.corr(index, spatial_field, dim='time')` → 2D R-map.
976
+ 3. **Significance** — Compute p-values from sample size and R.
977
+ 4. **Plot** — Map R values with `RdBu_r` centered at zero. Stipple p < 0.05.
978
+
979
+ ### Quality Checklist
980
+ - [ ] Both index and field deseasonalized
981
+ - [ ] R-map centered at zero (TwoSlopeNorm or symmetric limits)
982
+ - [ ] Significant areas stippled or hatched
983
+ - [ ] Sample size ≥30 stated
984
+
985
+ ### Common Pitfalls
986
+ - ⚠️ Raw data correlations dominated by shared seasonal cycle.
987
+ - ⚠️ Field significance: many grid points → some will be significant by chance. Apply FDR correction.
988
+
989
+ ### Interpretation
990
+ - R > 0: in-phase with index. R < 0: out-of-phase. |R| > 0.5 = strong relationship.
991
+ """,
992
+
993
+ # -------------------------------------------------------------------------
994
+ # MARITIME ANALYSIS
995
+ # -------------------------------------------------------------------------
996
+ "maritime_route": """
997
+ ## Maritime Route Risk Analysis
998
+
999
+ ### When to use
1000
+ - Analyzing weather risks along calculated shipping lanes
1001
+ - Voyage planning and hazard assessment
1002
+
1003
+ ### Workflow
1004
+ 1. **Route** — Call `calculate_maritime_route` → waypoints + bounding box.
1005
+ 2. **Data** — Download `u10`, `v10` for route bbox, target month, last 3 years.
1006
+ 3. **Wind speed** — `wspd = np.sqrt(u10**2 + v10**2)`.
1007
+ 4. **Extract** — Loop waypoints: `.sel(lat=lat, lon=lon, method='nearest')`.
1008
+ 5. **Risk classify** — Safe (<10), Caution (10-17), Danger (17-24), Extreme (>24 m/s).
1009
+ 6. **Statistics** — P95 wind speed at each waypoint, % time in each risk category.
1010
+
1011
+ ### Quality Checklist
1012
+ - [ ] Bounding box from route tool used DIRECTLY (don't convert coords)
1013
+ - [ ] 3-year period for climatological context, not just one date
1014
+ - [ ] Risk categories applied at waypoint level
1015
+
1016
+ ### Common Pitfalls
1017
+ - ⚠️ Global hourly downloads → timeout. Subset tightly to route bbox.
1018
+ - ⚠️ Don't use bounding box mean — extract AT waypoints for route-specific risk.
1019
+ """,
1020
+
1021
+ "maritime_visualization": """
1022
+ ## Maritime Route Risk Visualization
1023
+
1024
+ ### When to use
1025
+ - Plotting route risk maps with waypoint-level risk coloring
1026
+
1027
+ ### Workflow
1028
+ 1. **Background** — Map mean `wspd` with `pcolormesh` + `YlOrRd`.
1029
+ 2. **Route line** — Dashed line connecting waypoints.
1030
+ 3. **Waypoint scatter** — Color by risk: Green (<10), Amber (10-17), Coral (17-24), Red (>24 m/s).
1031
+ 4. **Labels** — "ORIGIN" and "DEST" annotations.
1032
+ 5. **Legend** — Custom 4-category legend (mandatory).
1033
+
1034
+ ### Quality Checklist
1035
+ - [ ] 4-category risk legend ALWAYS included
1036
+ - [ ] Origin/Destination labeled
1037
+ - [ ] Colormap: `YlOrRd` for wind speed
1038
+ - [ ] Saved to PLOTS_DIR
1039
+
1040
+ ### Common Pitfalls
1041
+ - ⚠️ No legend → colored dots are meaningless to the user.
1042
+ - ⚠️ Route line + waypoints must be on top (high zorder) to not be hidden by background.
1043
+ """,
1044
+ }
1045
+
1046
+
1047
+ # =============================================================================
1048
+ # ARGUMENT SCHEMA
1049
+ # =============================================================================
1050
+
1051
class AnalysisGuideArgs(BaseModel):
    """Arguments for analysis guide retrieval.

    The ``topic`` Literal enumerates every valid guide key so the schema
    surfaces the full menu to the tool-caller up front.  NOTE(review): this
    list must stay in sync with the keys of ANALYSIS_GUIDES — unknown topics
    are handled gracefully by get_analysis_guide(), but a key missing here is
    unreachable through the tool schema.
    """

    # Allowed guide topics, grouped by theme (grouping mirrors the section
    # comments inside ANALYSIS_GUIDES).
    topic: Literal[
        # Data operations
        "load_data",
        "spatial_subset",
        "temporal_subset",
        # Statistical analysis
        "anomalies",
        "zscore",
        "trend_analysis",
        "eof_analysis",
        # Advanced analysis
        "correlation_analysis",
        "composite_analysis",
        "diurnal_cycle",
        "seasonal_decomposition",
        "spectral_analysis",
        "spatial_statistics",
        "multi_variable",
        "climatology_normals",
        # Climate indices & extremes
        "climate_indices",
        "extremes",
        "drought_analysis",
        "heatwave_detection",
        "atmospheric_rivers",
        "blocking_events",
        # Domain-specific
        "energy_budget",
        "wind_energy",
        "moisture_budget",
        "convective_potential",
        "snow_cover",
        # Visualization
        "visualization_spatial",
        "visualization_timeseries",
        "visualization_anomaly_map",
        "visualization_wind",
        "visualization_comparison",
        "visualization_profile",
        "visualization_distribution",
        "visualization_animation",
        "visualization_dashboard",
        "visualization_contour",
        "visualization_correlation_map",
        # Maritime
        "maritime_route",
        "maritime_visualization",
    ] = Field(
        description="Analysis topic to get guidance for"
    )
1104
+
1105
+
1106
+ # =============================================================================
1107
+ # TOOL FUNCTION
1108
+ # =============================================================================
1109
+
1110
def get_analysis_guide(topic: str) -> str:
    """
    Get methodological guidance for climate data analysis.

    Returns text instructions for using python_repl to perform the analysis.
    """
    guide_text = ANALYSIS_GUIDES.get(topic)

    if guide_text:
        # Render the guide under a human-readable title derived from the key
        # (e.g. "heatwave_detection" -> "Heatwave Detection").
        title = topic.replace('_', ' ').title()
        return f"""
# Analysis Guide: {title}

{guide_text}

---
Use python_repl to implement this analysis with your downloaded ERA5 data.
"""

    # Unknown (or empty) topic: list every valid key so the caller can
    # self-correct on the next call.
    available = ", ".join(sorted(ANALYSIS_GUIDES))
    return f"Unknown topic: {topic}. Available: {available}"
1130
+
1131
+
1132
+ # =============================================================================
1133
+ # TOOL DEFINITIONS
1134
+ # =============================================================================
1135
+
1136
# Primary tool entry point: exposes get_analysis_guide() to the agent with a
# topic-constrained schema (AnalysisGuideArgs).  The description doubles as the
# topic index shown to the LLM tool-caller.
analysis_guide_tool = StructuredTool.from_function(
    func=get_analysis_guide,
    name="get_analysis_guide",
    description="""
    Get methodological guidance for climate data analysis.

    Returns workflow steps, quality checklists, and pitfall warnings for:
    - Data: load_data, spatial_subset, temporal_subset
    - Statistics: anomalies, zscore, trend_analysis, eof_analysis
    - Advanced: correlation_analysis, composite_analysis, diurnal_cycle,
      seasonal_decomposition, spectral_analysis, spatial_statistics,
      multi_variable, climatology_normals
    - Climate: climate_indices, extremes, drought_analysis, heatwave_detection,
      atmospheric_rivers, blocking_events
    - Domain: energy_budget, wind_energy, moisture_budget, convective_potential, snow_cover
    - Visualization: visualization_spatial, visualization_timeseries,
      visualization_anomaly_map, visualization_wind, visualization_comparison,
      visualization_profile, visualization_distribution, visualization_animation,
      visualization_dashboard, visualization_contour, visualization_correlation_map
    - Maritime: maritime_route, maritime_visualization

    Use this BEFORE writing analysis code in python_repl.
    """,
    args_schema=AnalysisGuideArgs,
)
1161
+
1162
+
1163
# Visualization guide - alias for backward compatibility.
# Wraps the SAME get_analysis_guide() function under the legacy tool name
# "get_visualization_guide"; only the description differs (visualization-
# focused topic list).  Both tools share AnalysisGuideArgs, so every analysis
# topic remains technically callable through this alias as well.
visualization_guide_tool = StructuredTool.from_function(
    func=get_analysis_guide,
    name="get_visualization_guide",
    description="""
    Get publication-grade visualization instructions for ERA5 climate data.

    CALL THIS BEFORE creating any plot to get:
    - Correct colormap choices
    - Standard value ranges
    - Required map elements
    - Best practices

    Available visualization topics:
    - visualization_spatial: Maps with proper projections
    - visualization_timeseries: Time series plots
    - visualization_anomaly_map: Diverging anomaly maps
    - visualization_wind: Quiver/streamline plots
    - visualization_comparison: Multi-panel comparisons
    - visualization_profile: Hovmöller diagrams
    - visualization_distribution: Histograms/PDFs
    - visualization_animation: Sequential map grids
    - visualization_dashboard: Multi-panel summaries
    - visualization_contour: Isobar/isotherm plots
    - visualization_correlation_map: Spatial correlation maps
    - maritime_visualization: Route risk maps
    """,
    args_schema=AnalysisGuideArgs,
)
src/eurus/tools/era5.py ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ERA5 Data Retrieval Tool (Wrapper)
3
+ ==================================
4
+ LangChain tool definition. Imports core logic from ..retrieval
5
+
6
+ This is a THIN WRAPPER - all retrieval logic lives in eurus/retrieval.py
7
+
8
+ QUERY_TYPE IS AUTO-DETECTED based on time/area rules:
9
+ - TEMPORAL: time > 1 day AND area < 30°×30°
10
+ - SPATIAL: time ≤ 1 day OR area ≥ 30°×30°
11
+ """
12
+
13
+ import logging
14
+ from typing import Optional
15
+ from datetime import datetime
16
+
17
+ from pydantic import BaseModel, Field, field_validator
18
+ from langchain_core.tools import StructuredTool
19
+
20
+ # IMPORT CORE LOGIC FROM RETRIEVAL MODULE - SINGLE SOURCE OF TRUTH
21
+ from ..retrieval import retrieve_era5_data as _retrieve_era5_data
22
+ from ..config import get_short_name
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
+ # ============================================================================
28
+ # ARGUMENT SCHEMA (NO query_type - it's auto-detected!)
29
+ # ============================================================================
30
+
31
class ERA5RetrievalArgs(BaseModel):
    """Arguments for ERA5 data retrieval. query_type is AUTO-DETECTED.

    There is deliberately no ``query_type`` field: the tool wrapper derives it
    from the requested time span and bounding-box area (see
    ``_auto_detect_query_type``), so the LLM never has to supply it.
    """

    variable_id: str = Field(
        description=(
            "ERA5 variable short name. Available variables (22 total):\n"
            "Ocean: sst (Sea Surface Temperature)\n"
            "Temperature: t2 (2m Air Temp), d2 (2m Dewpoint), skt (Skin Temp)\n"
            "Wind 10m: u10 (Eastward), v10 (Northward)\n"
            "Wind 100m: u100 (Eastward), v100 (Northward)\n"
            "Pressure: sp (Surface), mslp (Mean Sea Level)\n"
            "Boundary Layer: blh (BL Height), cape (CAPE)\n"
            "Cloud/Precip: tcc (Cloud Cover), cp (Convective), lsp (Large-scale), tp (Total Precip)\n"
            "Radiation: ssr (Net Solar), ssrd (Solar Downwards)\n"
            "Moisture: tcw (Total Column Water), tcwv (Water Vapour)\n"
            "Land: sd (Snow Depth), stl1 (Soil Temp L1), swvl1 (Soil Water L1)"
        )
    )

    start_date: str = Field(
        description="Start date in YYYY-MM-DD format (e.g., '2021-02-01')"
    )

    end_date: str = Field(
        description="End date in YYYY-MM-DD format (e.g., '2023-02-28')"
    )

    min_latitude: float = Field(
        ge=-90.0, le=90.0,
        description="Southern latitude bound (-90 to 90)"
    )

    max_latitude: float = Field(
        ge=-90.0, le=90.0,
        description="Northern latitude bound (-90 to 90)"
    )

    # Longitude limits span -180..360 so both the -180..180 and the 0..360
    # conventions validate; presumably the retrieval layer normalizes —
    # TODO confirm against ..retrieval.
    min_longitude: float = Field(
        ge=-180.0, le=360.0,
        description="Western longitude bound. Use -180 to 180 for Europe/Atlantic."
    )

    max_longitude: float = Field(
        ge=-180.0, le=360.0,
        description="Eastern longitude bound. Use -180 to 180 for Europe/Atlantic."
    )

    region: Optional[str] = Field(
        default=None,
        description=(
            "Optional predefined region (overrides lat/lon if specified):\n"
            "north_atlantic, mediterranean, nino34, global"
        )
    )

    @field_validator('start_date', 'end_date')
    @classmethod
    def validate_date_format(cls, v: str) -> str:
        # Strict parse: reject anything strptime cannot read as YYYY-MM-DD.
        try:
            datetime.strptime(v, '%Y-%m-%d')
        except ValueError:
            raise ValueError(f"Date must be in YYYY-MM-DD format, got: {v}")
        return v

    @field_validator('variable_id')
    @classmethod
    def validate_variable(cls, v: str) -> str:
        # Soft validation only: an unknown variable logs a warning but is NOT
        # rejected (retrieval is still attempted). Note the ORIGINAL spelling
        # is returned, not the resolved short name — canonical mapping is left
        # to the retrieval layer.
        from ..config import get_all_short_names
        short_name = get_short_name(v)
        valid_vars = get_all_short_names()  # DRY: use config as single source of truth
        if short_name not in valid_vars:
            logger.warning(f"Variable '{v}' may not be available. Will attempt anyway.")
        return v
104
+
105
+
106
+ # ============================================================================
107
+ # AUTO-DETECT QUERY TYPE
108
+ # ============================================================================
109
+
110
+ def _auto_detect_query_type(
111
+ start_date: str,
112
+ end_date: str,
113
+ min_lat: float,
114
+ max_lat: float,
115
+ min_lon: float,
116
+ max_lon: float
117
+ ) -> str:
118
+ """
119
+ Auto-detect optimal query_type based on time/area rules.
120
+
121
+ RULES:
122
+ - TEMPORAL: time > 1 day AND area < 30°×30° (900 sq degrees)
123
+ - SPATIAL: time ≤ 1 day OR area ≥ 30°×30°
124
+ """
125
+ # Calculate time span in days
126
+ start = datetime.strptime(start_date, '%Y-%m-%d')
127
+ end = datetime.strptime(end_date, '%Y-%m-%d')
128
+ time_days = (end - start).days + 1 # inclusive
129
+
130
+ # Calculate area in square degrees
131
+ lat_span = abs(max_lat - min_lat)
132
+ lon_span = abs(max_lon - min_lon)
133
+ area = lat_span * lon_span
134
+
135
+ # Decision logic
136
+ if time_days > 1 and area < 900:
137
+ query_type = "temporal"
138
+ else:
139
+ query_type = "spatial"
140
+
141
+ logger.info(f"Auto-detected query_type: {query_type} "
142
+ f"(time={time_days}d, area={area:.0f}sq°)")
143
+
144
+ return query_type
145
+
146
+
147
+ # ============================================================================
148
+ # WRAPPER FUNCTION (auto-adds query_type)
149
+ # ============================================================================
150
+
151
def retrieve_era5_data(
    variable_id: str,
    start_date: str,
    end_date: str,
    min_latitude: float,
    max_latitude: float,
    min_longitude: float,
    max_longitude: float,
    region: Optional[str] = None
) -> str:
    """
    Thin wrapper: auto-detects query_type, then delegates to the real
    retrieval function in eurus/retrieval.py (single source of truth).
    """
    # Collect the caller's arguments, then inject the detected query_type.
    request = {
        "variable_id": variable_id,
        "start_date": start_date,
        "end_date": end_date,
        "min_latitude": min_latitude,
        "max_latitude": max_latitude,
        "min_longitude": min_longitude,
        "max_longitude": max_longitude,
        "region": region,
    }
    request["query_type"] = _auto_detect_query_type(
        start_date, end_date,
        min_latitude, max_latitude,
        min_longitude, max_longitude,
    )
    return _retrieve_era5_data(**request)
183
+
184
+
185
# ============================================================================
# LANGCHAIN TOOL CREATION
# ============================================================================

# Module-level singleton tool. Note the exposed schema (ERA5RetrievalArgs)
# has no query_type field — the wrapper above injects it automatically.
era5_tool = StructuredTool.from_function(
    func=retrieve_era5_data,
    name="retrieve_era5_data",
    description=(
        "Retrieves ERA5 climate reanalysis data from Earthmover's cloud archive.\n\n"
        "⚠️ query_type is AUTO-DETECTED - you don't need to specify it!\n\n"
        "Just provide:\n"
        "- variable_id: one of 22 ERA5 variables (sst, t2, d2, skt, u10, v10, u100, v100, "
        "sp, mslp, blh, cape, tcc, cp, lsp, tp, ssr, ssrd, tcw, tcwv, sd, stl1, swvl1)\n"
        "- start_date, end_date: YYYY-MM-DD format\n"
        "- lat/lon bounds: Use values from maritime route bounding box!\n\n"
        "DATA: 1975-2024.\n"
        "Returns file path. Load with: xr.open_zarr('PATH')"
    ),
    args_schema=ERA5RetrievalArgs
)
src/eurus/tools/repl.py ADDED
@@ -0,0 +1,564 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Superb Python REPL Tool
3
+ =======================
4
+ A persistent Python execution environment for the agent.
5
+ Uses a SUBPROCESS for true process isolation — can be cleanly killed on timeout.
6
+
7
+ PLOT CAPTURE: When running in web mode, plots are captured via callback.
8
+ """
9
+
10
+ import sys
11
+ import io
12
+ import json
13
+ import logging
14
+ import gc
15
+ import os
16
+ import re
17
+ import base64
18
+ import tempfile
19
+ import subprocess
20
+ import threading
21
+ import traceback
22
+ import matplotlib
23
+ # Force non-interactive backend to prevent crashes on headless servers
24
+ matplotlib.use('Agg')
25
+ import matplotlib.pyplot as plt
26
+ import matplotlib.colors as mcolors # Pre-import for custom colormaps
27
+
28
+ logger = logging.getLogger(__name__)
29
+ import matplotlib.cm as cm # Pre-import for colormap access
30
+
31
# =============================================================================
# PUBLICATION-GRADE LIGHT THEME (white background for academic papers)
# =============================================================================
# NOTE: this rcParams dict is applied to the parent process here AND forwarded
# to the REPL subprocess via the EURUS_MPL_STYLE env var — but only the
# int/float/str/bool entries survive that trip (see the isinstance filter in
# PersistentREPL._start_subprocess); tuple/list values like figure.figsize
# apply to the parent process only.
_EURUS_STYLE = {
    # ── Figure ──
    "figure.figsize": (10, 6),
    "figure.dpi": 150,
    "figure.facecolor": "white",
    "figure.edgecolor": "white",
    "savefig.facecolor": "white",
    "savefig.edgecolor": "white",
    "savefig.dpi": 300,  # 300 DPI for print-quality
    "savefig.bbox": "tight",
    "savefig.pad_inches": 0.15,
    # ── Axes ──
    "axes.facecolor": "white",
    "axes.edgecolor": "#333333",
    "axes.labelcolor": "#1a1a1a",
    "axes.titlecolor": "#000000",
    "axes.labelsize": 12,
    "axes.titlesize": 14,
    "axes.titleweight": "bold",
    "axes.titlepad": 12,
    "axes.grid": True,
    "axes.spines.top": False,
    "axes.spines.right": False,
    "axes.linewidth": 0.8,
    # ── Grid ──
    "grid.color": "#d0d0d0",
    "grid.alpha": 0.5,
    "grid.linewidth": 0.5,
    "grid.linestyle": "--",
    # ── Ticks ──
    "xtick.color": "#333333",
    "ytick.color": "#333333",
    "xtick.labelsize": 10,
    "ytick.labelsize": 10,
    "xtick.direction": "out",
    "ytick.direction": "out",
    # ── Text ──
    "text.color": "#1a1a1a",
    "font.family": "sans-serif",
    "font.sans-serif": ["DejaVu Sans", "Arial", "Helvetica"],
    "font.size": 11,
    # ── Lines ──
    "lines.linewidth": 1.8,
    "lines.antialiased": True,
    "lines.markersize": 5,
    # ── Legend ──
    "legend.facecolor": "white",
    "legend.edgecolor": "#cccccc",
    "legend.fontsize": 10,
    "legend.framealpha": 0.95,
    "legend.shadow": False,
    # ── Colorbar ──
    "image.cmap": "viridis",
    # ── Patches ──
    "patch.edgecolor": "#333333",
}
matplotlib.rcParams.update(_EURUS_STYLE)

# Curated color cycle for white backgrounds (high-contrast, publication-safe)
_EURUS_COLORS = [
    "#1f77b4",  # steel blue
    "#d62728",  # brick red
    "#2ca02c",  # forest green
    "#ff7f0e",  # orange
    "#9467bd",  # muted purple
    "#17becf",  # cyan
    "#e377c2",  # pink
    "#8c564b",  # brown
]
matplotlib.rcParams["axes.prop_cycle"] = matplotlib.cycler(color=_EURUS_COLORS)
104
+
105
+ from typing import Dict, Optional, Type, Callable
106
+ from pathlib import Path
107
+ from pydantic import BaseModel, Field
108
+ from langchain_core.tools import BaseTool
109
+
110
+ # Import PLOTS_DIR for correct plot saving location
111
+ from eurus.config import PLOTS_DIR
112
+
113
+ # Pre-import common scientific libraries for convenience (parent-side only)
114
+ import pandas as pd
115
+ import numpy as np
116
+ import xarray as xr
117
+ from datetime import datetime, timedelta
118
+
119
+
120
+
121
# =============================================================================
# PERSISTENT SUBPROCESS REPL
# =============================================================================

# The Python script that runs inside the subprocess.
# It receives JSON commands on stdin and sends JSON responses on stdout.
# Protocol: one JSON object per line; requests look like
#   {"type": "exec", "code": "..."}  and the literal line "EXIT_SUBPROCESS"
# shuts the worker down. Replies carry a "status" of success/error/fatal.
# The parent injects style/colors/plots-dir via EURUS_* env vars (see
# PersistentREPL._start_subprocess).
_SUBPROCESS_SCRIPT = r'''
import sys
import os
import json
import gc
from io import StringIO

# Apply Eurus matplotlib style INSIDE the subprocess
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import matplotlib.cm as cm

_style = json.loads(os.environ.get("EURUS_MPL_STYLE", "{}"))
if _style:
    matplotlib.rcParams.update(_style)
_colors = json.loads(os.environ.get("EURUS_MPL_COLORS", "[]"))
if _colors:
    matplotlib.rcParams["axes.prop_cycle"] = matplotlib.cycler(color=_colors)

# Pre-import scientific stack
import pandas as pd
import numpy as np
import xarray as xr
from datetime import datetime, timedelta

# Set up execution globals with pre-loaded libraries
exec_globals = {
    "__builtins__": __builtins__,
    "pd": pd,
    "np": np,
    "xr": xr,
    "plt": plt,
    "mcolors": mcolors,
    "cm": cm,
    "datetime": datetime,
    "timedelta": timedelta,
    "PLOTS_DIR": os.environ.get("EURUS_PLOTS_DIR", "plots"),
}

# Signal readiness
print("SUBPROCESS_READY", flush=True)

while True:
    try:
        line = input()
        if line == "EXIT_SUBPROCESS":
            break

        cmd = json.loads(line)

        if cmd["type"] == "exec":
            code = cmd["code"]

            stdout_capture = StringIO()
            stderr_capture = StringIO()
            old_stdout, old_stderr = sys.stdout, sys.stderr

            try:
                sys.stdout = stdout_capture
                sys.stderr = stderr_capture

                # Try eval first (expression mode), fall back to exec
                try:
                    compiled = compile(code, "<repl>", "eval")
                    result = eval(compiled, exec_globals)
                    output = stdout_capture.getvalue()
                    if result is not None:
                        output += repr(result)
                    if not output.strip():
                        output = repr(result) if result is not None else "(No output)"
                except SyntaxError:
                    # Jupyter-style: auto-print last expression in multi-line code
                    import ast as _ast
                    try:
                        tree = _ast.parse(code)
                        if tree.body and isinstance(tree.body[-1], _ast.Expr):
                            # Separate the last expression from preceding stmts
                            last_expr_node = tree.body.pop()
                            if tree.body:
                                exec(compile(_ast.Module(body=tree.body, type_ignores=[]), "<repl>", "exec"), exec_globals)
                            result = eval(compile(_ast.Expression(body=last_expr_node.value), "<repl>", "eval"), exec_globals)
                            output = stdout_capture.getvalue()
                            if result is not None:
                                output += repr(result) if not output.strip() else "\n" + repr(result)
                        else:
                            exec(code, exec_globals)
                            output = stdout_capture.getvalue()
                    except SyntaxError:
                        exec(code, exec_globals)
                        output = stdout_capture.getvalue()
                    if not output.strip():
                        output = "(Executed successfully. Use print() to see results.)"

                sys.stdout, sys.stderr = old_stdout, old_stderr
                result_json = {
                    "status": "success",
                    "stdout": output.strip(),
                    "stderr": stderr_capture.getvalue(),
                }

            except Exception as e:
                sys.stdout, sys.stderr = old_stdout, old_stderr
                import traceback
                result_json = {
                    "status": "error",
                    "error": f"Error: {str(e)}\n{traceback.format_exc()}",
                    "stdout": stdout_capture.getvalue(),
                    "stderr": stderr_capture.getvalue(),
                }
            finally:
                plt.close("all")
                gc.collect()

            print(json.dumps(result_json), flush=True)

    except EOFError:
        break
    except Exception as e:
        # Fatal error in the communication loop itself
        old_stdout = sys.__stdout__
        sys.stdout = old_stdout
        print(json.dumps({"status": "fatal", "error": str(e)}), flush=True)
'''
252
+
253
+
254
class PersistentREPL:
    """
    Manages a persistent Python subprocess for code execution.
    Provides true process isolation with clean kill on timeout.

    The worker (see _SUBPROCESS_SCRIPT) speaks a line-oriented JSON protocol:
    each request is one JSON object per line of the form
    {"type": "exec", "code": ...}; each reply is one JSON object with a
    "status" of success/error/fatal. All stdin/stdout traffic must go through
    this framing or the request/response stream desynchronizes.
    """

    def __init__(self, working_dir: str = "."):
        """Spawn the worker immediately.

        Args:
            working_dir: Directory the subprocess runs in (parent cwd is used
                when the path is not an existing directory).
        """
        self._working_dir = working_dir
        self._process: Optional[subprocess.Popen] = None
        self._temp_script: Optional[str] = None
        self._lock = threading.Lock()  # Serialize access per instance
        self._start_subprocess()

    def _start_subprocess(self):
        """Start a new Python subprocess with Eurus environment."""
        # Write the subprocess script to a temp file
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".py", delete=False, prefix="eurus_repl_"
        ) as f:
            f.write(_SUBPROCESS_SCRIPT)
            self._temp_script = f.name

        # Build env: inject matplotlib style + PLOTS_DIR.
        # Only JSON-serializable scalars are forwarded (tuples/lists dropped).
        env = os.environ.copy()
        env["EURUS_MPL_STYLE"] = json.dumps(
            {k: v for k, v in _EURUS_STYLE.items() if isinstance(v, (int, float, str, bool))}
        )
        env["EURUS_MPL_COLORS"] = json.dumps(_EURUS_COLORS)
        env["EURUS_PLOTS_DIR"] = str(PLOTS_DIR)
        env["MPLBACKEND"] = "Agg"
        env["PYTHONUNBUFFERED"] = "1"

        self._process = subprocess.Popen(
            [sys.executable, "-u", self._temp_script],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            bufsize=0,
            cwd=self._working_dir if os.path.isdir(self._working_dir) else None,
            env=env,
        )

        # Wait for ready signal (first line the worker prints)
        ready_line = self._process.stdout.readline()
        if "SUBPROCESS_READY" not in ready_line:
            raise RuntimeError(f"Subprocess failed to start: {ready_line!r}")

        logger.info("Started REPL subprocess (PID: %d)", self._process.pid)

    def _ensure_alive(self):
        """Restart subprocess if it has died."""
        if self._process is None or self._process.poll() is not None:
            logger.warning("REPL subprocess died — restarting")
            self._cleanup_process()
            self._start_subprocess()

    def run(self, code: str, timeout: int = 300) -> str:
        """Execute code in the subprocess. Returns output string.

        On timeout the worker is killed and restarted, and a descriptive
        error string (not an exception) is returned.
        """
        with self._lock:
            self._ensure_alive()

            cmd = json.dumps({"type": "exec", "code": code}) + "\n"
            try:
                self._process.stdin.write(cmd)
                self._process.stdin.flush()
            except (BrokenPipeError, OSError) as e:
                logger.error("Subprocess stdin broken: %s — restarting", e)
                self._cleanup_process()
                self._start_subprocess()
                return f"Error: REPL subprocess crashed. Please re-run your code."

            # Read response with timeout
            result_line = self._read_with_timeout(timeout)

            if result_line is None:
                # Timeout — kill subprocess and restart
                logger.warning("REPL execution timed out after %ds — killing subprocess", timeout)
                self._kill_subprocess()
                self._start_subprocess()
                return (
                    "TIMEOUT ERROR: Execution exceeded "
                    f"{timeout} seconds ({timeout // 60} min). "
                    "TIP: Resample data to daily/monthly before plotting "
                    "(e.g., ds.resample(time='D').mean())."
                )

            try:
                result = json.loads(result_line)
            except json.JSONDecodeError:
                return f"Error: Malformed response from subprocess: {result_line!r}"

            if result["status"] == "success":
                output = result.get("stdout", "")
                stderr = result.get("stderr", "")
                if stderr:
                    output = f"{output}\n{stderr}" if output else stderr
                return output or "(No output)"
            elif result["status"] == "error":
                return result.get("error", "Unknown error")
            else:
                return f"Fatal subprocess error: {result.get('error', 'Unknown')}"

    def _read_with_timeout(self, timeout: int) -> Optional[str]:
        """Read one line from subprocess stdout with a timeout.

        Uses a daemon reader thread because pipe reads cannot be interrupted
        portably; returns None on timeout.
        """
        result = [None]

        def _reader():
            try:
                result[0] = self._process.stdout.readline()
            except Exception:
                pass

        reader_thread = threading.Thread(target=_reader, daemon=True)
        reader_thread.start()
        reader_thread.join(timeout=timeout)

        if reader_thread.is_alive():
            return None  # Timed out
        return result[0] if result[0] else None

    def _kill_subprocess(self):
        """Force-kill the subprocess (terminate, then kill after 3s grace)."""
        if self._process:
            try:
                self._process.terminate()
                try:
                    self._process.wait(timeout=3)
                except subprocess.TimeoutExpired:
                    self._process.kill()
                    self._process.wait(timeout=2)
            except Exception as e:
                logger.error("Error killing subprocess: %s", e)
            self._process = None

    def _cleanup_process(self):
        """Clean up subprocess and temp files."""
        self._kill_subprocess()
        if self._temp_script and os.path.exists(self._temp_script):
            try:
                os.unlink(self._temp_script)
            except OSError:
                pass
        self._temp_script = None

    def _update_plots_dir(self, plots_dir: str):
        """Update the PLOTS_DIR used by the subprocess.

        BUG FIX: the previous implementation wrote raw Python source straight
        to the worker's stdin, but the worker json.loads() every line — the
        raw line made it emit a "fatal" JSON reply that desynchronized every
        subsequent run(); it then tried to drain the reply via a nonexistent
        self._read_response(). Routing the update through run() frames it as
        a proper {"type": "exec"} command and consumes the response.
        """
        if self._process and self._process.poll() is None:
            code = (
                f"import os; "
                f"os.environ['EURUS_PLOTS_DIR'] = {plots_dir!r}; "
                f"PLOTS_DIR = {plots_dir!r}"
            )
            result = self.run(code, timeout=10)
            # run() reports failures as strings rather than raising.
            if result.startswith(("Error", "Fatal", "TIMEOUT")):
                logger.warning("Failed to update plots_dir in subprocess: %s", result)

    def close(self):
        """Gracefully shutdown the subprocess (falls back to force-kill)."""
        if self._process and self._process.poll() is None:
            try:
                self._process.stdin.write("EXIT_SUBPROCESS\n")
                self._process.stdin.flush()
                self._process.wait(timeout=3)
                logger.info("REPL subprocess exited gracefully (PID: %d)", self._process.pid)
            except Exception:
                self._kill_subprocess()
        self._cleanup_process()
423
+
424
+
425
+ # =============================================================================
426
+ # LANGCHAIN TOOL
427
+ # =============================================================================
428
+
429
class PythonREPLInput(BaseModel):
    """Argument schema for the python_repl tool: a single code string."""
    code: str = Field(description="The Python code to execute.")
431
+
432
+
433
class PythonREPLTool(BaseTool):
    """LangChain tool wrapping the persistent subprocess REPL.

    After each execution the session plots directory is diffed (by filename
    and mtime) to detect newly written images, which are then shown inline in
    the terminal and/or forwarded to a web UI via an optional callback.
    """

    name: str = "python_repl"
    description: str = (
        "A Python REPL for data analysis and visualization.\n\n"
        "CRITICAL PLOTTING RULES:\n"
        "1. ALWAYS save to PLOTS_DIR: plt.savefig(f'{PLOTS_DIR}/filename.png')\n"
        "2. Use descriptive filenames (e.g., 'route_risk_map.png')\n"
        "\n\n"
        "MEMORY RULES:\n"
        "1. NEVER use .load() or .compute() on large datasets\n"
        "2. Resample multi-year data first: ds.resample(time='D').mean()\n"
        "3. Use .sel() to subset data before operations\n\n"
        "Pre-loaded: pd, np, xr, plt, mcolors, cm, datetime, timedelta, PLOTS_DIR (string path)"
    )
    args_schema: Type[BaseModel] = PythonREPLInput
    working_dir: str = "."
    # Private state — all reassigned per-instance in __init__.
    _repl: Optional[PersistentREPL] = None
    _plot_callback: Optional[Callable] = None  # For web interface
    _displayed_plots: set = set()
    _plots_dir: Optional[str] = None  # Session-specific plot directory

    def __init__(self, working_dir: str = ".", plots_dir: Optional[str] = None, **kwargs):
        """Create the tool and spawn its backing subprocess.

        Args:
            working_dir: Working directory for the REPL subprocess.
            plots_dir: Optional session-specific plot directory; defaults to
                the global PLOTS_DIR.
        """
        super().__init__(**kwargs)
        self.working_dir = working_dir
        self._plot_callback = None
        self._displayed_plots = set()
        self._plots_dir = plots_dir or str(PLOTS_DIR)
        # Ensure the plots directory exists
        Path(self._plots_dir).mkdir(parents=True, exist_ok=True)
        self._repl = PersistentREPL(working_dir=working_dir)
        # Override the subprocess PLOTS_DIR env var to use session-specific dir
        if plots_dir:
            self._repl._update_plots_dir(plots_dir)

    def set_plot_callback(self, callback: Callable):
        """Set callback for plot capture (used by web interface)."""
        self._plot_callback = callback

    def close(self):
        """Clean up subprocess resources."""
        if self._repl:
            self._repl.close()
            self._repl = None

    def _display_image_in_terminal(self, filepath: str, base64_data: str):
        """Display image in terminal — iTerm2 inline, or macOS Preview fallback."""
        # Skip if already displayed this file in this session
        if filepath in self._displayed_plots:
            return
        self._displayed_plots.add(filepath)

        try:
            term_program = os.environ.get("TERM_PROGRAM", "")

            # iTerm2 inline image protocol (only iTerm2 supports this)
            if "iTerm.app" in term_program:
                sys.stdout.write(f"\033]1337;File=inline=1;width=auto;preserveAspectRatio=1:{base64_data}\a\n")
                sys.stdout.flush()
                return

            # Fallback: open in Preview on macOS (only in CLI, not web)
            if not self._plot_callback and os.path.exists(filepath):
                import subprocess as _sp
                _sp.Popen(["open", filepath], stdout=_sp.DEVNULL, stderr=_sp.DEVNULL)

        except Exception as e:
            logger.warning(f"Failed to display image in terminal: {e}")

    def _capture_and_notify_plots(self, saved_files: list, code: str = ""):
        """Capture plots and notify via callback.

        Reads each file, base64-encodes it, shows it in the terminal and —
        when a web callback is registered — forwards (b64, path, code).
        """
        for filepath in saved_files:
            try:
                if os.path.exists(filepath):
                    with open(filepath, 'rb') as f:
                        img_data = f.read()
                    b64_data = base64.b64encode(img_data).decode('utf-8')

                    # Display in terminal
                    self._display_image_in_terminal(filepath, b64_data)

                    # Send to web UI via callback
                    if self._plot_callback:
                        self._plot_callback(b64_data, filepath, code)
            except Exception as e:
                print(f"Warning: Failed to capture plot {filepath}: {e}")

    def _run(self, code: str) -> str:
        """Execute the python code in the subprocess and return the output."""
        plots_dir = self._plots_dir or str(PLOTS_DIR)

        # Snapshot plots directory BEFORE execution
        image_exts = {'.png', '.jpg', '.jpeg', '.svg', '.pdf', '.gif', '.webp'}
        try:
            before_files = {
                f: os.path.getmtime(os.path.join(plots_dir, f))
                for f in os.listdir(plots_dir)
                if os.path.splitext(f)[1].lower() in image_exts
            }
        except FileNotFoundError:
            before_files = {}

        # Execute in subprocess
        output = self._repl.run(code, timeout=300)

        # Detect NEW plot files by comparing directory snapshots
        try:
            after_files = {
                f: os.path.getmtime(os.path.join(plots_dir, f))
                for f in os.listdir(plots_dir)
                if os.path.splitext(f)[1].lower() in image_exts
            }
        except FileNotFoundError:
            after_files = {}

        new_files = []
        for fname, mtime in after_files.items():
            full_path = os.path.join(plots_dir, fname)
            # New file, or an existing file overwritten during this run
            if fname not in before_files or mtime > before_files[fname]:
                if full_path not in self._displayed_plots:
                    new_files.append(full_path)

        if new_files:
            print(f"📊 {len(new_files)} plot(s) saved")
            self._capture_and_notify_plots(new_files, code)

        return output

    async def _arun(self, code: str) -> str:
        """Use the tool asynchronously — avoids blocking the event loop."""
        import asyncio
        # FIX: asyncio.get_event_loop() is deprecated inside coroutines since
        # Python 3.10; get_running_loop() is the correct, equivalent call here.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, self._run, code)
src/eurus/tools/routing.py ADDED
@@ -0,0 +1,289 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Maritime Routing Tool
3
+ =====================
4
+ Strictly calculates maritime routes using global shipping lane graphs.
5
+ Does NOT perform weather analysis. Returns waypoints for the Agent to analyze.
6
+
7
+ Dependencies:
8
+ - scgraph (for maritime pathfinding)
9
+ """
10
+
11
+ import logging
12
+ from datetime import datetime, timedelta
13
+ from typing import List, Tuple, Any
14
+ from pydantic import BaseModel, Field
15
+
16
+ from langchain_core.tools import StructuredTool
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+ # Check for optional dependencies
21
+ HAS_ROUTING_DEPS = False
22
+ try:
23
+ import scgraph
24
+ from scgraph.geographs.marnet import marnet_geograph
25
+ HAS_ROUTING_DEPS = True
26
+ except ImportError:
27
+ pass
28
+
29
+
30
+ # ============================================================================
31
+ # HELPER FUNCTIONS
32
+ # ============================================================================
33
+
34
+ def _normalize_lon(lon: float) -> float:
35
+ """Convert longitude to -180 to 180 range (scgraph format)."""
36
+ # Efficient modulo operation - prevents infinite loop on extreme values
37
+ return ((lon + 180) % 360) - 180
38
+
39
+
40
+
41
+
42
+
43
def _get_maritime_path(origin: Tuple[float, float], dest: Tuple[float, float]) -> List[Tuple[float, float]]:
    """Shortest sea route between two (lat, lon) points via the marnet graph.

    Longitudes are normalized to -180..180 before lookup. Raises ImportError
    when the optional 'scgraph' dependency is absent.
    """
    if not HAS_ROUTING_DEPS:
        raise ImportError("Dependency 'scgraph' is missing.")

    o_lat, o_lon = origin
    d_lat, d_lon = dest

    result = marnet_geograph.get_shortest_path(
        origin_node={"latitude": o_lat, "longitude": _normalize_lon(o_lon)},
        destination_node={"latitude": d_lat, "longitude": _normalize_lon(d_lon)}
    )
    coordinate_path = result.get('coordinate_path', [])
    return [(pt[0], pt[1]) for pt in coordinate_path]
58
+
59
+
60
+ def _interpolate_route(
61
+ path: List[Tuple[float, float]],
62
+ speed_knots: float,
63
+ departure: datetime
64
+ ) -> List[dict]:
65
+ """Convert path to waypoints with timestamps. Keeps ALL points for risk assessment."""
66
+ try:
67
+ from geopy.distance import great_circle
68
+ except ImportError:
69
+ # Proper Haversine fallback for accurate distance at all latitudes
70
+ import math
71
+ from collections import namedtuple
72
+ Distance = namedtuple('Distance', ['km'])
73
+ def great_circle(p1, p2):
74
+ lat1, lon1 = math.radians(p1[0]), math.radians(p1[1])
75
+ lat2, lon2 = math.radians(p2[0]), math.radians(p2[1])
76
+ dlat = lat2 - lat1
77
+ dlon = lon2 - lon1
78
+ a = math.sin(dlat/2)**2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon/2)**2
79
+ c = 2 * math.asin(math.sqrt(min(1.0, a)))
80
+ return Distance(km=6371 * c) # Earth radius in km
81
+
82
+ speed_kmh = speed_knots * 1.852
83
+ waypoints = []
84
+ current_time = departure
85
+
86
+ # Add ALL points from scgraph - each is a navigation waypoint
87
+ # Risk assessment needs every geographic point, not time-filtered ones
88
+ for i, point in enumerate(path):
89
+ if i == 0:
90
+ step = "Origin"
91
+ elif i == len(path) - 1:
92
+ step = "Destination"
93
+ else:
94
+ step = f"Waypoint {i}"
95
+
96
+ # Calculate time to reach this point
97
+ if i > 0:
98
+ prev = path[i-1]
99
+ dist = great_circle(prev, point).km
100
+ hours = dist / speed_kmh if speed_kmh > 0 else 0
101
+ current_time += timedelta(hours=hours)
102
+
103
+ waypoints.append({
104
+ "lat": point[0],
105
+ "lon": point[1],
106
+ "time": current_time.strftime("%Y-%m-%d %H:%M"),
107
+ "step": step
108
+ })
109
+
110
+ return waypoints
111
+
112
+
113
+ # ============================================================================
114
+ # TOOL FUNCTION
115
+ # ============================================================================
116
+
117
def calculate_maritime_route(
    origin_lat: float,
    origin_lon: float,
    dest_lat: float,
    dest_lon: float,
    month: int,
    year: int = None,
    speed_knots: float = 14.0
) -> str:
    """
    Calculates the detailed maritime route waypoints.

    Args:
        origin_lat, origin_lon: Start position in decimal degrees.
        dest_lat, dest_lon: End position in decimal degrees.
        month: Month of travel (1-12); departure is anchored to the 15th.
        year: Optional year; defaults to the upcoming occurrence of `month`.
        speed_knots: Cruising speed in knots (must be > 0).

    Returns:
        A formatted report string (waypoints, bounding box, follow-up
        protocol), or an "Error: ..." / "Routing Calculation Failed: ..."
        string on failure.
    """
    if not HAS_ROUTING_DEPS:
        return "Error: 'scgraph' not installed."

    if not (1 <= month <= 12):
        return f"Error: month must be 1-12, got {month}."

    # Validate speed up front: a zero/negative value would otherwise reach
    # the ETA division below and surface as an opaque
    # "Routing Calculation Failed: division by zero".
    if speed_knots <= 0:
        return f"Error: speed_knots must be positive, got {speed_knots}."

    try:
        path = _get_maritime_path((origin_lat, origin_lon), (dest_lat, dest_lon))

        # Use provided year or calculate based on current date
        if year is None:
            now = datetime.now()
            year = now.year if month >= now.month else now.year + 1
        departure = datetime(year, month, 15)

        waypoints = _interpolate_route(path, speed_knots, departure)

        # Calculate bounding box with buffer for weather data
        lats = [w['lat'] for w in waypoints]
        lons = [w['lon'] for w in waypoints]

        min_lat = max(-90, min(lats) - 5)
        max_lat = min(90, max(lats) + 5)

        # Detect dateline crossing: if lon range > 180°, the route crosses -180/+180
        lon_range = max(lons) - min(lons)
        if lon_range > 180:
            # Route crosses dateline - need to recalculate
            # Split lons into positive and negative, find the gap
            pos_lons = [x for x in lons if x >= 0]
            neg_lons = [x for x in lons if x < 0]
            if pos_lons and neg_lons:
                # Route goes from ~+179 to ~-179 - use 0-360 system
                lons_360 = [(x + 360) if x < 0 else x for x in lons]
                min_lon = max(0, min(lons_360) - 5)
                max_lon = min(360, max(lons_360) + 5)
            else:
                min_lon = max(-180, min(lons) - 5)
                max_lon = min(180, max(lons) + 5)
        else:
            min_lon = max(-180, min(lons) - 5)
            max_lon = min(180, max(lons) + 5)

        # Format waypoints as Python-ready list (keep original -180/+180 format)
        waypoint_list = "[\n" + ",\n".join([
            f"    ({w['lat']:.2f}, {w['lon']:.2f})"
            for w in waypoints
        ]) + "\n]"

        # Single leg-distance helper (nautical miles): geopy when present,
        # Haversine otherwise. Replaces two previously duplicated code paths.
        try:
            from geopy.distance import great_circle

            def _leg_nm(a, b):
                return great_circle(a, b).nautical
        except ImportError:
            import math

            def _leg_nm(a, b):
                lat1, lon1 = math.radians(a[0]), math.radians(a[1])
                lat2, lon2 = math.radians(b[0]), math.radians(b[1])
                h = (math.sin((lat2 - lat1) / 2) ** 2
                     + math.cos(lat1) * math.cos(lat2) * math.sin((lon2 - lon1) / 2) ** 2)
                c = 2 * math.asin(math.sqrt(min(1.0, h)))
                return 6371 * c / 1.852  # km to nm

        total_nm = sum(
            _leg_nm((waypoints[i - 1]['lat'], waypoints[i - 1]['lon']),
                    (waypoints[i]['lat'], waypoints[i]['lon']))
            for i in range(1, len(waypoints))
        )

        eta_days = total_nm / (speed_knots * 24)

        output = f"""
================================================================================
MARITIME ROUTE CALCULATION COMPLETE
================================================================================

ROUTE SUMMARY:
Origin: ({origin_lat:.2f}, {origin_lon:.2f})
Destination: ({dest_lat:.2f}, {dest_lon:.2f})
Distance: ~{total_nm:.0f} nautical miles
Speed: {speed_knots} knots
ETA: ~{eta_days:.1f} days
Waypoints: {len(waypoints)} checkpoints

WAYPOINT COORDINATES (for risk analysis):
{waypoint_list}

DATA REGION (with 5° buffer):
Latitude: [{min_lat:.1f}, {max_lat:.1f}]
Longitude: [{min_lon:.1f}, {max_lon:.1f}]

================================================================================
MANDATORY RISK ASSESSMENT PROTOCOL
================================================================================

STEP 1: DOWNLOAD CLIMATOLOGICAL DATA
Call `retrieve_era5_data` with:
- variable: 'u10' and 'v10' (10m wind components) for wind speed analysis
- query_type: 'spatial'
- region bounds: lat=[{min_lat:.1f}, {max_lat:.1f}], lon=[{min_lon:.1f}, {max_lon:.1f}]
- dates: Month {month} for LAST 3 YEARS (e.g., {month}/2021, {month}/2022, {month}/2023)

⚠️ WARNING: Large bounding boxes can cause OOM/timeout!
If (max_lon - min_lon) > 60° or (max_lat - min_lat) > 40°:
- Do NOT download spatial data for the whole route at once
- Instead, iterate through waypoints and download small chunks
- Or sample every Nth waypoint for point-based temporal queries

WHY 3 YEARS? To build climatological statistics, not just one snapshot.

STEP 2: GET ANALYSIS PROTOCOL
Call `get_analysis_guide(topic='maritime_visualization')`

Or for full workflow: `get_analysis_guide(topic='maritime_route')`

This will provide methodology for:
- Lagrangian risk assessment (ship vs. stationary climate data)
- Threshold definitions (what wind speed is dangerous)
- Risk aggregation formulas
- Route deviation recommendations

STEP 3: EXECUTE ANALYSIS
Use python_repl to:
1. Load the downloaded data
2. Extract values at each waypoint
3. Calculate risk metrics per the methodology
4. Generate risk map and report

================================================================================
"""
        return output

    except Exception as e:
        return f"Routing Calculation Failed: {str(e)}"
264
+
265
+
266
+ # ============================================================================
267
+ # ARGUMENT SCHEMA
268
+ # ============================================================================
269
+
270
class RouteArgs(BaseModel):
    # Endpoints of the voyage, decimal degrees.
    origin_lat: float = Field(description="Latitude of origin")
    origin_lon: float = Field(description="Longitude of origin")
    dest_lat: float = Field(description="Latitude of destination")
    dest_lon: float = Field(description="Longitude of destination")
    # Scheduling inputs — departure is anchored to the 15th of `month`.
    month: int = Field(description="Month of travel (1-12)")
    year: int = Field(default=None, description="Year for analysis. Defaults to upcoming occurrence of month.")
    # Cruising speed used for ETA interpolation.
    speed_knots: float = Field(default=14.0, description="Speed in knots")
278
+
279
+
280
+ # ============================================================================
281
+ # LANGCHAIN TOOL
282
+ # ============================================================================
283
+
284
# Expose the routing function to the agent as a LangChain structured tool.
routing_tool = StructuredTool.from_function(
    name="calculate_maritime_route",
    description=(
        "Calculates a realistic maritime route (avoiding land). Returns a "
        "list of time-stamped waypoints. DOES NOT check weather."
    ),
    func=calculate_maritime_route,
    args_schema=RouteArgs,
)
tests/test_config.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import pytest
from eurus.config import ERA5_VARIABLES, VARIABLE_ALIASES, get_variable_info, get_short_name


# All 22 variables in the Arraylake dataset
ALL_ARRAYLAKE_VARS = [
    "blh", "cape", "cp", "d2", "lsp", "mslp", "sd", "skt", "sp",
    "ssr", "ssrd", "sst", "stl1", "swvl1", "t2", "tcc", "tcw",
    "tcwv", "u10", "u100", "v10", "v100",
]

# tp is a derived/accumulated variable kept for convenience
# (sorted union of the Arraylake variables plus "tp").
ALL_CATALOG_VARS = sorted(ALL_ARRAYLAKE_VARS + ["tp"])
14
+
15
+
16
def test_variable_catalog_has_all_22():
    """Every Arraylake variable must appear in ERA5_VARIABLES."""
    missing = [v for v in ALL_ARRAYLAKE_VARS if v not in ERA5_VARIABLES]
    for var in missing:
        assert var in ERA5_VARIABLES, f"Missing variable: {var}"


def test_total_variable_count():
    """Catalog should contain at least 22 variables (22 Arraylake + tp)."""
    assert len(ERA5_VARIABLES) >= 22


def test_variable_loading():
    """Test that ERA5 variables are loaded correctly."""
    # A few representative short names must be present.
    for key in ("sst", "t2", "u10"):
        assert key in ERA5_VARIABLES

    # SST metadata spot-check.
    sst_info = ERA5_VARIABLES["sst"]
    assert sst_info.units == "K"
    assert sst_info.short_name == "sst"
36
+
37
+
38
def test_new_variables_metadata():
    """Spot-check metadata on newly added variables."""
    # Boundary layer height: metres, atmosphere category.
    blh = ERA5_VARIABLES["blh"]
    assert blh.units == "m"
    assert blh.category == "atmosphere"

    # 2m dewpoint temperature: Kelvin.
    d2 = ERA5_VARIABLES["d2"]
    assert d2.units == "K"

    # Top-layer soil moisture: volumetric units, land-surface category.
    swvl1 = ERA5_VARIABLES["swvl1"]
    assert "m³/m³" in swvl1.units
    assert swvl1.category == "land_surface"

    # 100m wind component: m/s.
    u100 = ERA5_VARIABLES["u100"]
    assert u100.units == "m/s"

    # Downward shortwave radiation: accumulated J/m², radiation category.
    ssrd = ERA5_VARIABLES["ssrd"]
    assert "J/m²" in ssrd.units
    assert ssrd.category == "radiation"
62
+
63
+
64
def test_get_variable_info():
    """Test helper function for retrieving variable info."""
    # Lookups are case-insensitive and alias-aware.
    assert get_variable_info("SST") == ERA5_VARIABLES["sst"]
    assert get_variable_info("Sea_Surface_Temperature") == ERA5_VARIABLES["sst"]
    # Unknown names resolve to None rather than raising.
    assert get_variable_info("non_existent_var") is None

    # Aliases for the newly added variables.
    alias_cases = {
        "dewpoint": "d2",
        "soil_moisture": "swvl1",
        "boundary_layer_height": "blh",
        "snow_depth": "sd",
    }
    for alias, short in alias_cases.items():
        assert get_variable_info(alias) == ERA5_VARIABLES[short]


def test_get_short_name():
    """Test retrieval of short names."""
    assert get_short_name("SST") == "sst"
    assert get_short_name("Sea_Surface_Temperature") == "sst"
    # Unknown inputs fall back to their lower-cased form.
    assert get_short_name("UNKNOWN_VAR") == "unknown_var"

    # Aliases for the newly added variables.
    assert get_short_name("skin_temperature") == "skt"
    assert get_short_name("100m_u_component_of_wind") == "u100"
    assert get_short_name("total_column_water_vapour") == "tcwv"
89
+
90
+
91
def test_agent_prompt_branding():
    """Test that the system prompt contains the Eurus branding."""
    from eurus.config import AGENT_SYSTEM_PROMPT
    # New branding must be present; legacy names must be gone.
    assert "Eurus" in AGENT_SYSTEM_PROMPT
    assert "Comrade Copernicus" not in AGENT_SYSTEM_PROMPT
    assert "PANGAEA" not in AGENT_SYSTEM_PROMPT


def test_agent_prompt_lists_all_variables():
    """System prompt should mention all 22 Arraylake variable short names."""
    from eurus.config import AGENT_SYSTEM_PROMPT
    # NOTE(review): substring containment — short names like "d2" may match
    # unrelated text; a stricter token check could tighten this later.
    for var in ALL_ARRAYLAKE_VARS:
        assert var in AGENT_SYSTEM_PROMPT, (
            f"System prompt missing variable: {var}"
        )
tests/test_e2e.py ADDED
@@ -0,0 +1,368 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ End-to-End Tests for Eurus
3
+ ===========================
4
+ These tests use REAL API calls to verify the complete workflow.
5
+ Requires valid API keys in .env file.
6
+
7
+ Run with: pytest tests/test_e2e.py -v -s
8
+ Use -s flag to see output from data retrieval.
9
+ """
10
+
11
+ import os
12
+ import pytest
13
+ import tempfile
14
+ import shutil
15
+ from pathlib import Path
16
+ from datetime import datetime, timedelta
17
+ from dotenv import load_dotenv
18
+
19
+ # Load .env file
20
+ load_dotenv()
21
+
22
+
23
+ # ============================================================================
24
+ # FIXTURES
25
+ # ============================================================================
26
+
27
+ @pytest.fixture(scope="module")
28
+ def temp_data_dir():
29
+ """Create temporary data directory for tests."""
30
+ temp_dir = tempfile.mkdtemp(prefix="eurus_e2e_")
31
+ yield temp_dir
32
+ # Cleanup after all tests
33
+ shutil.rmtree(temp_dir, ignore_errors=True)
34
+
35
+
36
+ @pytest.fixture(scope="module")
37
+ def has_arraylake_key():
38
+ """Check if Arraylake API key is available."""
39
+ key = os.environ.get("ARRAYLAKE_API_KEY")
40
+ if not key:
41
+ pytest.skip("ARRAYLAKE_API_KEY not found in environment")
42
+ return True
43
+
44
+
45
+ # ============================================================================
46
+ # E2E: ERA5 DATA RETRIEVAL
47
+ # ============================================================================
48
+
49
class TestERA5Retrieval:
    """End-to-end tests for ERA5 data retrieval (real API calls)."""

    @pytest.mark.slow
    def test_retrieve_sst_temporal_small_region(self, has_arraylake_key, temp_data_dir):
        """
        E2E Test: Retrieve SST data for a small region and short time period.
        This tests the complete retrieval pipeline.
        """
        from eurus.retrieval import retrieve_era5_data
        from eurus.memory import reset_memory

        # Reset memory for clean state
        reset_memory()

        # Use a small request to minimize download time
        result = retrieve_era5_data(
            query_type="temporal",
            variable_id="sst",
            start_date="2023-01-01",
            end_date="2023-01-07",  # Just 1 week
            min_latitude=25.0,
            max_latitude=30.0,
            min_longitude=260.0,  # Gulf of Mexico
            max_longitude=265.0,
        )

        print(f"\n=== ERA5 Retrieval Result ===\n{result}\n")

        # Verify success
        assert "SUCCESS" in result or "CACHE HIT" in result
        assert "sst" in result.lower()
        assert ".zarr" in result

    @pytest.mark.slow
    def test_retrieve_t2m_spatial(self, has_arraylake_key, temp_data_dir):
        """
        E2E Test: Retrieve 2m temperature as spatial data.
        Tests spatial query type.
        """
        from eurus.retrieval import retrieve_era5_data
        from eurus.memory import reset_memory

        reset_memory()

        result = retrieve_era5_data(
            query_type="spatial",
            variable_id="t2",  # 2m temperature
            start_date="2023-06-01",
            end_date="2023-06-03",  # Just 3 days
            min_latitude=40.0,
            max_latitude=50.0,
            min_longitude=0.0,
            max_longitude=10.0,  # Western Europe
        )

        print(f"\n=== T2M Spatial Result ===\n{result}\n")

        assert "SUCCESS" in result or "CACHE HIT" in result

    @pytest.mark.slow
    def test_retrieve_and_load_dataset(self, has_arraylake_key, temp_data_dir):
        """
        E2E Test: Retrieve data and verify it can be loaded with xarray.
        Tests the full data integrity pipeline.
        """
        import xarray as xr
        from eurus.retrieval import retrieve_era5_data
        from eurus.memory import reset_memory, get_memory

        reset_memory()

        result = retrieve_era5_data(
            query_type="temporal",
            variable_id="sst",
            start_date="2023-02-01",
            end_date="2023-02-05",
            min_latitude=20.0,
            max_latitude=25.0,
            min_longitude=270.0,
            max_longitude=275.0,
        )

        assert "SUCCESS" in result or "CACHE HIT" in result

        # Extract the dataset path from the result string. Prefer an
        # explicit "Path:" line; otherwise fall back to the first token
        # containing ".zarr".
        path = None
        for line in result.split('\n'):
            if "Path:" in line:
                path = line.split("Path:")[-1].strip()
                break
            if ".zarr" in line and "Load with" not in line:
                for part in line.split():
                    if ".zarr" in part:
                        path = part.strip()
                        break
                # BUGFIX: stop scanning once a path is found — previously
                # the loop kept going and later ".zarr" lines silently
                # overwrote the first match.
                if path:
                    break

        if path and os.path.exists(path):
            # Load and verify dataset
            ds = xr.open_dataset(path, engine='zarr')

            print(f"\n=== Loaded Dataset ===")
            print(f"Variables: {list(ds.data_vars)}")
            print(f"Dimensions: {dict(ds.dims)}")
            print(f"Time range: {ds.time.values[0]} to {ds.time.values[-1]}")

            assert 'sst' in ds.data_vars
            assert 'time' in ds.dims
            assert ds.dims['time'] > 0

            ds.close()
164
+
165
+
166
+ # ============================================================================
167
+ # E2E: PYTHON REPL ANALYSIS
168
+ # ============================================================================
169
+
170
class TestREPLAnalysis:
    """End-to-end tests for REPL-based data analysis."""

    def test_repl_numpy_computation(self):
        """
        E2E Test: Use REPL to perform numpy computation.
        """
        from eurus.tools.repl import PythonREPLTool

        repl = PythonREPLTool()

        # The code string is executed inside the sandboxed REPL tool.
        code = """
import numpy as np
data = np.random.randn(100)
mean = np.mean(data)
std = np.std(data)
print(f"Mean: {mean:.4f}, Std: {std:.4f}")
"""
        result = repl._run(code)
        print(f"\n=== REPL Result ===\n{result}\n")

        assert "Mean:" in result
        assert "Std:" in result
        assert "Error" not in result

    def test_repl_pandas_dataframe(self):
        """
        E2E Test: Use REPL to create and manipulate pandas DataFrame.
        """
        from eurus.tools.repl import PythonREPLTool

        repl = PythonREPLTool()

        code = """
import pandas as pd
import numpy as np

df = pd.DataFrame({
    'date': pd.date_range('2023-01-01', periods=10),
    'temperature': np.random.randn(10) * 5 + 20,
    'humidity': np.random.randn(10) * 10 + 60
})

print("DataFrame created:")
print(df.head())
print(f"\\nStats: Mean temp = {df['temperature'].mean():.2f}")
"""
        result = repl._run(code)
        print(f"\n=== Pandas Result ===\n{result}\n")

        assert "DataFrame created" in result
        assert "temperature" in result
        assert "Error" not in result

    @pytest.mark.slow
    def test_repl_load_and_analyze_data(self, has_arraylake_key):
        """
        E2E Test: Retrieve ERA5 data, then analyze it in REPL.
        Full workflow test.
        """
        from eurus.retrieval import retrieve_era5_data
        from eurus.tools.repl import PythonREPLTool
        from eurus.memory import reset_memory
        import xarray as xr

        reset_memory()

        # Step 1: Retrieve data
        result = retrieve_era5_data(
            query_type="temporal",
            variable_id="sst",
            start_date="2023-03-01",
            end_date="2023-03-05",
            min_latitude=25.0,
            max_latitude=28.0,
            min_longitude=265.0,
            max_longitude=268.0,
        )

        assert "SUCCESS" in result or "CACHE HIT" in result

        # Extract path from the "Path:" line of the tool output.
        path = None
        for line in result.split('\n'):
            if "Path:" in line:
                path = line.split("Path:")[-1].strip()
                break

        if not path or not os.path.exists(path):
            pytest.skip("Could not extract data path")

        # Step 2: Analyze in REPL. Double braces escape f-string
        # interpolation so the inner code keeps its own {} expressions.
        repl = PythonREPLTool()

        analysis_code = f"""
import xarray as xr
import numpy as np

# Load the dataset
ds = xr.open_dataset('{path}', engine='zarr')
data = ds['sst']

# Calculate statistics
spatial_mean = data.mean(dim=['latitude', 'longitude'])
time_mean = data.mean(dim='time')

print("=== SST Analysis ===")
print(f"Time points: {{len(data.time)}}")
print(f"Spatial shape: {{data.shape}}")
print(f"Overall mean: {{float(data.mean()):.2f}} K")
print(f"Overall std: {{float(data.std()):.2f}} K")
print(f"Min: {{float(data.min()):.2f}} K, Max: {{float(data.max()):.2f}} K")
"""
        analysis_result = repl._run(analysis_code)
        print(f"\n=== Analysis Result ===\n{analysis_result}\n")

        assert "SST Analysis" in analysis_result
        # NOTE(review): this `or` passes whenever EITHER substring is
        # absent — e.g. a plain "Error" without "Security" still passes.
        # Confirm whether `and` was intended.
        assert "Error" not in analysis_result or "Security" not in analysis_result
288
+
289
+
290
+
291
+
292
+ # ============================================================================
293
+ # E2E: MEMORY PERSISTENCE
294
+ # ============================================================================
295
+
296
class TestMemoryPersistence:
    """End-to-end tests for memory and dataset tracking."""

    @pytest.mark.slow
    def test_memory_tracks_downloaded_data(self, has_arraylake_key):
        """
        E2E Test: Verify memory tracks downloaded datasets.
        """
        from eurus.retrieval import retrieve_era5_data
        from eurus.memory import reset_memory, get_memory

        reset_memory()
        memory = get_memory()

        # Initial state - no datasets
        initial_datasets = memory.list_datasets()

        # Download data
        result = retrieve_era5_data(
            query_type="temporal",
            variable_id="sst",
            start_date="2023-04-01",
            end_date="2023-04-03",
            min_latitude=30.0,
            max_latitude=32.0,
            min_longitude=275.0,
            max_longitude=278.0,
        )

        # Check memory registered the dataset
        datasets = memory.list_datasets()
        print(f"\n=== Registered Datasets ===\n{datasets}\n")

        # Should have at least one dataset now; only asserted on a fresh
        # download (a CACHE HIT may not register a new entry).
        if "SUCCESS" in result:
            assert len(datasets) > len(initial_datasets)
332
+
333
+
334
+ # ============================================================================
335
+ # E2E: ROUTING (if scgraph installed)
336
+ # ============================================================================
337
+
338
class TestRouting:
    """End-to-end tests for maritime routing."""

    def test_routing_without_deps(self):
        """
        E2E Test: Verify routing handles missing dependencies gracefully.
        """
        from eurus.tools.routing import HAS_ROUTING_DEPS, calculate_maritime_route

        if not HAS_ROUTING_DEPS:
            # Should return helpful error message rather than raising.
            result = calculate_maritime_route(
                origin_lat=53.5,
                origin_lon=8.5,
                dest_lat=52.4,
                dest_lon=4.9,
                month=6
            )
            print(f"\n=== Routing (no deps) ===\n{result}\n")
            assert "scgraph" in result.lower() or "install" in result.lower()
        else:
            pytest.skip("scgraph is installed, skipping no-deps test")
360
+
361
+
362
+ # ============================================================================
363
+ # RUN WITH: pytest tests/test_e2e.py -v -s --tb=short
364
+ # Add -m "not slow" to skip slow tests
365
+ # ============================================================================
366
+
367
+ if __name__ == "__main__":
368
+ pytest.main([__file__, "-v", "-s", "--tb=short"])
tests/test_edge_cases.py ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Edge-Case & Hardening Tests for Eurus
3
+ =======================================
4
+ Focused on retrieval edge cases discovered during manual testing:
5
+ prime-meridian crossing, future dates, invalid variables, filename
6
+ generation, cache behaviour, and routing with real dependencies.
7
+
8
+ Run with: pytest tests/test_edge_cases.py -v -s
9
+ """
10
+
11
+ import os
12
+ import pytest
13
+ from pathlib import Path
14
+ from dotenv import load_dotenv
15
+
16
+ load_dotenv()
17
+
18
+
19
+ # ============================================================================
20
+ # RETRIEVAL HELPERS — pure-logic, no API calls
21
+ # ============================================================================
22
+
23
class TestFilenameGeneration:
    """Tests for generate_filename edge cases (pure logic, no API calls)."""

    def test_negative_longitude_in_filename(self):
        # Negative longitudes must survive into the coordinate string.
        from eurus.retrieval import generate_filename
        name = generate_filename(
            "sst", "temporal", "2023-01-01", "2023-01-31",
            min_latitude=30.0, max_latitude=46.0,
            min_longitude=-6.0, max_longitude=36.0,
        )
        assert name.endswith(".zarr")
        assert "lat30.00_46.00" in name
        assert "lon-6.00_36.00" in name

    def test_region_tag_overrides_coords(self):
        # A named region should replace the raw lat/lon string entirely.
        from eurus.retrieval import generate_filename
        name = generate_filename(
            "sst", "temporal", "2023-07-01", "2023-07-31",
            min_latitude=30, max_latitude=46,
            min_longitude=354, max_longitude=42,
            region="mediterranean",
        )
        assert "mediterranean" in name
        assert "lat" not in name  # region tag replaces coord string

    def test_format_coord_near_zero(self):
        # Values that round to zero must print as "0.00", not "-0.00".
        from eurus.retrieval import _format_coord
        assert _format_coord(0.003) == "0.00"
        assert _format_coord(-0.004) == "0.00"
        assert _format_coord(0.01) == "0.01"
53
+
54
+
55
class TestFutureDateRejection:
    """Ensure retrieval rejects future start dates without touching the API."""

    def test_future_date_returns_error(self):
        from eurus.retrieval import retrieve_era5_data
        # A far-future window must be rejected client-side with a clear
        # error string rather than producing a request.
        result = retrieve_era5_data(
            query_type="temporal",
            variable_id="sst",
            start_date="2099-01-01",
            end_date="2099-01-31",
            min_latitude=0, max_latitude=10,
            min_longitude=250, max_longitude=260,
        )
        assert "future" in result.lower()
        assert "Error" in result
70
+
71
+
72
+ # ============================================================================
73
+ # E2E RETRIEVAL — require ARRAYLAKE_API_KEY
74
+ # ============================================================================
75
+
76
+ @pytest.fixture(scope="module")
77
+ def has_arraylake_key():
78
+ key = os.environ.get("ARRAYLAKE_API_KEY")
79
+ if not key:
80
+ pytest.skip("ARRAYLAKE_API_KEY not set")
81
+ return True
82
+
83
+
84
class TestPrimeMeridianCrossing:
    """Verify data integrity when the request spans the 0° meridian."""

    @pytest.mark.slow
    def test_cross_meridian_longitude_continuity(self, has_arraylake_key):
        """
        Request u10 from -10°E to 15°E and check that the returned
        longitude axis has no gaps (step ≈ 0.25° everywhere).
        """
        import numpy as np
        import xarray as xr
        from eurus.retrieval import retrieve_era5_data
        from eurus.memory import reset_memory

        reset_memory()
        result = retrieve_era5_data(
            query_type="temporal",
            variable_id="u10",
            start_date="2024-01-15",
            end_date="2024-01-17",  # small window
            min_latitude=50.0,
            max_latitude=55.0,
            min_longitude=-10.0,
            max_longitude=15.0,
        )
        assert "SUCCESS" in result or "CACHE HIT" in result

        # Extract the dataset path from the "Path:" line and load it.
        path = None
        for line in result.split("\n"):
            if "Path:" in line:
                path = line.split("Path:")[-1].strip()
                break
        assert path and os.path.exists(path)

        ds = xr.open_dataset(path, engine="zarr")
        lons = ds["u10"].longitude.values
        diffs = np.diff(lons)
        # uniform step — no jump across 0°
        assert diffs.max() < 1.0, f"Gap in longitude: max step = {diffs.max()}"
        ds.close()
125
+
126
+
127
class TestInvalidVariableHandling:
    """Ensure retrieval returns a clear error for unavailable variables."""

    @pytest.mark.slow
    def test_swh_not_available(self, has_arraylake_key):
        # "swh" (significant wave height) is not in the catalog; the tool
        # should answer with an error that also lists what IS available.
        from eurus.retrieval import retrieve_era5_data
        from eurus.memory import reset_memory

        reset_memory()
        result = retrieve_era5_data(
            query_type="temporal",
            variable_id="swh",
            start_date="2023-06-01",
            end_date="2023-06-07",
            min_latitude=40, max_latitude=50,
            min_longitude=0, max_longitude=10,
        )
        assert "not found" in result.lower() or "Error" in result
        assert "Available variables" in result or "available" in result.lower()
146
+
147
+
148
class TestCacheHitBehaviour:
    """Verify that repeated identical requests return CACHE HIT."""

    @pytest.mark.slow
    def test_second_request_is_cache_hit(self, has_arraylake_key):
        from eurus.retrieval import retrieve_era5_data
        from eurus.memory import reset_memory

        reset_memory()
        # Identical parameter dict reused for both calls so the cache key
        # is guaranteed to match.
        params = dict(
            query_type="temporal",
            variable_id="sst",
            start_date="2023-08-01",
            end_date="2023-08-03",
            min_latitude=35.0, max_latitude=37.0,
            min_longitude=15.0, max_longitude=18.0,
        )
        first = retrieve_era5_data(**params)
        assert "SUCCESS" in first or "CACHE HIT" in first

        second = retrieve_era5_data(**params)
        assert "CACHE HIT" in second
170
+
171
+
172
+ # ============================================================================
173
+ # ROUTING WITH REAL DEPENDENCIES
174
+ # ============================================================================
175
+
176
class TestRoutingIntegration:
    """Tests that use real scgraph (if installed)."""

    def test_hamburg_rotterdam_route(self):
        # Short North Sea hop: report must render and include a distance.
        from eurus.tools.routing import HAS_ROUTING_DEPS, calculate_maritime_route
        if not HAS_ROUTING_DEPS:
            pytest.skip("scgraph not installed")

        result = calculate_maritime_route(
            origin_lat=53.5, origin_lon=8.5,
            dest_lat=52.4, dest_lon=4.9,
            month=6,
        )
        assert "MARITIME ROUTE CALCULATION COMPLETE" in result
        assert "Waypoints" in result or "waypoints" in result.lower()
        # distance should be reasonable (100–500 nm)
        assert "nautical miles" in result.lower()

    def test_long_route_across_atlantic(self):
        from eurus.tools.routing import HAS_ROUTING_DEPS, calculate_maritime_route
        if not HAS_ROUTING_DEPS:
            pytest.skip("scgraph not installed")

        result = calculate_maritime_route(
            origin_lat=40.7, origin_lon=-74.0,  # New York
            dest_lat=51.9, dest_lon=4.5,  # Rotterdam
            month=1,
        )
        assert "MARITIME ROUTE CALCULATION COMPLETE" in result
        # trans-Atlantic should produce plenty of waypoints
        assert "nautical miles" in result.lower()
207
+
208
+
209
+ if __name__ == "__main__":
210
+ pytest.main([__file__, "-v", "-s", "--tb=short"])
tests/test_server_integration.py ADDED
@@ -0,0 +1,365 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Server and Integration Tests
3
+ ============================
4
+ Tests for server module, retrieval helpers, and integration scenarios.
5
+ """
6
+
7
+ import pytest
8
+ from unittest.mock import patch, MagicMock
9
+ from datetime import datetime
10
+ import tempfile
11
+ from pathlib import Path
12
+ import json
13
+ import os
14
+
15
+
16
+ # ============================================================================
17
+ # SERVER MODULE TESTS
18
+ # ============================================================================
19
+
20
+ class TestServerModule:
21
+ """Tests for eurus.server module."""
22
+
23
+ def test_server_class_exists(self):
24
+ """Test Server class can be imported."""
25
+ from eurus.server import Server
26
+ assert Server is not None
27
+
28
+ def test_server_instance_exists(self):
29
+ """Test server instance can be imported."""
30
+ from eurus.server import server
31
+ assert server is not None
32
+
33
+
34
+ # ============================================================================
35
+ # RETRIEVAL HELPERS TESTS
36
+ # ============================================================================
37
+
38
+ class TestRetrievalHelpers:
39
+ """Tests for retrieval helper functions."""
40
+
41
+ def test_format_coord_positive(self):
42
+ """Test coordinate formatting for positive values."""
43
+ from eurus.retrieval import _format_coord
44
+ assert _format_coord(25.5) == "25.50"
45
+
46
+ def test_format_coord_negative(self):
47
+ """Test coordinate formatting for negative values."""
48
+ from eurus.retrieval import _format_coord
49
+ assert _format_coord(-10.333) == "-10.33"
50
+
51
+ def test_format_coord_zero(self):
52
+ """Test coordinate formatting for near-zero values."""
53
+ from eurus.retrieval import _format_coord
54
+ # Values very close to zero should be formatted as 0.00
55
+ result = _format_coord(0.001)
56
+ assert "0.00" in result or "0.01" in result
57
+
58
+ def test_format_file_size_bytes(self):
59
+ """Test file size formatting for bytes."""
60
+ from eurus.retrieval import format_file_size
61
+ assert "B" in format_file_size(500)
62
+
63
+ def test_format_file_size_kb(self):
64
+ """Test file size formatting for kilobytes."""
65
+ from eurus.retrieval import format_file_size
66
+ assert "KB" in format_file_size(2048)
67
+
68
+ def test_format_file_size_mb(self):
69
+ """Test file size formatting for megabytes."""
70
+ from eurus.retrieval import format_file_size
71
+ assert "MB" in format_file_size(5 * 1024 * 1024)
72
+
73
+ def test_format_file_size_gb(self):
74
+ """Test file size formatting for gigabytes."""
75
+ from eurus.retrieval import format_file_size
76
+ assert "GB" in format_file_size(5 * 1024 * 1024 * 1024)
77
+
78
+ def test_ensure_aws_region_sets_env_from_repo_metadata(self, monkeypatch):
79
+ """Auto-populate AWS vars when metadata includes region_name."""
80
+ import eurus.retrieval as _retrieval
81
+ from eurus.retrieval import _ensure_aws_region
82
+
83
+ # Reset one-shot flag so the function actually runs
84
+ _retrieval._aws_region_set = False
85
+
86
+ for key in ("AWS_REGION", "AWS_DEFAULT_REGION", "AWS_ENDPOINT_URL", "AWS_S3_ENDPOINT"):
87
+ monkeypatch.delenv(key, raising=False)
88
+
89
+ response = MagicMock()
90
+ response.read.return_value = json.dumps(
91
+ {"bucket": {"extra_config": {"region_name": "eu-north-1"}}}
92
+ ).encode("utf-8")
93
+ context_manager = MagicMock()
94
+ context_manager.__enter__.return_value = response
95
+
96
+ with patch("eurus.retrieval.urlopen", return_value=context_manager) as mock_urlopen:
97
+ _ensure_aws_region("token", "earthmover-public/era5-surface-aws")
98
+
99
+ assert os.environ["AWS_REGION"] == "eu-north-1"
100
+ assert os.environ["AWS_DEFAULT_REGION"] == "eu-north-1"
101
+ assert os.environ["AWS_ENDPOINT_URL"] == "https://s3.eu-north-1.amazonaws.com"
102
+ assert os.environ["AWS_S3_ENDPOINT"] == "https://s3.eu-north-1.amazonaws.com"
103
+
104
+ req = mock_urlopen.call_args.args[0]
105
+ assert req.full_url == "https://api.earthmover.io/repos/earthmover-public/era5-surface-aws"
106
+
107
+ def test_ensure_aws_region_does_not_override_existing_env(self, monkeypatch):
108
+ """Keep explicit user-provided AWS endpoint config untouched."""
109
+ import eurus.retrieval as _retrieval
110
+ from eurus.retrieval import _ensure_aws_region
111
+
112
+ # Reset one-shot flag so the function actually runs
113
+ _retrieval._aws_region_set = False
114
+
115
+ monkeypatch.setenv("AWS_REGION", "custom-region")
116
+ monkeypatch.setenv("AWS_DEFAULT_REGION", "custom-default")
117
+ monkeypatch.setenv("AWS_ENDPOINT_URL", "https://custom.endpoint")
118
+ monkeypatch.setenv("AWS_S3_ENDPOINT", "https://custom.s3.endpoint")
119
+
120
+ response = MagicMock()
121
+ response.read.return_value = json.dumps(
122
+ {"bucket": {"extra_config": {"region_name": "us-west-2"}}}
123
+ ).encode("utf-8")
124
+ context_manager = MagicMock()
125
+ context_manager.__enter__.return_value = response
126
+
127
+ with patch("eurus.retrieval.urlopen", return_value=context_manager):
128
+ _ensure_aws_region("token")
129
+
130
+ assert os.environ["AWS_REGION"] == "custom-region"
131
+ assert os.environ["AWS_DEFAULT_REGION"] == "custom-default"
132
+ assert os.environ["AWS_ENDPOINT_URL"] == "https://custom.endpoint"
133
+ assert os.environ["AWS_S3_ENDPOINT"] == "https://custom.s3.endpoint"
134
+
135
+
136
+
137
+
138
+ # ============================================================================
139
+ # ANALYSIS GUIDE TESTS
140
+ # ============================================================================
141
+
142
+ class TestAnalysisGuide:
143
+ """Tests for analysis guide module."""
144
+
145
+ def test_analysis_guide_tool_exists(self):
146
+ """Test analysis guide tool can be imported."""
147
+ from eurus.tools.analysis_guide import analysis_guide_tool
148
+ assert analysis_guide_tool is not None
149
+
150
+ def test_analysis_guide_returns_content(self):
151
+ """Test analysis guide returns useful content."""
152
+ from eurus.tools.analysis_guide import get_analysis_guide
153
+ result = get_analysis_guide("timeseries")
154
+ assert len(result) > 100 # Should have substantial content
155
+
156
+
157
+ # ============================================================================
158
+ # ERA5 TOOL EXTENDED TESTS
159
+ # ============================================================================
160
+
161
+ class TestERA5ToolValidation:
162
+ """Tests for ERA5 tool validation and edge cases."""
163
+
164
+ def test_era5_args_date_validation(self):
165
+ """Test date format validation works."""
166
+ from eurus.tools.era5 import ERA5RetrievalArgs
167
+ # Valid dates should work
168
+ args = ERA5RetrievalArgs(
169
+ variable_id="sst",
170
+ start_date="2023-01-01",
171
+ end_date="2023-12-31",
172
+ min_latitude=20.0,
173
+ max_latitude=30.0,
174
+ min_longitude=260.0,
175
+ max_longitude=280.0
176
+ )
177
+ assert args.start_date == "2023-01-01"
178
+
179
+ def test_era5_args_latitude_range(self):
180
+ """Test latitude range parameters."""
181
+ from eurus.tools.era5 import ERA5RetrievalArgs
182
+ args = ERA5RetrievalArgs(
183
+ variable_id="t2",
184
+ start_date="2023-01-01",
185
+ end_date="2023-01-31",
186
+ min_latitude=-90.0,
187
+ max_latitude=90.0,
188
+ min_longitude=0.0,
189
+ max_longitude=360.0
190
+ )
191
+ assert args.min_latitude == -90.0
192
+ assert args.max_latitude == 90.0
193
+
194
+ def test_era5_args_query_type_field(self):
195
+ """Test that ERA5 args handles optional query_type correctly."""
196
+ from eurus.tools.era5 import ERA5RetrievalArgs
197
+ args = ERA5RetrievalArgs(
198
+ variable_id="sst",
199
+ start_date="2023-01-01",
200
+ end_date="2023-12-31",
201
+ min_latitude=20.0,
202
+ max_latitude=30.0,
203
+ min_longitude=260.0,
204
+ max_longitude=280.0
205
+ )
206
+ # Just verify args created successfully
207
+ assert args.variable_id == "sst"
208
+
209
+
210
+ # ============================================================================
211
+ # CONFIG EXTENDED TESTS
212
+ # ============================================================================
213
+
214
+ class TestConfigRegions:
215
+ """Tests for region configuration."""
216
+
217
+ def test_get_region_valid(self):
218
+ """Test getting valid predefined region."""
219
+ from eurus.config import get_region
220
+ region = get_region("gulf_of_mexico")
221
+ assert region is not None
222
+ assert hasattr(region, 'min_lat')
223
+ assert hasattr(region, 'max_lat')
224
+
225
+ def test_get_region_case_insensitive(self):
226
+ """Test region lookup is case insensitive."""
227
+ from eurus.config import get_region
228
+ region = get_region("GULF_OF_MEXICO")
229
+ assert region is not None
230
+
231
+ def test_list_regions_output(self):
232
+ """Test list_regions returns formatted string."""
233
+ from eurus.config import list_regions
234
+ output = list_regions()
235
+ assert "gulf" in output.lower() or "region" in output.lower()
236
+
237
+
238
+ # ============================================================================
239
+ # MEMORY MODULE INTEGRATION
240
+ # ============================================================================
241
+
242
+ class TestMemoryIntegration:
243
+ """Integration tests for memory management."""
244
+
245
+ def test_memory_manager_create(self):
246
+ """Test MemoryManager can be created."""
247
+ from eurus.memory import MemoryManager, reset_memory
248
+ reset_memory()
249
+ mm = MemoryManager()
250
+ assert mm is not None
251
+
252
+ def test_memory_add_conversation(self):
253
+ """Test adding to conversation history."""
254
+ from eurus.memory import MemoryManager, reset_memory
255
+ reset_memory()
256
+ mm = MemoryManager()
257
+ mm.add_message("user", "Hello")
258
+ history = mm.get_conversation_history()
259
+ assert len(history) >= 1
260
+
261
+ def test_memory_dataset_registration(self):
262
+ """Test dataset registration."""
263
+ from eurus.memory import MemoryManager, reset_memory
264
+ reset_memory()
265
+ mm = MemoryManager()
266
+ mm.register_dataset(
267
+ path="/tmp/test.zarr",
268
+ variable="sst",
269
+ query_type="temporal",
270
+ start_date="2023-01-01",
271
+ end_date="2023-12-31",
272
+ lat_bounds=(20.0, 30.0),
273
+ lon_bounds=(260.0, 280.0),
274
+ file_size_bytes=1024
275
+ )
276
+ datasets = mm.list_datasets()
277
+ assert len(datasets) >= 1
278
+
279
+
280
+ # ============================================================================
281
+ # ROUTING TOOL EXTENDED TESTS
282
+ # ============================================================================
283
+
284
+ class TestRoutingTool:
285
+ """Extended tests for routing functionality."""
286
+
287
+ def test_routing_tool_exists(self):
288
+ """Test routing tool can be imported."""
289
+ from eurus.tools.routing import routing_tool
290
+ assert routing_tool is not None
291
+ assert routing_tool.name == "calculate_maritime_route"
292
+
293
+ def test_has_routing_deps_flag(self):
294
+ """Test HAS_ROUTING_DEPS flag exists."""
295
+ from eurus.tools.routing import HAS_ROUTING_DEPS
296
+ assert isinstance(HAS_ROUTING_DEPS, bool)
297
+
298
+
299
+ # ============================================================================
300
+ # REPL TOOL COMPREHENSIVE SECURITY TESTS
301
+ # ============================================================================
302
+
303
+ class TestREPLSecurityComprehensive:
304
+ """REPL tests — Docker is the sandbox, all imports allowed."""
305
+
306
+ def test_repl_allows_sys(self):
307
+ """Test REPL allows sys module (Docker sandbox)."""
308
+ from eurus.tools.repl import PythonREPLTool
309
+ repl = PythonREPLTool()
310
+ result = repl._run("import sys; print(sys.version_info.major)")
311
+ assert result is not None
312
+ assert "Error" not in result
313
+ repl.close()
314
+
315
+ def test_repl_allows_os(self):
316
+ """Test REPL allows os module (Docker sandbox)."""
317
+ from eurus.tools.repl import PythonREPLTool
318
+ repl = PythonREPLTool()
319
+ result = repl._run("import os; print(os.getcwd())")
320
+ assert result is not None
321
+ assert "Error" not in result
322
+ repl.close()
323
+
324
+ def test_repl_allows_xarray(self):
325
+ """Test REPL allows xarray operations."""
326
+ from eurus.tools.repl import PythonREPLTool
327
+ repl = PythonREPLTool()
328
+ result = repl._run("import xarray as xr; print(type(xr))")
329
+ assert "module" in result.lower() or "xarray" in result.lower()
330
+ repl.close()
331
+
332
+ def test_repl_allows_pandas(self):
333
+ """Test REPL allows pandas operations."""
334
+ from eurus.tools.repl import PythonREPLTool
335
+ repl = PythonREPLTool()
336
+ result = repl._run("import pandas as pd; print(pd.DataFrame({'a': [1, 2]}))")
337
+ assert "Error" not in result
338
+ repl.close()
339
+
340
+
341
+
342
+ # ============================================================================
343
+ # EDGE CASES AND ERROR HANDLING
344
+ # ============================================================================
345
+
346
+ class TestEdgeCases:
347
+ """Tests for edge cases and error handling."""
348
+
349
+ def test_get_short_name_unknown(self):
350
+ """Test get_short_name with unknown variable returns input."""
351
+ from eurus.config import get_short_name
352
+ result = get_short_name("completely_unknown_variable_xyz")
353
+ # Should return the input as-is for unknown variables
354
+ assert "completely_unknown_variable_xyz" in result or result is not None
355
+
356
+ def test_variable_info_none_for_unknown(self):
357
+ """Test get_variable_info returns None for unknown."""
358
+ from eurus.config import get_variable_info
359
+ result = get_variable_info("unknown_var_xyz")
360
+ assert result is None
361
+
362
+ def test_era5_tool_has_description(self):
363
+ """Test ERA5 tool has comprehensive description."""
364
+ from eurus.tools.era5 import era5_tool
365
+ assert len(era5_tool.description) > 100
web/__init__.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ """
2
+ Eurus Web Interface
3
+ ====================
4
+ A browser-based chat interface for the Eurus Climate Agent.
5
+ """
6
+
7
+ __version__ = "1.0.0"
web/agent_wrapper.py ADDED
@@ -0,0 +1,306 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Agent Wrapper for Web Interface
3
+ ===============================
4
+ Wraps the LangChain agent for WebSocket streaming.
5
+ """
6
+
7
+ import os
8
+ import sys
9
+ import asyncio
10
+ import logging
11
+ from pathlib import Path
12
+ from typing import Optional, Callable, Any, List, Dict
13
+ from queue import Queue
14
+
15
+ # Add src directory to path for eurus package
16
+ PROJECT_ROOT = Path(__file__).parent.parent
17
+ sys.path.insert(0, str(PROJECT_ROOT))
18
+ sys.path.insert(0, str(PROJECT_ROOT / "src"))
19
+
20
+ from dotenv import load_dotenv
21
+ load_dotenv()
22
+
23
+ from langchain_openai import ChatOpenAI
24
+ from langchain.agents import create_agent
25
+
26
+ # IMPORT FROM EURUS PACKAGE - SINGLE SOURCE OF TRUTH
27
+ from eurus.config import CONFIG, AGENT_SYSTEM_PROMPT
28
+ from eurus.memory import get_memory, SmartConversationMemory # Singleton for datasets, per-session for chat
29
+ from eurus.tools import get_all_tools
30
+ from eurus.tools.repl import PythonREPLTool
31
+
32
+ logger = logging.getLogger(__name__)
33
+
34
+
35
+ class AgentSession:
36
+ """
37
+ Manages a single agent session with streaming support.
38
+ """
39
+
40
+ def __init__(self, api_keys: Optional[Dict[str, str]] = None):
41
+ self._agent = None
42
+ self._repl_tool: Optional[PythonREPLTool] = None
43
+ self._messages: List[Dict] = []
44
+ self._initialized = False
45
+ self._api_keys = api_keys or {}
46
+
47
+ # Global singleton keeps the dataset cache (shared across sessions)
48
+ self._memory = get_memory()
49
+ # Per-session conversation memory — never touches other sessions
50
+ self._conversation = SmartConversationMemory()
51
+
52
+ # Queue for captured plots (thread-safe)
53
+ self._plot_queue: Queue = Queue()
54
+
55
+ self._initialize()
56
+
57
+ def _initialize(self):
58
+ """Initialize the agent and tools."""
59
+ logger.info("Initializing agent session...")
60
+
61
+ # Resolve API keys: user-provided take priority over env vars
62
+ openai_key = self._api_keys.get("openai_api_key") or os.environ.get("OPENAI_API_KEY")
63
+ arraylake_key = self._api_keys.get("arraylake_api_key") or os.environ.get("ARRAYLAKE_API_KEY")
64
+
65
+ if not arraylake_key:
66
+ logger.warning("ARRAYLAKE_API_KEY not found")
67
+ elif not os.environ.get("ARRAYLAKE_API_KEY"):
68
+ # Only set env var if not already configured (avoid overwriting
69
+ # server-configured keys with user-provided ones in multi-user scenarios)
70
+ os.environ["ARRAYLAKE_API_KEY"] = arraylake_key
71
+
72
+ if not openai_key:
73
+ logger.error("OPENAI_API_KEY not found")
74
+ return
75
+
76
+ try:
77
+ # Initialize REPL tool with working directory
78
+ logger.info("Starting Python kernel...")
79
+ self._repl_tool = PythonREPLTool(working_dir=os.getcwd())
80
+
81
+ # Set up plot callback using the proper method
82
+ def on_plot_captured(base64_data: str, filepath: str, code: str = ""):
83
+ logger.info(f"Plot captured, adding to queue: {filepath}")
84
+ self._plot_queue.put((base64_data, filepath, code))
85
+
86
+ self._repl_tool.set_plot_callback(on_plot_captured)
87
+ logger.info("Plot callback registered")
88
+
89
+ # Get ALL tools from centralized registry (no SCIENCE_TOOLS!)
90
+ tools = get_all_tools(enable_routing=True, enable_guide=True)
91
+ # Replace the default REPL with our configured one
92
+ tools = [t for t in tools if t.name != "python_repl"] + [self._repl_tool]
93
+
94
+ # Initialize LLM with resolved key
95
+ logger.info("Connecting to LLM...")
96
+ llm = ChatOpenAI(
97
+ model=CONFIG.model_name,
98
+ temperature=CONFIG.temperature,
99
+ api_key=openai_key,
100
+ )
101
+
102
+ # Use session-local memory for datasets (NOT global!)
103
+ datasets = self._memory.list_datasets()
104
+ enhanced_prompt = AGENT_SYSTEM_PROMPT
105
+
106
+ if datasets != "No datasets in cache.":
107
+ enhanced_prompt += f"\n\n## CACHED DATASETS\n{datasets}"
108
+
109
+ # Create agent
110
+ logger.info("Creating agent...")
111
+ self._agent = create_agent(
112
+ model=llm,
113
+ tools=tools,
114
+ system_prompt=enhanced_prompt,
115
+ debug=False
116
+ )
117
+
118
+ # FRESH conversation - no old messages!
119
+ self._messages = []
120
+
121
+ self._initialized = True
122
+ logger.info("Agent session initialized successfully")
123
+
124
+ except Exception as e:
125
+ logger.exception(f"Failed to initialize agent: {e}")
126
+ self._initialized = False
127
+
128
+ def is_ready(self) -> bool:
129
+ """Check if the agent is ready."""
130
+ return self._initialized and self._agent is not None
131
+
132
+ def clear_messages(self):
133
+ """Clear conversation messages."""
134
+ self._messages = []
135
+
136
+ def get_pending_plots(self) -> List[tuple]:
137
+ """Get all pending plots from queue."""
138
+ plots = []
139
+ while not self._plot_queue.empty():
140
+ try:
141
+ plots.append(self._plot_queue.get_nowait())
142
+ except Exception:
143
+ break
144
+ return plots
145
+
146
+ async def process_message(
147
+ self,
148
+ user_message: str,
149
+ stream_callback: Callable
150
+ ) -> str:
151
+ """
152
+ Process a user message and stream the response.
153
+ """
154
+ if not self.is_ready():
155
+ raise RuntimeError("Agent not initialized")
156
+
157
+ # Clear any old plots from queue
158
+ self.get_pending_plots()
159
+
160
+ # Add user message to history (session-local memory)
161
+ self._conversation.add_message("user", user_message)
162
+ self._messages.append({"role": "user", "content": user_message})
163
+
164
+ try:
165
+ # Send status: analyzing
166
+ await stream_callback("status", "🔍 Analyzing your request...")
167
+ await asyncio.sleep(0.3)
168
+
169
+ # Invoke the agent in executor (~15 tool calls max)
170
+ config = {"recursion_limit": 35}
171
+
172
+ # Stream status updates while agent is working
173
+ await stream_callback("status", "🤖 Processing with AI...")
174
+
175
+ result = await asyncio.get_event_loop().run_in_executor(
176
+ None,
177
+ lambda: self._agent.invoke({"messages": self._messages}, config=config)
178
+ )
179
+
180
+ # Update messages
181
+ self._messages = result["messages"]
182
+
183
+ # Parse messages to show tool calls made
184
+ tool_calls_made = []
185
+ for msg in self._messages:
186
+ if hasattr(msg, 'tool_calls') and msg.tool_calls:
187
+ for tc in msg.tool_calls:
188
+ tool_name = tc.get('name', 'unknown')
189
+ if tool_name not in tool_calls_made:
190
+ tool_calls_made.append(tool_name)
191
+
192
+ if tool_calls_made:
193
+ tools_str = ", ".join(tool_calls_made)
194
+ await stream_callback("status", f"🛠️ Used tools: {tools_str}")
195
+ await asyncio.sleep(0.5)
196
+
197
+ # Extract response
198
+ last_message = self._messages[-1]
199
+
200
+ if hasattr(last_message, 'content') and last_message.content:
201
+ response_text = last_message.content
202
+ elif isinstance(last_message, dict) and last_message.get('content'):
203
+ response_text = last_message['content']
204
+ else:
205
+ response_text = str(last_message)
206
+
207
+ # Send status: generating response
208
+ await stream_callback("status", "✍️ Generating response...")
209
+ await asyncio.sleep(0.2)
210
+
211
+ # Stream the response in chunks
212
+ chunk_size = 50
213
+ for i in range(0, len(response_text), chunk_size):
214
+ chunk = response_text[i:i + chunk_size]
215
+ await stream_callback("chunk", chunk)
216
+ await asyncio.sleep(0.01)
217
+
218
+ # Send any captured media (plots and videos)
219
+ plots = self.get_pending_plots()
220
+ # NOTE: Only use session-specific _plot_queue, NOT shared folder scan (privacy!)
221
+
222
+ if plots:
223
+ await stream_callback("status", f"📊 Rendering {len(plots)} visualization(s)...")
224
+ await asyncio.sleep(0.3)
225
+
226
+ logger.info(f"Sending {len(plots)} media items to client")
227
+ for plot_data in plots:
228
+ base64_data, filepath = plot_data[0], plot_data[1]
229
+ code = plot_data[2] if len(plot_data) > 2 else ""
230
+
231
+ # Determine if this is a video or image
232
+ ext = filepath.lower().split('.')[-1] if filepath else ''
233
+ if ext in ('gif',):
234
+ await stream_callback("video", "", data=base64_data, path=filepath, mimetype="image/gif")
235
+ elif ext in ('webm',):
236
+ await stream_callback("video", "", data=base64_data, path=filepath, mimetype="video/webm")
237
+ elif ext in ('mp4',):
238
+ await stream_callback("video", "", data=base64_data, path=filepath, mimetype="video/mp4")
239
+ else:
240
+ # Default to plot (png, jpg, etc.)
241
+ await stream_callback("plot", "", data=base64_data, path=filepath, code=code)
242
+
243
+ # Save to memory
244
+ self._conversation.add_message("assistant", response_text)
245
+
246
+ return response_text
247
+
248
+ except Exception as e:
249
+ logger.exception(f"Error processing message: {e}")
250
+ raise
251
+
252
+ def close(self):
253
+ """Clean up resources."""
254
+ logger.info("Closing agent session...")
255
+ if self._repl_tool:
256
+ try:
257
+ self._repl_tool.close()
258
+ except Exception as e:
259
+ logger.error(f"Error closing REPL: {e}")
260
+
261
+
262
+ # Per-connection sessions (NOT global singleton!)
263
+ # Key: unique connection ID, Value: AgentSession
264
+ _sessions: Dict[str, AgentSession] = {}
265
+
266
+
267
+ def create_session(connection_id: str, api_keys: Optional[Dict[str, str]] = None) -> AgentSession:
268
+ """Create a new session for a connection."""
269
+ if connection_id in _sessions:
270
+ # Close existing session first
271
+ _sessions[connection_id].close()
272
+ session = AgentSession(api_keys=api_keys)
273
+ _sessions[connection_id] = session
274
+ logger.info(f"Created session for connection: {connection_id}")
275
+ return session
276
+
277
+
278
+ def get_session(connection_id: str) -> Optional[AgentSession]:
279
+ """Get session for a connection."""
280
+ return _sessions.get(connection_id)
281
+
282
+
283
+ def close_session(connection_id: str):
284
+ """Close and remove session for a connection."""
285
+ if connection_id in _sessions:
286
+ _sessions[connection_id].close()
287
+ del _sessions[connection_id]
288
+ logger.info(f"Closed session for connection: {connection_id}")
289
+
290
+
291
+ # DEPRECATED: Keep for backward compatibility during migration
292
+ def get_agent_session() -> AgentSession:
293
+ """DEPRECATED: Use create_session/get_session with connection_id instead."""
294
+ logger.warning("get_agent_session() is deprecated - use create_session(connection_id)")
295
+ # Create default session for CLI/testing
296
+ if "_default" not in _sessions:
297
+ _sessions["_default"] = AgentSession()
298
+ return _sessions["_default"]
299
+
300
+
301
+ def shutdown_agent_session():
302
+ """Shutdown all agent sessions."""
303
+ count = len(_sessions)
304
+ for conn_id in list(_sessions.keys()):
305
+ close_session(conn_id)
306
+ logger.info(f"Shutdown {count} sessions")
web/app.py ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Eurus Web Application
3
+ ======================
4
+ FastAPI application factory and main entry point.
5
+ """
6
+
7
+ import os
8
+ import sys
9
+ import logging
10
+ from pathlib import Path
11
+ from contextlib import asynccontextmanager
12
+
13
+ from fastapi import FastAPI
14
+ from fastapi.staticfiles import StaticFiles
15
+ from fastapi.templating import Jinja2Templates
16
+
17
+ # Add parent and src directory to path for eurus package
18
+ PROJECT_ROOT = Path(__file__).parent.parent
19
+ sys.path.insert(0, str(PROJECT_ROOT))
20
+ sys.path.insert(0, str(PROJECT_ROOT / "src"))
21
+
22
+ # IMPORT FROM EURUS PACKAGE
23
+ from eurus.config import CONFIG, PLOTS_DIR
24
+
25
+ # Configure logging
26
+ logging.basicConfig(
27
+ level=logging.INFO,
28
+ format='%(asctime)s | %(levelname)s | %(name)s | %(message)s',
29
+ datefmt='%H:%M:%S'
30
+ )
31
+ logger = logging.getLogger(__name__)
32
+
33
+ # Paths
34
+ WEB_DIR = Path(__file__).parent
35
+ TEMPLATES_DIR = WEB_DIR / "templates"
36
+ STATIC_DIR = WEB_DIR / "static"
37
+
38
+
39
+ @asynccontextmanager
40
+ async def lifespan(app: FastAPI):
41
+ """Application lifespan handler for startup/shutdown."""
42
+ # Startup
43
+ logger.info("Starting Eurus Web Interface...")
44
+ logger.info(f"Templates: {TEMPLATES_DIR}")
45
+ logger.info(f"Static files: {STATIC_DIR}")
46
+ logger.info(f"Plots directory: {PLOTS_DIR}")
47
+
48
+ # Sessions are created per-connection in websocket.py
49
+ logger.info("Ready to accept connections")
50
+
51
+ yield
52
+
53
+ # Shutdown
54
+ logger.info("Shutting down Eurus Web Interface...")
55
+ from web.agent_wrapper import shutdown_agent_session
56
+ shutdown_agent_session()
57
+
58
+
59
+ def create_app() -> FastAPI:
60
+ """Create and configure the FastAPI application."""
61
+
62
+ app = FastAPI(
63
+ title="Eurus Climate Agent",
64
+ description="Interactive web interface for ERA5 climate data analysis",
65
+ version="1.0.0",
66
+ lifespan=lifespan,
67
+ )
68
+
69
+ # Mount static files
70
+ app.mount("/static", StaticFiles(directory=str(STATIC_DIR)), name="static")
71
+
72
+ # Mount plots directory for serving generated plots
73
+ PLOTS_DIR.mkdir(parents=True, exist_ok=True)
74
+ app.mount("/plots", StaticFiles(directory=str(PLOTS_DIR)), name="plots")
75
+
76
+ # Include routers
77
+ from web.routes import api_router, websocket_router, pages_router
78
+
79
+ app.include_router(api_router, prefix="/api", tags=["api"])
80
+ app.include_router(websocket_router, tags=["websocket"])
81
+ app.include_router(pages_router, tags=["pages"])
82
+
83
+ return app
84
+
85
+
86
+ # Create the app instance
87
+ app = create_app()
88
+
89
+
90
+ def main():
91
+ """Main entry point for running the web server."""
92
+ import uvicorn
93
+
94
+ host = getattr(CONFIG, 'web_host', '127.0.0.1')
95
+ port = getattr(CONFIG, 'web_port', 8000)
96
+
97
+ print(f"""
98
+ ╔═══════════════════════════════════════════════════════════════════════════╗
99
+ ║ ║
100
+ ║ ██╗ ██╗ ██████╗ ███████╗████████╗ ██████╗ ██╗ ██╗ ║
101
+ ║ ██║ ██║██╔═══██╗██╔════╝╚══██╔══╝██╔═══██╗██║ ██╔╝ ║
102
+ ║ ██║ ██║██║ ██║███████╗ ██║ ██║ ██║█████╔╝ ║
103
+ ║ ╚██╗ ██╔╝██║ ██║╚════██║ ██║ ██║ ██║██╔═██╗ ║
104
+ ║ ╚████╔╝ ╚██████╔╝███████║ ██║ ╚██████╔╝██║ ██╗ ║
105
+ ║ ╚═══╝ ╚═════╝ ╚══════╝ ╚═╝ ╚═════╝ ╚═╝ ╚═╝ ║
106
+ ║ ║
107
+ ║ Eurus Web Interface v1.0 ║
108
+ ║ ───────────────────────────────────── ║
109
+ ║ ║
110
+ ║ Starting server at: http://{host}:{port} ║
111
+ ║ ║
112
+ ╚═══════════════════════════════════════════════════════════════════════════╝
113
+ """)
114
+
115
+ uvicorn.run(
116
+ "web.app:app",
117
+ host=host,
118
+ port=port,
119
+ reload=False,
120
+ log_level="info",
121
+ )
122
+
123
+
124
+ if __name__ == "__main__":
125
+ main()
web/routes/__init__.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ """Web routes package."""
2
+
3
+ from .api import router as api_router
4
+ from .websocket import router as websocket_router
5
+ from .pages import router as pages_router
6
+
7
+ __all__ = ["api_router", "websocket_router", "pages_router"]
web/routes/api.py ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ REST API Routes
3
+ ===============
4
+ Health checks, cache management, and configuration endpoints.
5
+ """
6
+
7
+ import os
8
+ import sys
9
+ from pathlib import Path
10
+ from typing import List, Dict, Any
11
+
12
+ from fastapi import APIRouter, HTTPException
13
+ from pydantic import BaseModel
14
+
15
# Add project root and src/ to path for eurus package.
# NOTE(review): mutating sys.path at import time makes import order matter
# across the web package — prefer an installed package or PYTHONPATH; confirm
# before changing, since app.py and other route modules repeat this pattern.
PROJECT_ROOT = Path(__file__).parent.parent.parent
sys.path.insert(0, str(PROJECT_ROOT))
sys.path.insert(0, str(PROJECT_ROOT / "src"))

# IMPORT FROM EURUS PACKAGE (resolvable only after the path inserts above).
from eurus.config import CONFIG, ERA5_VARIABLES, GEOGRAPHIC_REGIONS

# Router mounted under the "/api" prefix by create_app() in web/app.py.
router = APIRouter()
24
+
25
+
26
class HealthResponse(BaseModel):
    """Response body for GET /api/health."""

    status: str  # "ok" whenever the request is served at all
    version: str  # server version string (hard-coded "1.0.0" in health_check)
    agent_ready: bool  # True when an agent session exists and reports ready
30
+
31
+
32
class DatasetInfo(BaseModel):
    """Metadata describing one cached dataset.

    NOTE(review): not referenced by the endpoints visible in this module
    (list_cache builds plain dicts) — confirm it is used elsewhere before
    removing or changing it.
    """

    variable: str  # dataset variable name, e.g. an ERA5 short name — TODO confirm
    query_type: str
    start_date: str  # presumably ISO-formatted date strings; verify against the writer
    end_date: str
    lat_bounds: tuple  # (min, max) latitude — TODO confirm ordering
    lon_bounds: tuple
    file_size_bytes: int  # 0 means "not recorded" (see list_cache fallback)
    path: str  # filesystem location of the cached data
41
+
42
+
43
class CacheResponse(BaseModel):
    """Response body for GET /api/cache."""

    datasets: List[Dict[str, Any]]  # one entry per cached dataset still on disk
    total_size_bytes: int  # sum of the listed datasets' sizes
46
+
47
+
48
class ConfigResponse(BaseModel):
    """Response body for GET /api/config."""

    variables: List[Dict[str, str]]  # unique variables: name/long_name/units/description
    regions: List[str]  # known geographic region names
    model: str  # model identifier taken from CONFIG.model_name
52
+
53
+
54
+ @router.get("/keys-status")
55
+ async def keys_status():
56
+ """Check which API keys are configured via environment variables."""
57
+ return {
58
+ "openai": bool(os.environ.get("OPENAI_API_KEY")),
59
+ "arraylake": bool(os.environ.get("ARRAYLAKE_API_KEY")),
60
+ }
61
+
62
+
63
+ @router.get("/health", response_model=HealthResponse)
64
+ async def health_check():
65
+ """Check if the server and agent are healthy."""
66
+ from web.agent_wrapper import get_agent_session
67
+
68
+ try:
69
+ session = get_agent_session()
70
+ agent_ready = session is not None and session.is_ready()
71
+ except Exception:
72
+ agent_ready = False
73
+
74
+ return HealthResponse(
75
+ status="ok",
76
+ version="1.0.0",
77
+ agent_ready=agent_ready
78
+ )
79
+
80
+
81
+ @router.get("/cache", response_model=CacheResponse)
82
+ async def list_cache():
83
+ """List all cached datasets."""
84
+ from eurus.memory import get_memory
85
+
86
+ memory = get_memory()
87
+ datasets = []
88
+ total_size = 0
89
+
90
+ for path, record in memory.datasets.items():
91
+ if os.path.exists(path):
92
+ size = record.file_size_bytes
93
+ if size == 0:
94
+ # Calculate size if not recorded
95
+ if os.path.isdir(path):
96
+ size = sum(
97
+ os.path.getsize(os.path.join(dp, f))
98
+ for dp, _, files in os.walk(path)
99
+ for f in files
100
+ )
101
+ else:
102
+ size = os.path.getsize(path)
103
+
104
+ datasets.append({
105
+ "variable": record.variable,
106
+ "query_type": record.query_type,
107
+ "start_date": record.start_date,
108
+ "end_date": record.end_date,
109
+ "lat_bounds": record.lat_bounds,
110
+ "lon_bounds": record.lon_bounds,
111
+ "file_size_bytes": size,
112
+ "path": path
113
+ })
114
+ total_size += size
115
+
116
+ return CacheResponse(datasets=datasets, total_size_bytes=total_size)
117
+
118
+
119
+ @router.get("/config", response_model=ConfigResponse)
120
+ async def get_config():
121
+ """Get available variables and regions."""
122
+ # Get unique variables
123
+ seen_vars = set()
124
+ variables = []
125
+ for var_id, var_info in ERA5_VARIABLES.items():
126
+ if var_info.short_name not in seen_vars:
127
+ seen_vars.add(var_info.short_name)
128
+ variables.append({
129
+ "name": var_info.short_name,
130
+ "long_name": var_info.long_name,
131
+ "units": var_info.units,
132
+ "description": var_info.description
133
+ })
134
+
135
+ regions = list(GEOGRAPHIC_REGIONS.keys())
136
+
137
+ return ConfigResponse(
138
+ variables=variables,
139
+ regions=regions,
140
+ model=CONFIG.model_name
141
+ )
142
+
143
+
144
+ @router.delete("/conversation")
145
+ async def clear_conversation():
146
+ """Clear the conversation history."""
147
+ from eurus.memory import get_memory
148
+ from web.agent_wrapper import get_agent_session
149
+
150
+ memory = get_memory()
151
+ memory.clear_conversation()
152
+
153
+ # Also clear the agent session messages
154
+ session = get_agent_session()
155
+ if session:
156
+ session.clear_messages()
157
+
158
+ return {"status": "ok", "message": "Conversation cleared"}
159
+
160
+
161
+ @router.get("/memory")
162
+ async def get_memory_summary():
163
+ """Get memory summary."""
164
+ from eurus.memory import get_memory
165
+
166
+ memory = get_memory()
167
+
168
+ return {
169
+ "conversation_count": len(memory.conversations),
170
+ "dataset_count": len([p for p in memory.datasets if os.path.exists(p)]),
171
+ "analysis_count": len(memory.analyses),
172
+ "context_summary": memory.get_context_summary()
173
+ }
web/routes/pages.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Page Routes
3
+ ===========
4
+ HTML page rendering endpoints.
5
+ """
6
+
7
+ import sys
8
+ from pathlib import Path
9
+
10
+ from fastapi import APIRouter, Request
11
+ from fastapi.responses import HTMLResponse
12
+ from fastapi.templating import Jinja2Templates
13
+
14
# Templates directory — resolves to web/templates relative to this file.
TEMPLATES_DIR = Path(__file__).parent.parent / "templates"
templates = Jinja2Templates(directory=str(TEMPLATES_DIR))

# Router mounted without a prefix by create_app() in web/app.py.
router = APIRouter()
19
+
20
+
21
+ @router.get("/", response_class=HTMLResponse)
22
+ async def index(request: Request):
23
+ """Render the main chat page."""
24
+ return templates.TemplateResponse(
25
+ "index.html",
26
+ {"request": request}
27
+ )
web/routes/websocket.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ WebSocket Chat Handler
3
+ ======================
4
+ Handles real-time chat via WebSocket with streaming responses.
5
+ """
6
+
7
+ import json
8
+ import asyncio
9
+ import logging
10
+ from typing import Optional
11
+
12
+ from fastapi import APIRouter, WebSocket, WebSocketDisconnect
13
+
14
+ router = APIRouter()
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class ConnectionManager:
    """Tracks the set of currently-open chat WebSocket connections."""

    def __init__(self):
        # Sockets are appended on accept and removed on disconnect.
        self.active_connections: list[WebSocket] = []

    async def connect(self, websocket: WebSocket):
        """Accept the handshake and start tracking the socket."""
        await websocket.accept()
        self.active_connections.append(websocket)
        count = len(self.active_connections)
        logger.info(f"WebSocket connected. Total: {count}")

    def disconnect(self, websocket: WebSocket):
        """Stop tracking the socket; a no-op if it was never (or already) removed."""
        if websocket in self.active_connections:
            self.active_connections.remove(websocket)
            count = len(self.active_connections)
            logger.info(f"WebSocket disconnected. Total: {count}")

    async def send_json(self, websocket: WebSocket, data: dict):
        """Send a JSON payload, logging (not raising) on delivery failure."""
        try:
            await websocket.send_json(data)
        except Exception as exc:
            logger.error(f"Failed to send message: {exc}")
39
+
40
+
41
+ manager = ConnectionManager()
42
+
43
+
44
+ @router.websocket("/ws/chat")
45
+ async def websocket_chat(websocket: WebSocket):
46
+ """WebSocket endpoint for chat."""
47
+ import uuid
48
+ connection_id = str(uuid.uuid4()) # Unique ID for this connection
49
+
50
+ await manager.connect(websocket)
51
+ logger.info(f"New connection: {connection_id}")
52
+
53
+ try:
54
+ # Session created lazily after we receive API keys
55
+ from web.agent_wrapper import create_session, get_session, close_session
56
+ session = None
57
+
58
+ while True:
59
+ data = await websocket.receive_json()
60
+ message = data.get("message", "").strip()
61
+
62
+ # Handle API key configuration from client
63
+ if data.get("type") == "configure_keys":
64
+ api_keys = {
65
+ "openai_api_key": data.get("openai_api_key", ""),
66
+ "arraylake_api_key": data.get("arraylake_api_key", ""),
67
+ }
68
+ session = create_session(connection_id, api_keys=api_keys)
69
+ ready = session.is_ready()
70
+ await manager.send_json(websocket, {
71
+ "type": "keys_configured",
72
+ "ready": ready,
73
+ })
74
+ continue
75
+
76
+ # Create default session if not yet created (keys from env)
77
+ if session is None:
78
+ session = create_session(connection_id)
79
+
80
+ if not message:
81
+ continue
82
+
83
+ logger.info(f"[{connection_id[:8]}] Received: {message[:100]}...")
84
+
85
+ # Handle /clear command — clear session memory + UI
86
+ if message.strip() == "/clear":
87
+ session = get_session(connection_id)
88
+ if session:
89
+ session.clear_messages()
90
+ await manager.send_json(websocket, {"type": "clear"})
91
+ continue
92
+
93
+ # Send thinking indicator
94
+ await manager.send_json(websocket, {"type": "thinking"})
95
+
96
+ try:
97
+ # Get session for this connection
98
+ session = get_session(connection_id)
99
+ if not session:
100
+ raise RuntimeError("Session not found")
101
+
102
+ # Callback for streaming
103
+ async def stream_callback(event_type: str, content: str, **kwargs):
104
+ msg = {"type": event_type, "content": content}
105
+ msg.update(kwargs)
106
+ await manager.send_json(websocket, msg)
107
+
108
+ # Process message
109
+ response = await session.process_message(message, stream_callback)
110
+
111
+ # Send complete
112
+ await manager.send_json(websocket, {
113
+ "type": "complete",
114
+ "content": response
115
+ })
116
+
117
+ except Exception as e:
118
+ logger.exception(f"Error: {e}")
119
+ await manager.send_json(websocket, {
120
+ "type": "error",
121
+ "content": str(e)
122
+ })
123
+
124
+ except WebSocketDisconnect:
125
+ logger.info(f"Connection {connection_id[:8]} disconnected")
126
+ manager.disconnect(websocket)
127
+ close_session(connection_id) # Clean up session
128
+ except Exception as e:
129
+ logger.exception(f"WebSocket error: {e}")
130
+ manager.disconnect(websocket)
131
+ close_session(connection_id) # Clean up session
web/static/css/style.css ADDED
@@ -0,0 +1,854 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* Eurus - Premium Interface with Eye Comfort */
2
+ /* Inspired by Google Material, Apple HIG, and modern design systems */
3
+
4
+ /* ===== DARK THEME (Refined Neosynth) ===== */
5
+ :root,
6
+ [data-theme="dark"] {
7
+ /* Softer dark base - not pure black for reduced eye strain */
8
+ --bg-primary: #0f1419;
9
+ --bg-secondary: #15202b;
10
+ --bg-tertiary: #1c2938;
11
+
12
+ /* Refined neon - softer cyan/purple, easier on eyes */
13
+ --accent-primary: #1d9bf0;
14
+ --accent-secondary: #8b5cf6;
15
+ --accent-tertiary: #22d3ee;
16
+
17
+ /* High contrast text - WCAG AA compliant */
18
+ --text-primary: #e7e9ea;
19
+ --text-secondary: #8899a6;
20
+ --text-muted: #5c6e7e;
21
+
22
+ /* Subtle glass effect */
23
+ --glass-bg: rgba(255, 255, 255, 0.04);
24
+ --glass-border: rgba(255, 255, 255, 0.1);
25
+
26
+ /* Messages */
27
+ --message-user-bg: linear-gradient(135deg, rgba(29, 155, 240, 0.12), rgba(139, 92, 246, 0.08));
28
+ --message-user-border: rgba(29, 155, 240, 0.25);
29
+ --message-assistant-bg: rgba(255, 255, 255, 0.04);
30
+ --message-assistant-border: rgba(255, 255, 255, 0.08);
31
+
32
+ /* Code - rich dark */
33
+ --code-bg: #0d1117;
34
+
35
+ /* Refined glow - subtle, not overwhelming */
36
+ --glow-primary: 0 2px 12px rgba(29, 155, 240, 0.2);
37
+ --glow-secondary: 0 2px 12px rgba(139, 92, 246, 0.15);
38
+
39
+ /* Subtle ambient gradient */
40
+ --bg-gradient:
41
+ radial-gradient(ellipse at 50% 0%, rgba(29, 155, 240, 0.04) 0%, transparent 50%);
42
+
43
+ /* Focus ring */
44
+ --focus-ring: 0 0 0 2px rgba(29, 155, 240, 0.5);
45
+ }
46
+
47
+ /* ===== LIGHT THEME (Clean & Professional) ===== */
48
+ [data-theme="light"] {
49
+ /* Warm white - easier than pure white */
50
+ --bg-primary: #f7f9fa;
51
+ --bg-secondary: #ffffff;
52
+ --bg-tertiary: #eff3f4;
53
+
54
+ /* Professional blue - Google-inspired */
55
+ --accent-primary: #1a73e8;
56
+ --accent-secondary: #5f6368;
57
+ --accent-tertiary: #34a853;
58
+
59
+ /* High contrast text */
60
+ --text-primary: #202124;
61
+ --text-secondary: #5f6368;
62
+ --text-muted: #9aa0a6;
63
+
64
+ /* Soft shadows, minimal borders */
65
+ --glass-bg: rgba(255, 255, 255, 0.9);
66
+ --glass-border: rgba(0, 0, 0, 0.08);
67
+
68
+ /* Messages - clean and minimal */
69
+ --message-user-bg: linear-gradient(135deg, rgba(26, 115, 232, 0.08), rgba(26, 115, 232, 0.04));
70
+ --message-user-border: rgba(26, 115, 232, 0.15);
71
+ --message-assistant-bg: #ffffff;
72
+ --message-assistant-border: rgba(0, 0, 0, 0.06);
73
+
74
+ /* Code - readable dark */
75
+ --code-bg: #1f2937;
76
+
77
+ /* Soft elevation shadows */
78
+ --glow-primary: 0 1px 3px rgba(0, 0, 0, 0.1), 0 4px 12px rgba(26, 115, 232, 0.08);
79
+ --glow-secondary: 0 1px 3px rgba(0, 0, 0, 0.1);
80
+
81
+ /* Clean background */
82
+ --bg-gradient: none;
83
+
84
+ /* Focus ring */
85
+ --focus-ring: 0 0 0 2px rgba(26, 115, 232, 0.4);
86
+ }
87
+
88
+ /* ===== BASE STYLES ===== */
89
+ * {
90
+ box-sizing: border-box;
91
+ }
92
+
93
+ body {
94
+ margin: 0;
95
+ padding: 0;
96
+ min-height: 100vh;
97
+ display: flex;
98
+ flex-direction: column;
99
+ font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
100
+ font-size: 15px;
101
+ line-height: 1.5;
102
+ background: var(--bg-primary);
103
+ background-image: var(--bg-gradient);
104
+ color: var(--text-primary);
105
+ transition: background-color 0.2s ease, color 0.2s ease;
106
+ -webkit-font-smoothing: antialiased;
107
+ -moz-osx-font-smoothing: grayscale;
108
+ }
109
+
110
+ /* ===== HEADER ===== */
111
+ header {
112
+ background: var(--bg-secondary);
113
+ border-bottom: 1px solid var(--glass-border);
114
+ padding: 0.75rem 1.25rem;
115
+ transition: background-color 0.2s ease;
116
+ }
117
+
118
+ header nav {
119
+ display: flex;
120
+ justify-content: space-between;
121
+ align-items: center;
122
+ max-width: 1200px;
123
+ margin: 0 auto;
124
+ }
125
+
126
+ header nav ul {
127
+ list-style: none;
128
+ margin: 0;
129
+ padding: 0;
130
+ display: flex;
131
+ align-items: center;
132
+ gap: 1rem;
133
+ }
134
+
135
+ header nav a {
136
+ color: var(--text-secondary);
137
+ text-decoration: none;
138
+ font-size: 0.875rem;
139
+ font-weight: 500;
140
+ padding: 0.5rem 0.75rem;
141
+ border-radius: 0.5rem;
142
+ transition: all 0.15s ease;
143
+ }
144
+
145
+ header nav a:hover {
146
+ color: var(--accent-primary);
147
+ background: var(--glass-bg);
148
+ }
149
+
150
+ .logo {
151
+ font-weight: 700;
152
+ font-size: 1.125rem;
153
+ color: var(--accent-primary);
154
+ letter-spacing: -0.01em;
155
+ }
156
+
157
+ /* Theme Toggle Button */
158
+ .theme-toggle {
159
+ background: var(--glass-bg);
160
+ border: 1px solid var(--glass-border);
161
+ border-radius: 0.5rem;
162
+ padding: 0.5rem;
163
+ cursor: pointer;
164
+ transition: all 0.15s ease;
165
+ display: flex;
166
+ align-items: center;
167
+ justify-content: center;
168
+ }
169
+
170
+ .theme-toggle:hover {
171
+ background: var(--bg-tertiary);
172
+ }
173
+
174
+ .theme-toggle:focus {
175
+ outline: none;
176
+ box-shadow: var(--focus-ring);
177
+ }
178
+
179
+ .theme-icon {
180
+ font-size: 1rem;
181
+ line-height: 1;
182
+ }
183
+
184
+ /* Connection status */
185
+ .status-badge {
186
+ padding: 0.375rem 0.625rem;
187
+ border-radius: 1rem;
188
+ font-size: 0.6875rem;
189
+ font-weight: 600;
190
+ letter-spacing: 0.02em;
191
+ text-transform: uppercase;
192
+ }
193
+
194
+ .status-badge.connected {
195
+ background: rgba(52, 168, 83, 0.12);
196
+ color: #34a853;
197
+ }
198
+
199
+ .status-badge.disconnected {
200
+ background: rgba(234, 67, 53, 0.12);
201
+ color: #ea4335;
202
+ }
203
+
204
+ .status-badge.connecting {
205
+ background: rgba(251, 188, 4, 0.12);
206
+ color: #f9ab00;
207
+ animation: pulse 2s ease-in-out infinite;
208
+ }
209
+
210
+ @keyframes pulse {
211
+
212
+ 0%,
213
+ 100% {
214
+ opacity: 1;
215
+ }
216
+
217
+ 50% {
218
+ opacity: 0.6;
219
+ }
220
+ }
221
+
222
+ /* ===== MAIN CONTENT ===== */
223
+ main {
224
+ flex: 1;
225
+ display: flex;
226
+ flex-direction: column;
227
+ overflow: hidden;
228
+ }
229
+
230
+ /* Chat container */
231
+ .chat-container {
232
+ display: flex;
233
+ flex-direction: column;
234
+ height: calc(100vh - 110px);
235
+ max-width: 800px;
236
+ margin: 0 auto;
237
+ width: 100%;
238
+ padding: 0 1rem;
239
+ }
240
+
241
+ /* Messages area */
242
+ .chat-messages {
243
+ flex: 1;
244
+ overflow-y: auto;
245
+ padding: 1.25rem 0;
246
+ display: flex;
247
+ flex-direction: column;
248
+ gap: 0.875rem;
249
+ }
250
+
251
+ /* ===== MESSAGE STYLES ===== */
252
+ .message {
253
+ max-width: 85%;
254
+ padding: 0.875rem 1rem;
255
+ border-radius: 1rem;
256
+ line-height: 1.6;
257
+ font-size: 0.9375rem;
258
+ animation: messageAppear 0.2s ease-out;
259
+ }
260
+
261
+ @keyframes messageAppear {
262
+ from {
263
+ opacity: 0;
264
+ transform: translateY(8px);
265
+ }
266
+
267
+ to {
268
+ opacity: 1;
269
+ transform: translateY(0);
270
+ }
271
+ }
272
+
273
+ .user-message {
274
+ background: var(--message-user-bg);
275
+ border: 1px solid var(--message-user-border);
276
+ align-self: flex-end;
277
+ border-bottom-right-radius: 0.25rem;
278
+ }
279
+
280
+ .assistant-message {
281
+ background: var(--message-assistant-bg);
282
+ border: 1px solid var(--message-assistant-border);
283
+ align-self: flex-start;
284
+ border-bottom-left-radius: 0.25rem;
285
+ }
286
+
287
+ [data-theme="light"] .assistant-message {
288
+ box-shadow: 0 1px 2px rgba(0, 0, 0, 0.05);
289
+ }
290
+
291
+ .system-message {
292
+ background: var(--bg-tertiary);
293
+ border: 1px solid var(--glass-border);
294
+ align-self: center;
295
+ max-width: 90%;
296
+ font-size: 0.875rem;
297
+ border-radius: 0.75rem;
298
+ }
299
+
300
+ .system-message h3 {
301
+ margin: 0 0 0.5rem 0;
302
+ font-size: 0.9375rem;
303
+ font-weight: 600;
304
+ color: var(--accent-primary);
305
+ }
306
+
307
+ .thinking-message {
308
+ background: transparent;
309
+ align-self: flex-start;
310
+ padding: 0.5rem;
311
+ border: none;
312
+ }
313
+
314
+ .error-message {
315
+ background: rgba(234, 67, 53, 0.1);
316
+ border: 1px solid rgba(234, 67, 53, 0.25);
317
+ align-self: center;
318
+ color: #ea4335;
319
+ }
320
+
321
+ .message-header {
322
+ display: flex;
323
+ align-items: center;
324
+ gap: 0.375rem;
325
+ margin-bottom: 0.375rem;
326
+ font-size: 0.75rem;
327
+ color: var(--text-secondary);
328
+ }
329
+
330
+ .avatar-icon {
331
+ width: 20px;
332
+ height: 20px;
333
+ border-radius: 50%;
334
+ object-fit: cover;
335
+ }
336
+
337
+
338
+
339
+
340
+
341
+ .message-role {
342
+ font-weight: 600;
343
+ text-transform: uppercase;
344
+ letter-spacing: 0.03em;
345
+ color: var(--accent-primary);
346
+ }
347
+
348
+ .message-content {
349
+ word-wrap: break-word;
350
+ }
351
+
352
+ .message-content p {
353
+ margin: 0 0 0.625rem 0;
354
+ }
355
+
356
+ .message-content p:last-child {
357
+ margin-bottom: 0;
358
+ }
359
+
360
+ .message-content pre {
361
+ margin: 0.625rem 0;
362
+ padding: 0.875rem;
363
+ border-radius: 0.5rem;
364
+ overflow-x: auto;
365
+ background: var(--code-bg);
366
+ border: 1px solid var(--glass-border);
367
+ font-size: 0.8125rem;
368
+ line-height: 1.5;
369
+ }
370
+
371
+ .message-content code {
372
+ font-family: 'SF Mono', Monaco, Consolas, 'Liberation Mono', monospace;
373
+ font-size: 0.8125rem;
374
+ }
375
+
376
+ .message-content ul,
377
+ .message-content ol {
378
+ margin: 0.5rem 0;
379
+ padding-left: 1.5rem;
380
+ }
381
+
382
+ /* ===== PLOT DISPLAY ===== */
383
+ .message-plots {
384
+ margin-top: 0.875rem;
385
+ }
386
+
387
+ .plot-figure {
388
+ margin: 0;
389
+ display: block;
390
+ max-width: 100%;
391
+ }
392
+
393
+ .plot-figure img {
394
+ max-width: 100%;
395
+ width: auto;
396
+ height: auto;
397
+ border-radius: 0.5rem;
398
+ border: 1px solid var(--glass-border);
399
+ cursor: pointer;
400
+ transition: all 0.15s ease;
401
+ }
402
+
403
+ .plot-figure img:hover {
404
+ box-shadow: var(--glow-primary);
405
+ }
406
+
407
+ .plot-actions {
408
+ margin-top: 0.625rem;
409
+ display: flex;
410
+ gap: 0.5rem;
411
+ }
412
+
413
+ .plot-actions button {
414
+ padding: 0.5rem 0.875rem;
415
+ font-size: 0.75rem;
416
+ font-weight: 500;
417
+ border: 1px solid var(--glass-border);
418
+ border-radius: 0.375rem;
419
+ background: var(--bg-tertiary);
420
+ color: var(--text-secondary);
421
+ cursor: pointer;
422
+ transition: all 0.15s ease;
423
+ }
424
+
425
+ .plot-actions button:hover {
426
+ border-color: var(--accent-primary);
427
+ color: var(--accent-primary);
428
+ }
429
+
430
+ /* Plot code display */
431
+ .plot-code {
432
+ margin-top: 0.625rem;
433
+ border-radius: 0.5rem;
434
+ overflow: hidden;
435
+ border: 1px solid var(--glass-border);
436
+ }
437
+
438
+ .plot-code pre {
439
+ margin: 0;
440
+ padding: 0.875rem;
441
+ background: var(--code-bg);
442
+ overflow-x: auto;
443
+ font-size: 0.8125rem;
444
+ line-height: 1.5;
445
+ }
446
+
447
+ .plot-code code {
448
+ font-family: 'SF Mono', Monaco, Consolas, 'Liberation Mono', monospace;
449
+ color: #e6edf3;
450
+ }
451
+
452
+ /* ===== INPUT AREA ===== */
453
+ .chat-input-container {
454
+ padding: 1rem 0;
455
+ border-top: 1px solid var(--glass-border);
456
+ background: var(--bg-primary);
457
+ }
458
+
459
+ .chat-form {
460
+ display: flex;
461
+ gap: 0.5rem;
462
+ align-items: flex-end;
463
+ }
464
+
465
+ .chat-form textarea {
466
+ flex: 1;
467
+ min-height: 2.75rem;
468
+ max-height: 8rem;
469
+ padding: 0.75rem 1rem;
470
+ border: 1px solid var(--glass-border);
471
+ border-radius: 1.5rem;
472
+ font-size: 0.9375rem;
473
+ font-family: inherit;
474
+ resize: none;
475
+ line-height: 1.4;
476
+ background: var(--bg-secondary);
477
+ color: var(--text-primary);
478
+ transition: all 0.15s ease;
479
+ }
480
+
481
+ .chat-form textarea::placeholder {
482
+ color: var(--text-muted);
483
+ }
484
+
485
+ .chat-form textarea:focus {
486
+ outline: none;
487
+ border-color: var(--accent-primary);
488
+ box-shadow: var(--focus-ring);
489
+ }
490
+
491
+ .chat-form button {
492
+ padding: 0.75rem 1.25rem;
493
+ background: var(--accent-primary);
494
+ color: #ffffff;
495
+ border: none;
496
+ border-radius: 1.5rem;
497
+ font-size: 0.875rem;
498
+ font-weight: 600;
499
+ cursor: pointer;
500
+ white-space: nowrap;
501
+ transition: all 0.15s ease;
502
+ }
503
+
504
+ .chat-form button:hover {
505
+ filter: brightness(1.1);
506
+ box-shadow: var(--glow-primary);
507
+ }
508
+
509
+ .chat-form button:focus {
510
+ outline: none;
511
+ box-shadow: var(--focus-ring);
512
+ }
513
+
514
+ .chat-form button:disabled {
515
+ background: var(--text-muted);
516
+ cursor: not-allowed;
517
+ box-shadow: none;
518
+ filter: none;
519
+ }
520
+
521
+ .input-hints {
522
+ margin-top: 0.5rem;
523
+ font-size: 0.75rem;
524
+ color: var(--text-muted);
525
+ }
526
+
527
+ .input-hints kbd {
528
+ background: var(--bg-tertiary);
529
+ padding: 0.125rem 0.375rem;
530
+ border-radius: 0.25rem;
531
+ border: 1px solid var(--glass-border);
532
+ font-size: 0.6875rem;
533
+ font-family: inherit;
534
+ }
535
+
536
+ /* ===== FOOTER ===== */
537
+ footer {
538
+ background: var(--bg-secondary);
539
+ border-top: 1px solid var(--glass-border);
540
+ padding: 0.625rem 1rem;
541
+ text-align: center;
542
+ font-size: 0.75rem;
543
+ color: var(--text-muted);
544
+ }
545
+
546
+ /* ===== MODAL ===== */
547
+ dialog {
548
+ border: none;
549
+ border-radius: 0.75rem;
550
+ padding: 0;
551
+ max-width: 560px;
552
+ width: 90%;
553
+ background: var(--bg-secondary);
554
+ color: var(--text-primary);
555
+ box-shadow: 0 8px 32px rgba(0, 0, 0, 0.2);
556
+ }
557
+
558
+ dialog::backdrop {
559
+ background: rgba(0, 0, 0, 0.5);
560
+ }
561
+
562
+ dialog header {
563
+ display: flex;
564
+ justify-content: space-between;
565
+ align-items: center;
566
+ padding: 1rem 1.25rem;
567
+ border-bottom: 1px solid var(--glass-border);
568
+ }
569
+
570
+ dialog header h3 {
571
+ margin: 0;
572
+ font-size: 1rem;
573
+ font-weight: 600;
574
+ color: var(--text-primary);
575
+ }
576
+
577
+ dialog .close-modal {
578
+ background: none;
579
+ border: none;
580
+ font-size: 1.25rem;
581
+ cursor: pointer;
582
+ color: var(--text-secondary);
583
+ padding: 0.25rem;
584
+ line-height: 1;
585
+ border-radius: 0.25rem;
586
+ transition: all 0.15s ease;
587
+ }
588
+
589
+ dialog .close-modal:hover {
590
+ background: var(--bg-tertiary);
591
+ color: var(--text-primary);
592
+ }
593
+
594
+ #cache-content {
595
+ padding: 1rem 1.25rem;
596
+ max-height: 400px;
597
+ overflow-y: auto;
598
+ }
599
+
600
+ #cache-content table {
601
+ width: 100%;
602
+ border-collapse: collapse;
603
+ font-size: 0.8125rem;
604
+ }
605
+
606
+ #cache-content th,
607
+ #cache-content td {
608
+ padding: 0.625rem 0.5rem;
609
+ text-align: left;
610
+ border-bottom: 1px solid var(--glass-border);
611
+ }
612
+
613
+ #cache-content th {
614
+ font-weight: 600;
615
+ color: var(--text-secondary);
616
+ font-size: 0.75rem;
617
+ text-transform: uppercase;
618
+ letter-spacing: 0.03em;
619
+ }
620
+
621
+ /* ===== TYPING INDICATOR ===== */
622
+ .typing-indicator {
623
+ display: flex;
624
+ gap: 0.25rem;
625
+ padding: 0.5rem;
626
+ }
627
+
628
+ .typing-indicator span {
629
+ width: 6px;
630
+ height: 6px;
631
+ background: var(--text-muted);
632
+ border-radius: 50%;
633
+ animation: typing 1.2s infinite ease-in-out;
634
+ }
635
+
636
+ .typing-indicator span:nth-child(1) {
637
+ animation-delay: 0s;
638
+ }
639
+
640
+ .typing-indicator span:nth-child(2) {
641
+ animation-delay: 0.15s;
642
+ }
643
+
644
+ .typing-indicator span:nth-child(3) {
645
+ animation-delay: 0.3s;
646
+ }
647
+
648
+ @keyframes typing {
649
+
650
+ 0%,
651
+ 60%,
652
+ 100% {
653
+ transform: translateY(0);
654
+ opacity: 0.4;
655
+ }
656
+
657
+ 30% {
658
+ transform: translateY(-4px);
659
+ opacity: 1;
660
+ }
661
+ }
662
+
663
+ /* ===== STATUS INDICATOR ===== */
664
+ .status-indicator {
665
+ display: flex;
666
+ align-items: center;
667
+ gap: 0.625rem;
668
+ padding: 0.625rem 0.875rem;
669
+ background: var(--bg-tertiary);
670
+ border: 1px solid var(--glass-border);
671
+ border-radius: 0.5rem;
672
+ font-size: 0.8125rem;
673
+ color: var(--text-primary);
674
+ animation: statusAppear 0.2s ease-out;
675
+ }
676
+
677
+ .status-spinner {
678
+ width: 14px;
679
+ height: 14px;
680
+ border: 2px solid var(--glass-border);
681
+ border-top-color: var(--accent-primary);
682
+ border-radius: 50%;
683
+ animation: spin 0.8s linear infinite;
684
+ }
685
+
686
+ .status-text {
687
+ font-weight: 500;
688
+ }
689
+
690
+ @keyframes spin {
691
+ to {
692
+ transform: rotate(360deg);
693
+ }
694
+ }
695
+
696
+ @keyframes statusAppear {
697
+ from {
698
+ opacity: 0;
699
+ }
700
+
701
+ to {
702
+ opacity: 1;
703
+ }
704
+ }
705
+
706
+ /* ===== SCROLLBAR ===== */
707
+ ::-webkit-scrollbar {
708
+ width: 8px;
709
+ height: 8px;
710
+ }
711
+
712
+ ::-webkit-scrollbar-track {
713
+ background: transparent;
714
+ }
715
+
716
+ ::-webkit-scrollbar-thumb {
717
+ background: var(--text-muted);
718
+ border-radius: 4px;
719
+ border: 2px solid var(--bg-primary);
720
+ }
721
+
722
+ ::-webkit-scrollbar-thumb:hover {
723
+ background: var(--text-secondary);
724
+ }
725
+
726
+ /* ===== RESPONSIVE ===== */
727
+ @media (max-width: 640px) {
728
+ .chat-container {
729
+ padding: 0 0.75rem;
730
+ }
731
+
732
+ .message {
733
+ max-width: 92%;
734
+ }
735
+
736
+ .chat-form button {
737
+ padding: 0.75rem 1rem;
738
+ }
739
+
740
+ header nav ul {
741
+ gap: 0.5rem;
742
+ }
743
+ }
744
+
745
+ /* ===== ACCESSIBILITY ===== */
746
+ @media (prefers-reduced-motion: reduce) {
747
+
748
+ *,
749
+ *::before,
750
+ *::after {
751
+ animation-duration: 0.01ms !important;
752
+ transition-duration: 0.01ms !important;
753
+ }
754
+ }
755
+
756
+ /* Focus visible for keyboard users */
757
+ :focus-visible {
758
+ outline: none;
759
+ box-shadow: var(--focus-ring);
760
+ }
761
+
762
+ /* ===== SELECTION ===== */
763
+ ::selection {
764
+ background: rgba(29, 155, 240, 0.25);
765
+ }
766
+
767
+ [data-theme="light"] ::selection {
768
+ background: rgba(26, 115, 232, 0.2);
769
+ }
770
+
771
+ /* ===== API KEYS PANEL ===== */
772
+ .api-keys-panel {
773
+ margin: 0 auto 16px;
774
+ max-width: 480px;
775
+ background: var(--bg-tertiary);
776
+ border: 1px solid var(--glass-border);
777
+ border-radius: 12px;
778
+ overflow: hidden;
779
+ }
780
+
781
+ .api-keys-header {
782
+ padding: 12px 16px;
783
+ font-weight: 600;
784
+ color: var(--text-primary);
785
+ background: var(--glass-bg);
786
+ border-bottom: 1px solid var(--glass-border);
787
+ }
788
+
789
+ .api-keys-body {
790
+ padding: 16px;
791
+ }
792
+
793
+ .api-keys-note {
794
+ font-size: 13px;
795
+ color: var(--text-secondary);
796
+ margin-bottom: 12px;
797
+ line-height: 1.4;
798
+ }
799
+
800
+ .api-key-field {
801
+ margin-bottom: 12px;
802
+ }
803
+
804
+ .api-key-field label {
805
+ display: block;
806
+ font-size: 13px;
807
+ font-weight: 500;
808
+ color: var(--text-secondary);
809
+ margin-bottom: 4px;
810
+ }
811
+
812
+ .api-key-field .required {
813
+ color: #ef4444;
814
+ }
815
+
816
+ .api-key-field input {
817
+ width: 100%;
818
+ padding: 8px 12px;
819
+ background: var(--bg-primary);
820
+ border: 1px solid var(--glass-border);
821
+ border-radius: 6px;
822
+ color: var(--text-primary);
823
+ font-family: monospace;
824
+ font-size: 13px;
825
+ box-sizing: border-box;
826
+ }
827
+
828
+ .api-key-field input:focus {
829
+ outline: none;
830
+ border-color: var(--accent-primary);
831
+ box-shadow: var(--focus-ring);
832
+ }
833
+
834
+ .save-keys-btn {
835
+ width: 100%;
836
+ padding: 10px;
837
+ background: var(--accent-primary);
838
+ color: #fff;
839
+ border: none;
840
+ border-radius: 6px;
841
+ font-size: 14px;
842
+ font-weight: 600;
843
+ cursor: pointer;
844
+ margin-top: 4px;
845
+ }
846
+
847
+ .save-keys-btn:hover {
848
+ opacity: 0.9;
849
+ }
850
+
851
+ .save-keys-btn:disabled {
852
+ opacity: 0.5;
853
+ cursor: not-allowed;
854
+ }
web/static/eurus_avatar.png ADDED
web/static/favicon.jpeg ADDED
web/static/js/chat.js ADDED
@@ -0,0 +1,734 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * Eurus Chat WebSocket Client
3
+ */
4
+
5
+ class EurusChat {
6
+ constructor() {
7
+ this.ws = null;
8
+ this.messageId = 0;
9
+ this.currentAssistantMessage = null;
10
+ this.isConnected = false;
11
+ this.keysConfigured = false;
12
+ this.serverKeysPresent = { openai: false, arraylake: false };
13
+ this.reconnectAttempts = 0;
14
+ this.maxReconnectAttempts = 5;
15
+ this.reconnectDelay = 1000;
16
+
17
+ this.messagesContainer = document.getElementById('chat-messages');
18
+ this.messageInput = document.getElementById('message-input');
19
+ this.chatForm = document.getElementById('chat-form');
20
+ this.sendBtn = document.getElementById('send-btn');
21
+ this.connectionStatus = document.getElementById('connection-status');
22
+ this.clearBtn = document.getElementById('clear-btn');
23
+ this.cacheBtn = document.getElementById('cache-btn');
24
+ this.cacheModal = document.getElementById('cache-modal');
25
+ this.apiKeysPanel = document.getElementById('api-keys-panel');
26
+ this.saveKeysBtn = document.getElementById('save-keys-btn');
27
+ this.openaiKeyInput = document.getElementById('openai-key');
28
+ this.arraylakeKeyInput = document.getElementById('arraylake-key');
29
+
30
+ marked.setOptions({
31
+ highlight: (code, lang) => {
32
+ if (lang && hljs.getLanguage(lang)) {
33
+ return hljs.highlight(code, { language: lang }).value;
34
+ }
35
+ return hljs.highlightAuto(code).value;
36
+ },
37
+ breaks: true,
38
+ gfm: true
39
+ });
40
+
41
+ this.themeToggle = document.getElementById('theme-toggle');
42
+ this.init();
43
+ }
44
+
45
+ init() {
46
+ this.checkKeysStatus();
47
+ this.connect();
48
+ this.setupEventListeners();
49
+ this.setupImageModal();
50
+ this.setupTheme();
51
+ this.setupKeysPanel();
52
+ }
53
+
54
+ async checkKeysStatus() {
55
+ try {
56
+ const resp = await fetch('/api/keys-status');
57
+ const data = await resp.json();
58
+ this.serverKeysPresent = data;
59
+
60
+ if (data.openai) {
61
+ // Keys pre-configured on server — hide the panel
62
+ this.apiKeysPanel.style.display = 'none';
63
+ this.keysConfigured = true;
64
+ } else {
65
+ // No server keys — check localStorage for saved keys
66
+ const savedOpenai = localStorage.getItem('eurus-openai-key');
67
+ const savedArraylake = localStorage.getItem('eurus-arraylake-key');
68
+ if (savedOpenai) {
69
+ this.openaiKeyInput.value = savedOpenai;
70
+ }
71
+ if (savedArraylake) {
72
+ this.arraylakeKeyInput.value = savedArraylake;
73
+ }
74
+ this.apiKeysPanel.style.display = 'block';
75
+ this.keysConfigured = false;
76
+ }
77
+ } catch (e) {
78
+ // Can't reach server yet, show panel
79
+ this.apiKeysPanel.style.display = 'block';
80
+ }
81
+ }
82
+
83
+ setupKeysPanel() {
84
+ this.saveKeysBtn.addEventListener('click', () => this.saveAndSendKeys());
85
+
86
+ // Allow Enter in key fields to submit
87
+ [this.openaiKeyInput, this.arraylakeKeyInput].forEach(input => {
88
+ input.addEventListener('keydown', (e) => {
89
+ if (e.key === 'Enter') {
90
+ e.preventDefault();
91
+ this.saveAndSendKeys();
92
+ }
93
+ });
94
+ });
95
+ }
96
+
97
+ saveAndSendKeys() {
98
+ const openaiKey = this.openaiKeyInput.value.trim();
99
+ const arraylakeKey = this.arraylakeKeyInput.value.trim();
100
+
101
+ if (!openaiKey) {
102
+ this.openaiKeyInput.focus();
103
+ return;
104
+ }
105
+
106
+ // Save to localStorage (client-side only)
107
+ localStorage.setItem('eurus-openai-key', openaiKey);
108
+ if (arraylakeKey) {
109
+ localStorage.setItem('eurus-arraylake-key', arraylakeKey);
110
+ }
111
+
112
+ // Send keys via WebSocket
113
+ if (this.ws && this.ws.readyState === WebSocket.OPEN) {
114
+ this.saveKeysBtn.disabled = true;
115
+ this.saveKeysBtn.textContent = 'Connecting...';
116
+ this.ws.send(JSON.stringify({
117
+ type: 'configure_keys',
118
+ openai_api_key: openaiKey,
119
+ arraylake_api_key: arraylakeKey,
120
+ }));
121
+ }
122
+ }
123
+
124
+ setupTheme() {
125
+ // Load saved theme or default to dark (neosynth)
126
+ const savedTheme = localStorage.getItem('eurus-theme') || 'dark';
127
+ document.documentElement.setAttribute('data-theme', savedTheme);
128
+ this.updateThemeIcon(savedTheme);
129
+
130
+ // Theme toggle click handler
131
+ if (this.themeToggle) {
132
+ this.themeToggle.addEventListener('click', () => {
133
+ const currentTheme = document.documentElement.getAttribute('data-theme');
134
+ const newTheme = currentTheme === 'dark' ? 'light' : 'dark';
135
+ document.documentElement.setAttribute('data-theme', newTheme);
136
+ localStorage.setItem('eurus-theme', newTheme);
137
+ this.updateThemeIcon(newTheme);
138
+ });
139
+ }
140
+ }
141
+
142
+ updateThemeIcon(theme) {
143
+ if (this.themeToggle) {
144
+ const icon = this.themeToggle.querySelector('.theme-icon');
145
+ if (icon) {
146
+ icon.textContent = theme === 'dark' ? '☀️' : '🌙';
147
+ }
148
+ }
149
+ }
150
+
151
+ connect() {
152
+ const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
153
+ const wsUrl = `${protocol}//${window.location.host}/ws/chat`;
154
+
155
+ this.updateConnectionStatus('connecting');
156
+
157
+ try {
158
+ this.ws = new WebSocket(wsUrl);
159
+
160
+ this.ws.onopen = () => {
161
+ this.isConnected = true;
162
+ this.reconnectAttempts = 0;
163
+ this.updateConnectionStatus('connected');
164
+
165
+ // If server has no keys, auto-send saved keys from localStorage
166
+ if (!this.serverKeysPresent.openai) {
167
+ const savedOpenai = localStorage.getItem('eurus-openai-key');
168
+ if (savedOpenai) {
169
+ const savedArraylake = localStorage.getItem('eurus-arraylake-key') || '';
170
+ this.ws.send(JSON.stringify({
171
+ type: 'configure_keys',
172
+ openai_api_key: savedOpenai,
173
+ arraylake_api_key: savedArraylake,
174
+ }));
175
+ }
176
+ } else {
177
+ this.sendBtn.disabled = false;
178
+ }
179
+ };
180
+
181
+ this.ws.onclose = () => {
182
+ this.isConnected = false;
183
+ this.updateConnectionStatus('disconnected');
184
+ this.sendBtn.disabled = true;
185
+ this.attemptReconnect();
186
+ };
187
+
188
+ this.ws.onerror = () => {
189
+ this.updateConnectionStatus('disconnected');
190
+ };
191
+
192
+ this.ws.onmessage = (event) => {
193
+ this.handleMessage(JSON.parse(event.data));
194
+ };
195
+
196
+ } catch (error) {
197
+ this.updateConnectionStatus('disconnected');
198
+ }
199
+ }
200
+
201
+ attemptReconnect() {
202
+ if (this.reconnectAttempts >= this.maxReconnectAttempts) return;
203
+
204
+ this.reconnectAttempts++;
205
+ const delay = this.reconnectDelay * Math.pow(2, this.reconnectAttempts - 1);
206
+
207
+ this.updateConnectionStatus('connecting');
208
+ setTimeout(() => this.connect(), delay);
209
+ }
210
+
211
+ updateConnectionStatus(status) {
212
+ this.connectionStatus.className = 'status-badge ' + status;
213
+ const text = { connected: 'Connected', disconnected: 'Disconnected', connecting: 'Connecting...' };
214
+ this.connectionStatus.textContent = text[status] || status;
215
+ }
216
+
217
+ setupEventListeners() {
218
+ this.chatForm.addEventListener('submit', (e) => {
219
+ e.preventDefault();
220
+ this.sendMessage();
221
+ });
222
+
223
+ this.messageInput.addEventListener('keydown', (e) => {
224
+ if (e.key === 'Enter' && !e.shiftKey) {
225
+ e.preventDefault();
226
+ this.sendMessage();
227
+ }
228
+ });
229
+
230
+ this.messageInput.addEventListener('input', () => {
231
+ this.messageInput.style.height = 'auto';
232
+ this.messageInput.style.height = Math.min(this.messageInput.scrollHeight, 150) + 'px';
233
+ });
234
+
235
+ this.clearBtn.addEventListener('click', (e) => {
236
+ e.preventDefault();
237
+ this.clearChat();
238
+ });
239
+
240
+ this.cacheBtn.addEventListener('click', (e) => {
241
+ e.preventDefault();
242
+ this.showCacheModal();
243
+ });
244
+
245
+ this.cacheModal.querySelector('.close-modal').addEventListener('click', () => {
246
+ this.cacheModal.close();
247
+ });
248
+ }
249
+
250
+ setupImageModal() {
251
+ // Create modal for enlarged images
252
+ const modal = document.createElement('div');
253
+ modal.id = 'image-modal';
254
+ modal.innerHTML = `
255
+ <div class="image-modal-backdrop"></div>
256
+ <div class="image-modal-content">
257
+ <img alt="Enlarged plot">
258
+ <div class="image-modal-actions">
259
+ <button class="download-btn">Download</button>
260
+ <button class="close-btn">Close</button>
261
+ </div>
262
+ </div>
263
+ `;
264
+ document.body.appendChild(modal);
265
+
266
+ // Add modal styles
267
+ const style = document.createElement('style');
268
+ style.textContent = `
269
+ #image-modal {
270
+ display: none;
271
+ position: fixed;
272
+ top: 0;
273
+ left: 0;
274
+ width: 100%;
275
+ height: 100%;
276
+ z-index: 1000;
277
+ }
278
+ #image-modal.active {
279
+ display: flex;
280
+ align-items: center;
281
+ justify-content: center;
282
+ }
283
+ .image-modal-backdrop {
284
+ position: absolute;
285
+ top: 0;
286
+ left: 0;
287
+ width: 100%;
288
+ height: 100%;
289
+ background: rgba(0,0,0,0.8);
290
+ }
291
+ .image-modal-content {
292
+ position: relative;
293
+ max-width: 90%;
294
+ max-height: 90%;
295
+ display: flex;
296
+ flex-direction: column;
297
+ align-items: center;
298
+ }
299
+ .image-modal-content img {
300
+ max-width: 100%;
301
+ max-height: calc(90vh - 60px);
302
+ border-radius: 4px;
303
+ }
304
+ .image-modal-actions {
305
+ margin-top: 12px;
306
+ display: flex;
307
+ gap: 8px;
308
+ }
309
+ .image-modal-actions button {
310
+ padding: 8px 16px;
311
+ border: none;
312
+ border-radius: 4px;
313
+ cursor: pointer;
314
+ font-size: 14px;
315
+ }
316
+ .image-modal-actions .download-btn {
317
+ background: #1976d2;
318
+ color: white;
319
+ }
320
+ .image-modal-actions .close-btn {
321
+ background: #757575;
322
+ color: white;
323
+ }
324
+ `;
325
+ document.head.appendChild(style);
326
+
327
+ // Event listeners
328
+ modal.querySelector('.image-modal-backdrop').addEventListener('click', () => {
329
+ modal.classList.remove('active');
330
+ });
331
+
332
+ modal.querySelector('.close-btn').addEventListener('click', () => {
333
+ modal.classList.remove('active');
334
+ });
335
+
336
+ modal.querySelector('.download-btn').addEventListener('click', () => {
337
+ const img = modal.querySelector('img');
338
+ const link = document.createElement('a');
339
+ link.href = img.src;
340
+ link.download = 'eurus_plot.png';
341
+ link.click();
342
+ });
343
+
344
+ document.addEventListener('keydown', (e) => {
345
+ if (e.key === 'Escape' && modal.classList.contains('active')) {
346
+ modal.classList.remove('active');
347
+ }
348
+ });
349
+
350
+ this.imageModal = modal;
351
+ }
352
+
353
+ showImageModal(src) {
354
+ this.imageModal.querySelector('img').src = src;
355
+ this.imageModal.classList.add('active');
356
+ }
357
+
358
+ sendMessage() {
359
+ const message = this.messageInput.value.trim();
360
+ if (!message || !this.isConnected) return;
361
+
362
+ this.addUserMessage(message);
363
+ this.ws.send(JSON.stringify({ message }));
364
+
365
+ this.messageInput.value = '';
366
+ this.messageInput.style.height = 'auto';
367
+ this.sendBtn.disabled = true;
368
+ }
369
+
370
+ handleMessage(data) {
371
+ switch (data.type) {
372
+ case 'keys_configured':
373
+ this.keysConfigured = data.ready;
374
+ if (data.ready) {
375
+ this.apiKeysPanel.style.display = 'none';
376
+ this.sendBtn.disabled = false;
377
+ } else {
378
+ this.saveKeysBtn.disabled = false;
379
+ this.saveKeysBtn.textContent = 'Connect';
380
+ this.showError('Failed to initialize agent. Check your API keys.');
381
+ }
382
+ break;
383
+
384
+ case 'thinking':
385
+ this.showThinkingIndicator();
386
+ break;
387
+
388
+ case 'status':
389
+ this.updateStatusIndicator(data.content);
390
+ break;
391
+
392
+
393
+ case 'chunk':
394
+ this.appendToAssistantMessage(data.content);
395
+ break;
396
+
397
+ case 'plot':
398
+ this.addPlot(data.data, data.path, data.code || '');
399
+ break;
400
+
401
+ case 'video':
402
+ console.log('[WS] Video message received:', data);
403
+ this.addVideo(data.data, data.path, data.mimetype || 'video/mp4');
404
+ break;
405
+
406
+ case 'complete':
407
+ this.finalizeAssistantMessage(data.content);
408
+ this.sendBtn.disabled = false;
409
+ break;
410
+
411
+ case 'error':
412
+ this.showError(data.content);
413
+ this.sendBtn.disabled = false;
414
+ break;
415
+
416
+ case 'clear':
417
+ this.clearMessagesUI();
418
+ break;
419
+ }
420
+ }
421
+
422
+ addUserMessage(content) {
423
+ const div = document.createElement('div');
424
+ div.className = 'message user-message';
425
+ div.innerHTML = `
426
+ <div class="message-header">
427
+ <span class="message-role">You</span>
428
+ </div>
429
+ <div class="message-content">${this.escapeHtml(content)}</div>
430
+ `;
431
+ this.messagesContainer.appendChild(div);
432
+ this.scrollToBottom();
433
+ }
434
+
435
+ showThinkingIndicator() {
436
+ this.removeThinkingIndicator();
437
+
438
+ const div = document.createElement('div');
439
+ div.className = 'message thinking-message';
440
+ div.id = 'thinking-indicator';
441
+ div.innerHTML = `
442
+ <div class="typing-indicator">
443
+ <span></span><span></span><span></span>
444
+ </div>
445
+ `;
446
+ this.messagesContainer.appendChild(div);
447
+ this.scrollToBottom();
448
+ }
449
+
450
+ removeThinkingIndicator() {
451
+ const indicator = document.getElementById('thinking-indicator');
452
+ if (indicator) indicator.remove();
453
+ }
454
+
455
+ updateStatusIndicator(statusText) {
456
+ // Replace thinking dots with status message
457
+ let indicator = document.getElementById('thinking-indicator');
458
+
459
+ if (!indicator) {
460
+ indicator = document.createElement('div');
461
+ indicator.className = 'message thinking-message';
462
+ indicator.id = 'thinking-indicator';
463
+ this.messagesContainer.appendChild(indicator);
464
+ }
465
+
466
+ indicator.innerHTML = `
467
+ <div class="status-indicator">
468
+ <span class="status-spinner"></span>
469
+ <span class="status-text">${this.escapeHtml(statusText)}</span>
470
+ </div>
471
+ `;
472
+ this.scrollToBottom();
473
+ }
474
+
475
+ appendToAssistantMessage(content) {
476
+ this.removeThinkingIndicator();
477
+
478
+ if (!this.currentAssistantMessage) {
479
+ this.currentAssistantMessage = document.createElement('div');
480
+ this.currentAssistantMessage.className = 'message assistant-message';
481
+ this.currentAssistantMessage.innerHTML = `
482
+ <div class="message-header">
483
+ <img src="/static/favicon.jpeg" class="avatar-icon" alt="">
484
+ <span class="message-role">Eurus</span>
485
+ </div>
486
+ <div class="message-content markdown-content"></div>
487
+ <div class="message-plots"></div>
488
+ `;
489
+ this.messagesContainer.appendChild(this.currentAssistantMessage);
490
+ }
491
+
492
+ const contentDiv = this.currentAssistantMessage.querySelector('.message-content');
493
+ const raw = (contentDiv.getAttribute('data-raw') || '') + content;
494
+ contentDiv.setAttribute('data-raw', raw);
495
+ contentDiv.innerHTML = marked.parse(raw);
496
+
497
+ contentDiv.querySelectorAll('pre code').forEach(block => hljs.highlightElement(block));
498
+ this.scrollToBottom();
499
+ }
500
+
501
+ addPlot(base64Data, path, code = '') {
502
+ this.removeThinkingIndicator();
503
+
504
+ if (!this.currentAssistantMessage) {
505
+ this.appendToAssistantMessage('');
506
+ }
507
+
508
+ const plotsDiv = this.currentAssistantMessage.querySelector('.message-plots');
509
+
510
+ const figure = document.createElement('figure');
511
+ figure.className = 'plot-figure';
512
+
513
+ const imgSrc = `data:image/png;base64,${base64Data}`;
514
+ const codeId = `code-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
515
+
516
+ figure.innerHTML = `
517
+ <img src="${imgSrc}" alt="Generated plot">
518
+ <div class="plot-actions">
519
+ <button class="enlarge-btn" title="Enlarge">Enlarge</button>
520
+ <button class="download-btn" title="Download">Download</button>
521
+ ${code && code.trim() ? `<button class="code-btn" title="Show Code">Show Code</button>` : ''}
522
+ </div>
523
+ `;
524
+
525
+ // Add code block separately if code exists
526
+ if (code && code.trim()) {
527
+ const codeDiv = document.createElement('div');
528
+ codeDiv.className = 'plot-code';
529
+ codeDiv.style.display = 'none';
530
+
531
+ const pre = document.createElement('pre');
532
+ const codeEl = document.createElement('code');
533
+ codeEl.className = 'language-python hljs';
534
+
535
+ // Highlight immediately
536
+ try {
537
+ const highlighted = hljs.highlight(code, { language: 'python' });
538
+ codeEl.innerHTML = highlighted.value;
539
+ } catch (e) {
540
+ console.error('Highlight error:', e);
541
+ codeEl.textContent = code;
542
+ }
543
+
544
+ pre.appendChild(codeEl);
545
+ codeDiv.appendChild(pre);
546
+ figure.appendChild(codeDiv);
547
+ }
548
+
549
+ // Add enlarge action
550
+ figure.querySelector('.enlarge-btn').addEventListener('click', () => {
551
+ this.showImageModal(imgSrc);
552
+ });
553
+
554
+ // Add download action
555
+ figure.querySelector('.download-btn').addEventListener('click', () => {
556
+ const link = document.createElement('a');
557
+ link.href = imgSrc;
558
+ const filename = path ? path.split('/').pop() : 'eurus_plot.png';
559
+ link.download = filename;
560
+ link.click();
561
+ });
562
+
563
+ // Add show code toggle
564
+ const codeBtn = figure.querySelector('.code-btn');
565
+ if (codeBtn) {
566
+ const codeDiv = figure.querySelector('.plot-code');
567
+
568
+ codeBtn.addEventListener('click', () => {
569
+ if (codeDiv.style.display === 'none') {
570
+ codeDiv.style.display = 'block';
571
+ codeBtn.textContent = 'Hide Code';
572
+ } else {
573
+ codeDiv.style.display = 'none';
574
+ codeBtn.textContent = 'Show Code';
575
+ }
576
+ });
577
+ }
578
+
579
+ // Click on image to enlarge
580
+ figure.querySelector('img').addEventListener('click', () => {
581
+ this.showImageModal(imgSrc);
582
+ });
583
+
584
+ plotsDiv.appendChild(figure);
585
+ this.scrollToBottom();
586
+ }
587
+
588
+ addVideo(base64Data, path, mimetype = 'video/mp4') {
589
+ console.log('[VIDEO] addVideo called:', { path, mimetype, dataLength: base64Data?.length });
590
+ this.removeThinkingIndicator();
591
+
592
+ if (!this.currentAssistantMessage) {
593
+ this.appendToAssistantMessage('');
594
+ }
595
+
596
+ const plotsDiv = this.currentAssistantMessage.querySelector('.message-plots');
597
+ console.log('[VIDEO] plotsDiv found:', plotsDiv);
598
+
599
+ const figure = document.createElement('figure');
600
+ figure.className = 'plot-figure video-figure';
601
+
602
+ // Handle different formats
603
+ let videoSrc;
604
+ if (mimetype === 'image/gif') {
605
+ // GIFs display as img
606
+ videoSrc = `data:image/gif;base64,${base64Data}`;
607
+ figure.innerHTML = `
608
+ <img src="${videoSrc}" alt="Generated animation" class="video-gif" style="max-width: 100%; border-radius: 8px;">
609
+ <div class="plot-actions">
610
+ <button class="enlarge-btn" title="Enlarge">Enlarge</button>
611
+ <button class="download-btn" title="Download">Download</button>
612
+ </div>
613
+ `;
614
+
615
+ // Enlarge for GIF
616
+ figure.querySelector('.enlarge-btn').addEventListener('click', () => {
617
+ this.showImageModal(videoSrc);
618
+ });
619
+ figure.querySelector('img').addEventListener('click', () => {
620
+ this.showImageModal(videoSrc);
621
+ });
622
+ } else {
623
+ // Video formats (webm, mp4)
624
+ videoSrc = `data:${mimetype};base64,${base64Data}`;
625
+ figure.innerHTML = `
626
+ <video controls autoplay loop muted playsinline style="max-width: 100%; border-radius: 8px;">
627
+ <source src="${videoSrc}" type="${mimetype}">
628
+ Your browser does not support video playback.
629
+ </video>
630
+ <div class="plot-actions">
631
+ <button class="download-btn" title="Download">Download</button>
632
+ </div>
633
+ `;
634
+ }
635
+
636
+ // Download button
637
+ figure.querySelector('.download-btn').addEventListener('click', () => {
638
+ const link = document.createElement('a');
639
+ link.href = videoSrc;
640
+ const ext = mimetype.includes('gif') ? 'gif' : mimetype.includes('webm') ? 'webm' : 'mp4';
641
+ const filename = path ? path.split('/').pop() : `eurus_animation.${ext}`;
642
+ link.download = filename;
643
+ link.click();
644
+ });
645
+
646
+ plotsDiv.appendChild(figure);
647
+ this.scrollToBottom();
648
+ }
649
+
650
+ finalizeAssistantMessage(content) {
651
+ this.removeThinkingIndicator();
652
+ if (content && !this.currentAssistantMessage) {
653
+ this.appendToAssistantMessage(content);
654
+ }
655
+ this.currentAssistantMessage = null;
656
+ }
657
+
658
+ showError(message) {
659
+ this.removeThinkingIndicator();
660
+
661
+ const div = document.createElement('div');
662
+ div.className = 'message error-message';
663
+ div.innerHTML = `
664
+ <div class="message-header">
665
+ <span class="message-role">Error</span>
666
+ </div>
667
+ <div class="message-content">${this.escapeHtml(message)}</div>
668
+ `;
669
+ this.messagesContainer.appendChild(div);
670
+ this.currentAssistantMessage = null;
671
+ this.scrollToBottom();
672
+ }
673
+
674
+ async clearChat() {
675
+ if (!confirm('Clear conversation?')) return;
676
+
677
+ // Send clear command through WebSocket so the agent session memory is also cleared
678
+ if (this.ws && this.ws.readyState === WebSocket.OPEN) {
679
+ this.ws.send(JSON.stringify({ message: '/clear' }));
680
+ } else {
681
+ // Fallback to REST if WS not available
682
+ try {
683
+ const response = await fetch('/api/conversation', { method: 'DELETE' });
684
+ if (response.ok) this.clearMessagesUI();
685
+ } catch (error) {
686
+ console.error('Error clearing:', error);
687
+ }
688
+ }
689
+ }
690
+
691
+ clearMessagesUI() {
692
+ const messages = this.messagesContainer.querySelectorAll('.message:not(.system-message)');
693
+ messages.forEach(msg => msg.remove());
694
+ this.currentAssistantMessage = null;
695
+ }
696
+
697
+ async showCacheModal() {
698
+ this.cacheModal.showModal();
699
+ const content = document.getElementById('cache-content');
700
+ content.innerHTML = '<p>Loading...</p>';
701
+
702
+ try {
703
+ const response = await fetch('/api/cache');
704
+ const data = await response.json();
705
+
706
+ if (data.datasets && data.datasets.length > 0) {
707
+ let html = '<table><thead><tr><th>Variable</th><th>Period</th><th>Type</th></tr></thead><tbody>';
708
+ for (const ds of data.datasets) {
709
+ html += `<tr><td>${ds.variable}</td><td>${ds.start_date} to ${ds.end_date}</td><td>${ds.query_type}</td></tr>`;
710
+ }
711
+ html += '</tbody></table>';
712
+ content.innerHTML = html;
713
+ } else {
714
+ content.innerHTML = '<p>No cached datasets.</p>';
715
+ }
716
+ } catch (error) {
717
+ content.innerHTML = `<p>Error: ${error.message}</p>`;
718
+ }
719
+ }
720
+
721
+ scrollToBottom() {
722
+ this.messagesContainer.scrollTop = this.messagesContainer.scrollHeight;
723
+ }
724
+
725
+ escapeHtml(text) {
726
+ const div = document.createElement('div');
727
+ div.textContent = text;
728
+ return div.innerHTML;
729
+ }
730
+ }
731
+
732
+ document.addEventListener('DOMContentLoaded', () => {
733
+ window.eurusChat = new EurusChat();
734
+ });
web/templates/base.html ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+
4
+ <head>
5
+ <meta charset="UTF-8">
6
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
+ <title>{% block title %}Eurus{% endblock %}</title>
8
+ <link rel="icon" type="image/jpeg" href="/static/favicon.jpeg">
9
+
10
+ <!-- Custom styles only -->
11
+ <link rel="stylesheet" href="/static/css/style.css">
12
+
13
+ <!-- Marked.js for markdown -->
14
+ <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
15
+
16
+ <!-- Highlight.js for code -->
17
+ <link rel="stylesheet"
18
+ href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/atom-one-dark.min.css">
19
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js"></script>
20
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/languages/python.min.js"></script>
21
+
22
+ {% block head %}{% endblock %}
23
+ </head>
24
+
25
+ <body>
26
+ <header>
27
+ <nav>
28
+ <ul>
29
+ <li><span class="logo">Eurus</span></li>
30
+ </ul>
31
+ <ul>
32
+ <li><a href="#" id="clear-btn">Clear</a></li>
33
+ <li><a href="#" id="cache-btn">Cache</a></li>
34
+ <li>
35
+ <button id="theme-toggle" class="theme-toggle" title="Toggle theme">
36
+ <span class="theme-icon">🌙</span>
37
+ </button>
38
+ </li>
39
+ <li>
40
+ <span id="connection-status" class="status-badge disconnected">
41
+ Disconnected
42
+ </span>
43
+ </li>
44
+ </ul>
45
+ </nav>
46
+ </header>
47
+
48
+ <main>
49
+ {% block content %}{% endblock %}
50
+ </main>
51
+
52
+ <footer>
53
+ Eurus Climate Agent
54
+ </footer>
55
+
56
+ {% block scripts %}{% endblock %}
57
+ </body>
58
+
59
+ </html>
web/templates/components/message.html ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- User message template -->
2
+ <div class="message user-message" data-message-id="{{ message_id }}">
3
+ <div class="message-header">
4
+ <span class="message-role">You</span>
5
+ <span class="message-time">{{ timestamp }}</span>
6
+ </div>
7
+ <div class="message-content">
8
+ {{ content }}
9
+ </div>
10
+ </div>
11
+
12
+ <!-- Assistant message template -->
13
+ <div class="message assistant-message" data-message-id="{{ message_id }}">
14
+ <div class="message-header">
15
+ <span class="message-role">Eurus</span>
16
+ <span class="message-time">{{ timestamp }}</span>
17
+ </div>
18
+ <div class="message-content markdown-content">
19
+ {{ content }}
20
+ </div>
21
+ {% if plots %}
22
+ <div class="message-plots">
23
+ {% for plot in plots %}
24
+ <figure class="plot-figure">
25
+ <img src="{{ plot.url }}" alt="Generated plot" loading="lazy">
26
+ {% if plot.path %}
27
+ <figcaption>{{ plot.path }}</figcaption>
28
+ {% endif %}
29
+ </figure>
30
+ {% endfor %}
31
+ </div>
32
+ {% endif %}
33
+ </div>
34
+
35
+ <!-- Thinking indicator template -->
36
+ <div class="message thinking-message" data-message-id="{{ message_id }}">
37
+ <div class="message-header">
38
+ <span class="message-role">Eurus</span>
39
+ </div>
40
+ <div class="message-content">
41
+ <span aria-busy="true">Thinking...</span>
42
+ </div>
43
+ </div>
44
+
45
+ <!-- Code execution template -->
46
+ <div class="message code-message" data-message-id="{{ message_id }}">
47
+ <div class="message-header">
48
+ <span class="message-role">Executing Code</span>
49
+ </div>
50
+ <div class="message-content">
51
+ <pre><code class="language-python">{{ code }}</code></pre>
52
+ </div>
53
+ </div>
web/templates/index.html ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {% extends "base.html" %}
2
+
3
+ {% block title %}Eurus - Climate Data Analysis{% endblock %}
4
+
5
+ {% block content %}
6
+ <div class="chat-container">
7
+ <!-- API Keys panel — hidden when keys are pre-configured via env -->
8
+ <div id="api-keys-panel" class="api-keys-panel" style="display: none;">
9
+ <div class="api-keys-header">
10
+ <span>API Keys Required</span>
11
+ </div>
12
+ <div class="api-keys-body">
13
+ <p class="api-keys-note">Enter your API keys to use Eurus. Keys are stored in your browser only and never saved on the server.</p>
14
+ <div class="api-key-field">
15
+ <label for="openai-key">OpenAI API Key <span class="required">*</span></label>
16
+ <input type="password" id="openai-key" placeholder="sk-..." autocomplete="off">
17
+ </div>
18
+ <div class="api-key-field">
19
+ <label for="arraylake-key">Arraylake API Key</label>
20
+ <input type="password" id="arraylake-key" placeholder="ema_..." autocomplete="off">
21
+ </div>
22
+ <button id="save-keys-btn" class="save-keys-btn">Connect</button>
23
+ </div>
24
+ </div>
25
+
26
+ <div id="chat-messages" class="chat-messages">
27
+ <div class="message system-message">
28
+ <h3>Welcome to Eurus</h3>
29
+ <p>I can help you analyze ERA5 climate data. Try:</p>
30
+ <ul>
31
+ <li>"Show me SST for California coast, Jan 2024"</li>
32
+ <li>"Plot temperature in the Gulf of Mexico"</li>
33
+ </ul>
34
+ </div>
35
+ </div>
36
+
37
+ <div class="chat-input-container">
38
+ <form id="chat-form" class="chat-form">
39
+ <textarea id="message-input" placeholder="Ask about climate data..." rows="1"></textarea>
40
+ <button type="submit" id="send-btn" disabled>Send</button>
41
+ </form>
42
+ <div class="input-hints">
43
+ <kbd>Enter</kbd> to send, <kbd>Shift+Enter</kbd> for new line
44
+ </div>
45
+ </div>
46
+ </div>
47
+
48
+ <dialog id="cache-modal">
49
+ <article>
50
+ <header>
51
+ <h3>Cached Datasets</h3>
52
+ <button class="close-modal">&times;</button>
53
+ </header>
54
+ <div id="cache-content">
55
+ <p>Loading...</p>
56
+ </div>
57
+ </article>
58
+ </dialog>
59
+ {% endblock %}
60
+
61
+ {% block scripts %}
62
+ <script src="/static/js/chat.js?v=20260216"></script>
63
+ {% endblock %}