Upload folder using huggingface_hub
Browse files- .dockerignore +44 -0
- .gitattributes +2 -35
- .gitignore +70 -0
- Dockerfile +61 -0
- LICENSE +21 -0
- README.md +213 -5
- assets/eurus_logo.jpeg +3 -0
- assets/eurus_logo_neon.jpeg +3 -0
- docker-compose.yml +47 -0
- main.py +428 -0
- pyproject.toml +148 -0
- requirements.txt +50 -0
- requirements_full.txt +190 -0
- scripts/qa_image_review.py +369 -0
- scripts/qa_runner.py +738 -0
- setup_env.sh +21 -0
- src/eurus/__init__.py +77 -0
- src/eurus/config.py +751 -0
- src/eurus/logging_config.py +100 -0
- src/eurus/memory.py +508 -0
- src/eurus/retrieval.py +536 -0
- src/eurus/server.py +258 -0
- src/eurus/tools/__init__.py +66 -0
- src/eurus/tools/analysis_guide.py +1191 -0
- src/eurus/tools/era5.py +204 -0
- src/eurus/tools/repl.py +564 -0
- src/eurus/tools/routing.py +289 -0
- tests/test_config.py +105 -0
- tests/test_e2e.py +368 -0
- tests/test_edge_cases.py +210 -0
- tests/test_server_integration.py +365 -0
- web/__init__.py +7 -0
- web/agent_wrapper.py +306 -0
- web/app.py +125 -0
- web/routes/__init__.py +7 -0
- web/routes/api.py +173 -0
- web/routes/pages.py +27 -0
- web/routes/websocket.py +131 -0
- web/static/css/style.css +854 -0
- web/static/eurus_avatar.png +0 -0
- web/static/favicon.jpeg +0 -0
- web/static/js/chat.js +734 -0
- web/templates/base.html +59 -0
- web/templates/components/message.html +53 -0
- web/templates/index.html +63 -0
.dockerignore
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Virtual environments
|
| 2 |
+
.venv/
|
| 3 |
+
venv/
|
| 4 |
+
|
| 5 |
+
# Git
|
| 6 |
+
.git/
|
| 7 |
+
.gitignore
|
| 8 |
+
|
| 9 |
+
# IDE
|
| 10 |
+
.vscode/
|
| 11 |
+
.idea/
|
| 12 |
+
*.swp
|
| 13 |
+
|
| 14 |
+
# Python cache
|
| 15 |
+
__pycache__/
|
| 16 |
+
*.py[cod]
|
| 17 |
+
*.egg-info/
|
| 18 |
+
.pytest_cache/
|
| 19 |
+
.coverage
|
| 20 |
+
htmlcov/
|
| 21 |
+
|
| 22 |
+
# Project artifacts (not needed in container)
|
| 23 |
+
data/
|
| 24 |
+
.memory/
|
| 25 |
+
.cache/
|
| 26 |
+
.claude/
|
| 27 |
+
icechunk/
|
| 28 |
+
logs/
|
| 29 |
+
*.log
|
| 30 |
+
|
| 31 |
+
# Build/docs
|
| 32 |
+
docs/
|
| 33 |
+
publications/
|
| 34 |
+
deep_searches/
|
| 35 |
+
bug_reports/
|
| 36 |
+
test_reports/
|
| 37 |
+
dummy_key/
|
| 38 |
+
|
| 39 |
+
# Misc
|
| 40 |
+
.DS_Store
|
| 41 |
+
project_structure.txt
|
| 42 |
+
save_new.py
|
| 43 |
+
improvements_feedback.txt
|
| 44 |
+
user_queries.txt
|
.gitattributes
CHANGED
|
@@ -1,35 +1,2 @@
|
|
| 1 |
-
*.
|
| 2 |
-
*.
|
| 3 |
-
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 1 |
+
*.jpeg filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.png filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.gitignore
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
*.so
|
| 6 |
+
.Python
|
| 7 |
+
build/
|
| 8 |
+
develop-eggs/
|
| 9 |
+
dist/
|
| 10 |
+
downloads/
|
| 11 |
+
eggs/
|
| 12 |
+
.eggs/
|
| 13 |
+
lib/
|
| 14 |
+
lib64/
|
| 15 |
+
parts/
|
| 16 |
+
sdist/
|
| 17 |
+
var/
|
| 18 |
+
wheels/
|
| 19 |
+
*.egg-info/
|
| 20 |
+
.installed.cfg
|
| 21 |
+
*.egg
|
| 22 |
+
MANIFEST
|
| 23 |
+
|
| 24 |
+
# Virtual Environment
|
| 25 |
+
.venv/
|
| 26 |
+
venv/
|
| 27 |
+
ENV/
|
| 28 |
+
env/
|
| 29 |
+
|
| 30 |
+
# IDEs
|
| 31 |
+
.idea/
|
| 32 |
+
.vscode/
|
| 33 |
+
*.swp
|
| 34 |
+
.DS_Store
|
| 35 |
+
|
| 36 |
+
# Testing
|
| 37 |
+
.pytest_cache/
|
| 38 |
+
.coverage
|
| 39 |
+
htmlcov/
|
| 40 |
+
.tox/
|
| 41 |
+
|
| 42 |
+
# Project specific
|
| 43 |
+
.env
|
| 44 |
+
.mcp.json
|
| 45 |
+
.memory/
|
| 46 |
+
.cache/
|
| 47 |
+
.claude/
|
| 48 |
+
data/
|
| 49 |
+
!data/plots/.gitkeep
|
| 50 |
+
|
| 51 |
+
# Logs
|
| 52 |
+
*.log
|
| 53 |
+
session.log
|
| 54 |
+
session_retry.log
|
| 55 |
+
|
| 56 |
+
# Generated/Temp files
|
| 57 |
+
icechunk/
|
| 58 |
+
save_new.py
|
| 59 |
+
project_structure.txt
|
| 60 |
+
dummy_key/
|
| 61 |
+
test_reports/
|
| 62 |
+
bug_reports/
|
| 63 |
+
publications/
|
| 64 |
+
user_queries.txt
|
| 65 |
+
improvements_feedback.txt
|
| 66 |
+
docs/
|
| 67 |
+
deep_searches/
|
| 68 |
+
# Generated project dumps
|
| 69 |
+
full_project.txt
|
| 70 |
+
src_structure.txt
|
Dockerfile
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ============================================================================
|
| 2 |
+
# Eurus ERA5 Agent — Docker Image
|
| 3 |
+
# ============================================================================
|
| 4 |
+
# Multi-target build:
|
| 5 |
+
# docker build --target agent -t eurus-agent .
|
| 6 |
+
# docker build --target web -t eurus-web .
|
| 7 |
+
#
|
| 8 |
+
# Or use docker-compose (preferred):
|
| 9 |
+
# docker compose run --rm agent # interactive CLI
|
| 10 |
+
# docker compose up web # FastAPI on :8000
|
| 11 |
+
# ============================================================================
|
| 12 |
+
|
| 13 |
+
# ---------- base ----------
|
| 14 |
+
FROM python:3.12-slim AS base
|
| 15 |
+
|
| 16 |
+
# System deps for scientific stack (numpy/scipy wheels, geopandas/shapely, matplotlib)
|
| 17 |
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 18 |
+
gcc g++ \
|
| 19 |
+
libgeos-dev \
|
| 20 |
+
libproj-dev \
|
| 21 |
+
libffi-dev \
|
| 22 |
+
curl \
|
| 23 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 24 |
+
|
| 25 |
+
WORKDIR /app
|
| 26 |
+
|
| 27 |
+
# Install Python deps first (layer caching)
|
| 28 |
+
COPY requirements.txt .
|
| 29 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
| 30 |
+
|
| 31 |
+
# Copy project source
|
| 32 |
+
COPY pyproject.toml .
|
| 33 |
+
COPY src/ src/
|
| 34 |
+
COPY main.py .
|
| 35 |
+
COPY web/ web/
|
| 36 |
+
COPY tests/ tests/
|
| 37 |
+
COPY scripts/ scripts/
|
| 38 |
+
COPY assets/ assets/
|
| 39 |
+
COPY README.md LICENSE ./
|
| 40 |
+
|
| 41 |
+
# Install eurus package in editable mode
|
| 42 |
+
RUN pip install --no-cache-dir -e ".[agent,web]"
|
| 43 |
+
|
| 44 |
+
# Create dirs the agent expects
|
| 45 |
+
RUN mkdir -p /app/data/plots /app/.memory /app/logs
|
| 46 |
+
|
| 47 |
+
# Signal to the REPL that we're inside Docker → security checks disabled
|
| 48 |
+
ENV EURUS_DOCKER=1
|
| 49 |
+
# Matplotlib: no GUI backend
|
| 50 |
+
ENV MPLBACKEND=Agg
|
| 51 |
+
# Ensure Python output is unbuffered (for docker logs)
|
| 52 |
+
ENV PYTHONUNBUFFERED=1
|
| 53 |
+
|
| 54 |
+
# ---------- agent (CLI mode) ----------
|
| 55 |
+
FROM base AS agent
|
| 56 |
+
ENTRYPOINT ["python", "main.py"]
|
| 57 |
+
|
| 58 |
+
# ---------- web (FastAPI mode) ----------
|
| 59 |
+
FROM base AS web
|
| 60 |
+
EXPOSE 7860
|
| 61 |
+
CMD ["uvicorn", "web.app:app", "--host", "0.0.0.0", "--port", "7860"]
|
LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MIT License
|
| 2 |
+
|
| 3 |
+
Copyright (c) 2024 Vostok Team
|
| 4 |
+
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
in the Software without restriction, including without limitation the rights
|
| 8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
SOFTWARE.
|
README.md
CHANGED
|
@@ -1,11 +1,219 @@
|
|
| 1 |
---
|
| 2 |
title: Eurus
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
-
license: mit
|
| 9 |
---
|
| 10 |
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
title: Eurus
|
| 3 |
+
emoji: 🌊
|
| 4 |
+
colorFrom: purple
|
| 5 |
+
colorTo: blue
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
|
|
|
| 8 |
---
|
| 9 |
|
| 10 |
+
# Eurus - ERA5 Climate Analysis Agent
|
| 11 |
+
|
| 12 |
+
<div align="center">
|
| 13 |
+
<img src="assets/eurus_logo.jpeg?v=2" alt="Eurus Logo" width="300"/>
|
| 14 |
+
|
| 15 |
+
<h3><b>Next-Generation Oceanographic & Climate Data Intelligence</b></h3>
|
| 16 |
+
|
| 17 |
+
[](https://opensource.org/licenses/MIT)
|
| 18 |
+
[](https://www.python.org/downloads/)
|
| 19 |
+
[](https://modelcontextprotocol.io)
|
| 20 |
+
[](https://earthmover.io)
|
| 21 |
+
</div>
|
| 22 |
+
|
| 23 |
+
---
|
| 24 |
+
|
| 25 |
+
**Eurus** is a high-performance, intelligent climate analysis agent designed for oceanographers, climate scientists, and data engineers. Built on the cutting-edge **Icechunk** transactional storage engine, Eurus bridges Earthmover's cloud-optimized ERA5 archives with advanced LLM reasoning, enabling seamless, natural language-driven exploration of planetary-scale climate data.
|
| 26 |
+
|
| 27 |
+
### ❄️ Powered By
|
| 28 |
+
|
| 29 |
+
This project is made possible by the incredible open-source work from the **[Earthmover](https://earthmover.io)** team:
|
| 30 |
+
- **[Icechunk](https://github.com/earth-mover/icechunk)**: The transactional storage engine for Zarr that provides the backbone for our high-performance data access.
|
| 31 |
+
- **Arraylake**: The cloud-native data lake that hosts the global ERA5 reanalysis archives used by this agent.
|
| 32 |
+
|
| 33 |
+
### 🚀 Core Pillars
|
| 34 |
+
|
| 35 |
+
- **Intelligence-First Analysis**: Leveraging LLMs to translate complex natural language queries into precise data retrieval and scientific analysis.
|
| 36 |
+
- **Multi-Interface Access**: Interact via a powerful CLI, a rich Web Interface, or integrate directly into IDEs via the Model Context Protocol (MCP).
|
| 37 |
+
- **Cloud-Native Performance**: Direct integration with Earthmover's Arraylake and Icechunk/Zarr storage for lightning-fast, subsetted data access.
|
| 38 |
+
- **Python REPL**: Built-in interactive Python environment with pandas, xarray, matplotlib for custom analysis.
|
| 39 |
+
- **Maritime Routing**: Calculate optimal shipping routes with weather risk assessment.
|
| 40 |
+
- **Persistent Context**: Memory system that tracks cached datasets across sessions.
|
| 41 |
+
|
| 42 |
+
---
|
| 43 |
+
|
| 44 |
+
## Features
|
| 45 |
+
|
| 46 |
+
- **Cloud-Optimized Data Retrieval**: Downloads ERA5 reanalysis data directly from Earthmover's Arraylake.
|
| 47 |
+
- **Python REPL**: Interactive Python environment with pre-loaded scientific libraries (pandas, numpy, xarray, matplotlib).
|
| 48 |
+
- **Maritime Routing**: Calculate optimal shipping routes considering land masks (requires scgraph).
|
| 49 |
+
- **Analysis Guides**: Built-in methodology guides for climate analysis and visualization.
|
| 50 |
+
- **Automatic Visualization**: Matplotlib plots automatically saved to `./data/plots/`.
|
| 51 |
+
- **Intelligent Caching**: Re-uses previously downloaded data to save bandwidth.
|
| 52 |
+
- **MCP Server**: Acts as a brain for Claude and other AI assistants.
|
| 53 |
+
|
| 54 |
+
## Installation
|
| 55 |
+
|
| 56 |
+
### Prerequisites
|
| 57 |
+
- Python 3.10 or higher
|
| 58 |
+
- An Earthmover Arraylake API Key
|
| 59 |
+
- An OpenAI API Key
|
| 60 |
+
|
| 61 |
+
### Setup
|
| 62 |
+
|
| 63 |
+
1. **Clone the repository:**
|
| 64 |
+
```bash
|
| 65 |
+
git clone https://github.com/yourusername/era_5_agent.git
|
| 66 |
+
cd era_5_agent
|
| 67 |
+
```
|
| 68 |
+
|
| 69 |
+
2. **Create and activate a virtual environment:**
|
| 70 |
+
```bash
|
| 71 |
+
python -m venv .venv
|
| 72 |
+
source .venv/bin/activate # or `.venv\Scripts\activate` on Windows
|
| 73 |
+
```
|
| 74 |
+
|
| 75 |
+
3. **Install dependencies:**
|
| 76 |
+
```bash
|
| 77 |
+
pip install -r requirements.txt
|
| 78 |
+
```
|
| 79 |
+
|
| 80 |
+
4. **Configuration:**
|
| 81 |
+
Create a `.env` file in the root directory with your API keys:
|
| 82 |
+
|
| 83 |
+
```env
|
| 84 |
+
OPENAI_API_KEY=your_openai_api_key
|
| 85 |
+
ARRAYLAKE_API_KEY=your_arraylake_api_key
|
| 86 |
+
# Optional: Custom Host/Port for Web UI
|
| 87 |
+
# WEB_HOST=127.0.0.1
|
| 88 |
+
# WEB_PORT=8000
|
| 89 |
+
```
|
| 90 |
+
|
| 91 |
+
---
|
| 92 |
+
|
| 93 |
+
## Usage
|
| 94 |
+
|
| 95 |
+
Eurus provides three ways to interact with the agent.
|
| 96 |
+
|
| 97 |
+
### 1. Interactive CLI Agent
|
| 98 |
+
The classic terminal experience with rich text output and direct interaction.
|
| 99 |
+
|
| 100 |
+
```bash
|
| 101 |
+
python main.py
|
| 102 |
+
```
|
| 103 |
+
|
| 104 |
+
**Commands:**
|
| 105 |
+
- `/help` - Show help message
|
| 106 |
+
- `/clear` - Clear conversation history
|
| 107 |
+
- `/cache` - List cached datasets
|
| 108 |
+
- `/memory` - Show memory summary
|
| 109 |
+
- `/cleardata` - Clear all downloaded datasets
|
| 110 |
+
- `/quit` or `q` - Exit
|
| 111 |
+
|
| 112 |
+
### 2. Web Interface
|
| 113 |
+
A modern web-based chat interface with rendered plots and easier navigation.
|
| 114 |
+
|
| 115 |
+
```bash
|
| 116 |
+
python web/app.py
|
| 117 |
+
# or
|
| 118 |
+
eurus-web
|
| 119 |
+
```
|
| 120 |
+
Access the interface at `http://127.0.0.1:8000`.
|
| 121 |
+
|
| 122 |
+
### 3. MCP Server (for Claude / IDEs)
|
| 123 |
+
Integrate Eurus's capabilities directly into Claude Desktop or compatible IDEs using the Model Context Protocol.
|
| 124 |
+
|
| 125 |
+
**Configuration for Claude Desktop:**
|
| 126 |
+
Add the following to your `claude_desktop_config.json`:
|
| 127 |
+
|
| 128 |
+
```json
|
| 129 |
+
{
|
| 130 |
+
"mcpServers": {
|
| 131 |
+
"eurus": {
|
| 132 |
+
"command": "python",
|
| 133 |
+
"args": ["-m", "eurus.server"],
|
| 134 |
+
"env": {
|
| 135 |
+
"ARRAYLAKE_API_KEY": "your_key_here",
|
| 136 |
+
"PYTHONPATH": "/absolute/path/to/era_5_agent/src"
|
| 137 |
+
}
|
| 138 |
+
}
|
| 139 |
+
}
|
| 140 |
+
}
|
| 141 |
+
```
|
| 142 |
+
|
| 143 |
+
Or run directly for testing:
|
| 144 |
+
```bash
|
| 145 |
+
python -m eurus.server
|
| 146 |
+
```
|
| 147 |
+
|
| 148 |
+
---
|
| 149 |
+
|
| 150 |
+
## Example Queries
|
| 151 |
+
|
| 152 |
+
Eurus can answer questions like:
|
| 153 |
+
|
| 154 |
+
* **Data Retrieval:** "Show me the sea surface temperature off California for 2023."
|
| 155 |
+
* **Visualization:** "Plot a time series of temperature anomalies in the North Atlantic."
|
| 156 |
+
* **Comparison:** "Compare SST between El Niño region and the California coast."
|
| 157 |
+
* **Routing:** "Calculate a ship route from Rotterdam to Singapore with weather risk."
|
| 158 |
+
* **Custom Analysis:** "Use Python to calculate the monthly mean SST and plot it."
|
| 159 |
+
|
| 160 |
+
## Available Data
|
| 161 |
+
|
| 162 |
+
### Variables
|
| 163 |
+
| Variable | Description | Units |
|
| 164 |
+
|----------|-------------|-------|
|
| 165 |
+
| `sst` | Sea Surface Temperature | K |
|
| 166 |
+
| `t2` | 2m Air Temperature | K |
|
| 167 |
+
| `u10` | 10m U-Wind Component | m/s |
|
| 168 |
+
| `v10` | 10m V-Wind Component | m/s |
|
| 169 |
+
| `mslp` | Mean Sea Level Pressure | Pa |
|
| 170 |
+
| `sp` | Surface Pressure | Pa |
|
| 171 |
+
| `tcc` | Total Cloud Cover | 0-1 |
|
| 172 |
+
| `tp` | Total Precipitation | m |
|
| 173 |
+
|
| 174 |
+
### Predefined Regions
|
| 175 |
+
Eurus knows many regions by name, including:
|
| 176 |
+
- `north_atlantic`, `south_atlantic`
|
| 177 |
+
- `north_pacific`, `south_pacific`
|
| 178 |
+
- `california_coast`, `gulf_of_mexico`, `caribbean`
|
| 179 |
+
- `mediterranean`, `europe`, `asia_east`
|
| 180 |
+
- `arctic`, `antarctic`
|
| 181 |
+
- `nino34`, `nino3`, `nino4`
|
| 182 |
+
|
| 183 |
+
---
|
| 184 |
+
|
| 185 |
+
## Project Structure
|
| 186 |
+
|
| 187 |
+
```
|
| 188 |
+
era_5_agent/
|
| 189 |
+
├── main.py # CLI Entry Point
|
| 190 |
+
├── pyproject.toml # Project configuration
|
| 191 |
+
├── requirements.txt # Python dependencies
|
| 192 |
+
├── src/
|
| 193 |
+
│ └── eurus/
|
| 194 |
+
│ ├── config.py # Configuration & Constants
|
| 195 |
+
│ ├── memory.py # Persistent Memory System
|
| 196 |
+
│ ├── server.py # MCP Server Entry Point
|
| 197 |
+
│ └── tools/ # Agent Tools
|
| 198 |
+
│ ├── era5.py # Data Retrieval
|
| 199 |
+
│ ├── routing.py # Maritime Routing
|
| 200 |
+
│ └── analysis_guide.py
|
| 201 |
+
├── web/ # Web Interface
|
| 202 |
+
│ ├── app.py # FastAPI Application
|
| 203 |
+
│ ├── routes/ # API & Page Routes
|
| 204 |
+
│ └── templates/ # HTML Templates
|
| 205 |
+
├── data/ # Data Storage (Local)
|
| 206 |
+
│ ├── plots/ # Generated Visualizations
|
| 207 |
+
│ └── *.zarr/ # Cached ERA5 Datasets
|
| 208 |
+
└── .memory/ # Agent Conversation History
|
| 209 |
+
```
|
| 210 |
+
|
| 211 |
+
## License
|
| 212 |
+
|
| 213 |
+
MIT License
|
| 214 |
+
|
| 215 |
+
---
|
| 216 |
+
|
| 217 |
+
<div align="center">
|
| 218 |
+
<p>Special thanks to the <b>Icechunk</b> and <b>Earthmover</b> teams for their pioneering work in cloud-native scientific data storage.</p>
|
| 219 |
+
</div>
|
assets/eurus_logo.jpeg
ADDED
|
Git LFS Details
|
assets/eurus_logo_neon.jpeg
ADDED
|
Git LFS Details
|
docker-compose.yml
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ============================================================================
|
| 2 |
+
# Eurus — Docker Compose
|
| 3 |
+
# ============================================================================
|
| 4 |
+
# Usage:
|
| 5 |
+
# docker compose run --rm agent # interactive CLI
|
| 6 |
+
# docker compose up web # web UI on http://localhost:8000
|
| 7 |
+
# docker compose up web -d # web UI (detached)
|
| 8 |
+
# ============================================================================
|
| 9 |
+
|
| 10 |
+
services:
|
| 11 |
+
# ── Interactive CLI agent ──────────────────────────────────────────────
|
| 12 |
+
agent:
|
| 13 |
+
build:
|
| 14 |
+
context: .
|
| 15 |
+
target: agent
|
| 16 |
+
image: eurus-agent
|
| 17 |
+
env_file: .env
|
| 18 |
+
environment:
|
| 19 |
+
- EURUS_DOCKER=1
|
| 20 |
+
volumes:
|
| 21 |
+
- eurus-data:/app/data # persist downloaded datasets
|
| 22 |
+
- eurus-memory:/app/.memory # persist memory between runs
|
| 23 |
+
- eurus-plots:/app/data/plots # persist generated plots
|
| 24 |
+
stdin_open: true # -i (interactive)
|
| 25 |
+
tty: true # -t (terminal)
|
| 26 |
+
|
| 27 |
+
# ── Web interface (FastAPI + WebSocket) ────────────────────────────────
|
| 28 |
+
web:
|
| 29 |
+
build:
|
| 30 |
+
context: .
|
| 31 |
+
target: web
|
| 32 |
+
image: eurus-web
|
| 33 |
+
env_file: .env
|
| 34 |
+
environment:
|
| 35 |
+
- EURUS_DOCKER=1
|
| 36 |
+
ports:
|
| 37 |
+
- "8000:8000"
|
| 38 |
+
volumes:
|
| 39 |
+
- eurus-data:/app/data
|
| 40 |
+
- eurus-memory:/app/.memory
|
| 41 |
+
- eurus-plots:/app/data/plots
|
| 42 |
+
restart: unless-stopped
|
| 43 |
+
|
| 44 |
+
volumes:
|
| 45 |
+
eurus-data:
|
| 46 |
+
eurus-memory:
|
| 47 |
+
eurus-plots:
|
main.py
ADDED
|
@@ -0,0 +1,428 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Eurus - ERA5 Climate Analysis Agent
|
| 4 |
+
======================================
|
| 5 |
+
An intelligent oceanography and climate data analysis assistant.
|
| 6 |
+
|
| 7 |
+
Features:
|
| 8 |
+
- Persistent memory across sessions
|
| 9 |
+
- Cloud-optimized ERA5 data retrieval
|
| 10 |
+
- Interactive Python analysis with visualization
|
| 11 |
+
- Conversation history and context awareness
|
| 12 |
+
|
| 13 |
+
Usage:
|
| 14 |
+
python main.py
|
| 15 |
+
|
| 16 |
+
Commands:
|
| 17 |
+
q, quit, exit - Exit the agent
|
| 18 |
+
/clear - Clear conversation history
|
| 19 |
+
/cache - List cached datasets
|
| 20 |
+
/memory - Show memory summary
|
| 21 |
+
/cleardata - Clear all downloaded ERA5 datasets
|
| 22 |
+
/help - Show help message
|
| 23 |
+
"""
|
| 24 |
+
|
| 25 |
+
import os
|
| 26 |
+
import sys
|
| 27 |
+
import logging
|
| 28 |
+
import warnings
|
| 29 |
+
from pathlib import Path
|
| 30 |
+
from datetime import datetime
|
| 31 |
+
|
| 32 |
+
# Suppress noisy warnings from xarray/zarr
|
| 33 |
+
warnings.filterwarnings("ignore", category=FutureWarning)
|
| 34 |
+
warnings.filterwarnings("ignore", message="Consolidated metadata", category=UserWarning)
|
| 35 |
+
|
| 36 |
+
from dotenv import load_dotenv
|
| 37 |
+
|
| 38 |
+
# Load environment variables first
|
| 39 |
+
load_dotenv()
|
| 40 |
+
|
| 41 |
+
# Add src to path
|
| 42 |
+
PROJECT_ROOT = Path(__file__).parent
|
| 43 |
+
sys.path.insert(0, str(PROJECT_ROOT / "src"))
|
| 44 |
+
|
| 45 |
+
# Setup centralized logging
|
| 46 |
+
from eurus.logging_config import setup_logging, cleanup_old_logs
|
| 47 |
+
setup_logging(mode="cli")
|
| 48 |
+
cleanup_old_logs(keep=20)
|
| 49 |
+
|
| 50 |
+
logger = logging.getLogger(__name__)
|
| 51 |
+
|
| 52 |
+
# Import after logging is configured
|
| 53 |
+
from langchain_openai import ChatOpenAI
|
| 54 |
+
from langchain.agents import create_agent
|
| 55 |
+
|
| 56 |
+
from eurus.config import CONFIG, AGENT_SYSTEM_PROMPT, DATA_DIR, PLOTS_DIR
|
| 57 |
+
from eurus.memory import get_memory, MemoryManager
|
| 58 |
+
from eurus.tools import get_all_tools
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
# ============================================================================
|
| 62 |
+
# BANNER AND HELP
|
| 63 |
+
# ============================================================================
|
| 64 |
+
|
| 65 |
+
BANNER = """
|
| 66 |
+
╔═══════════════════════════════════════════════════════════════════════════╗
|
| 67 |
+
║ ║
|
| 68 |
+
║ ███████╗██╗ ██╗██████╗ ██╗ ██╗███████╗ ║
|
| 69 |
+
║ ██╔════╝██║ ██║██╔══██╗██║ ██║██╔════╝ ║
|
| 70 |
+
║ █████╗ ██║ ██║██████╔╝██║ ██║███████╗ ║
|
| 71 |
+
║ ██╔══╝ ██║ ██║██╔══██╗██║ ██║╚════██║ ║
|
| 72 |
+
║ ███████╗╚██████╔╝██║ ██║╚██████╔╝███████║ ║
|
| 73 |
+
║ ╚══════╝ ╚═════╝ ╚═╝ ╚═╝ ╚═════╝ ╚══════╝ ║
|
| 74 |
+
║ ║
|
| 75 |
+
║ AI Climate Physicist v2.0 ║
|
| 76 |
+
║ ───────────────────────────────────────── ║
|
| 77 |
+
║ ║
|
| 78 |
+
║ Scientific Capabilities: ║
|
| 79 |
+
║ • ERA5 reanalysis data retrieval (SST, wind, temperature, pressure) ║
|
| 80 |
+
║ • Climate Diagnostics: Anomalies, Z-Scores, Statistical Significance ║
|
| 81 |
+
║ • Pattern Discovery: EOF/PCA analysis for climate modes ║
|
| 82 |
+
║ • Compound Extremes: "Ocean Oven" detection (Heat + Stagnation) ║
|
| 83 |
+
║ • Trend Analysis: Decadal trends with p-value significance ║
|
| 84 |
+
║ • Teleconnections: Correlation and lead-lag analysis ║
|
| 85 |
+
║ • Maritime Routing & Lagrangian Risk Assessment ║
|
| 86 |
+
║ ║
|
| 87 |
+
║ Commands: /help, /clear, /cache, /memory, /quit ║
|
| 88 |
+
║ ║
|
| 89 |
+
╚═══════════════════════════════════════════════════════════════════════════╝
|
| 90 |
+
"""
|
| 91 |
+
|
| 92 |
+
HELP_TEXT = """
|
| 93 |
+
╔═══════════════════════════════════════════════════════════════════════════╗
|
| 94 |
+
║ EURUS HELP - AI Climate Physicist ║
|
| 95 |
+
╠═════════════════════════��═════════════════════════════════════════════════╣
|
| 96 |
+
║ ║
|
| 97 |
+
║ COMMANDS: ║
|
| 98 |
+
║ ───────────────────────────────────────────────────────────────────── ║
|
| 99 |
+
║ /help - Show this help message ║
|
| 100 |
+
║ /clear - Clear conversation history (fresh start) ║
|
| 101 |
+
║ /cache - List all cached ERA5 datasets ║
|
| 102 |
+
║ /memory - Show memory summary (datasets, analyses) ║
|
| 103 |
+
║ /cleardata - Clear all downloaded ERA5 datasets ║
|
| 104 |
+
║ /quit - Exit the agent (also: q, quit, exit) ║
|
| 105 |
+
║ ║
|
| 106 |
+
║ SCIENTIFIC ANALYSIS (Publication-Grade): ║
|
| 107 |
+
║ ───────────────────────────────────────────────────────────────────── ║
|
| 108 |
+
║ "Analyze marine heatwaves in the North Atlantic summer 2023" ║
|
| 109 |
+
║ "Find compound extremes where high SST coincides with low wind" ║
|
| 110 |
+
║ "Perform EOF analysis on SST anomalies to find climate modes" ║
|
| 111 |
+
║ "Calculate SST trends with statistical significance" ║
|
| 112 |
+
║ "Detect Ocean Ovens in the Mediterranean" ║
|
| 113 |
+
║ ║
|
| 114 |
+
║ SCIENCE TOOLS (The "Physics Brain"): ║
|
| 115 |
+
║ ───────────────────────────────────────────────────────────────────── ║
|
| 116 |
+
║ analyze_climate_modes_eof - Pattern discovery via EOF/PCA ║
|
| 117 |
+
║ detect_compound_extremes - "Ocean Oven" detection ║
|
| 118 |
+
║ calculate_climate_trends - Trends with p-value significance ║
|
| 119 |
+
║ detrend_climate_data - Remove warming trend for analysis ║
|
| 120 |
+
║ detect_percentile_extremes - Percentile-based extreme detection ║
|
| 121 |
+
║ fetch_climate_index - NOAA indices (Nino3.4, NAO, PDO, AMO) ║
|
| 122 |
+
║ calculate_return_periods - GEV/EVT (1-in-100 year events) ║
|
| 123 |
+
║ analyze_granger_causality - Prove X causes Y (not just correlated) ║
|
| 124 |
+
║ ║
|
| 125 |
+
║ AVAILABLE VARIABLES: ║
|
| 126 |
+
║ ───────────────────────────────────────────────────────────────────── ║
|
| 127 |
+
║ sst - Sea Surface Temperature (K) ║
|
| 128 |
+
║ t2 - 2m Air Temperature (K) ║
|
| 129 |
+
║ u10 - 10m U-Wind Component (m/s) ║
|
| 130 |
+
║ v10 - 10m V-Wind Component (m/s) ║
|
| 131 |
+
║ mslp - Mean Sea Level Pressure (Pa) ║
|
| 132 |
+
║ tcc - Total Cloud Cover (0-1) ║
|
| 133 |
+
║ tp - Total Precipitation (m) ║
|
| 134 |
+
║ ║
|
| 135 |
+
║ PREDEFINED REGIONS: ║
|
| 136 |
+
║ ───────────────────────────────────────────────────────────────────── ║
|
| 137 |
+
║ north_atlantic, north_pacific, california_coast, mediterranean ║
|
| 138 |
+
║ gulf_of_mexico, caribbean, nino34, nino3, nino4, arctic, antarctic ║
|
| 139 |
+
║ ║
|
| 140 |
+
║ SCIENTIFIC WORKFLOW: ║
|
| 141 |
+
║ ───────────────────────────────────────────────────────────────────── ║
|
| 142 |
+
║ 1. RETRIEVE data → 2. DIAGNOSE (Z-scores) → 3. DISCOVER (EOF) ║
|
| 143 |
+
║ 4. DETECT (extremes) → 5. ATTRIBUTE (correlation) → 6. VISUALIZE ║
|
| 144 |
+
║ ║
|
| 145 |
+
║ TIPS: ║
|
| 146 |
+
║ ───────────────────────────────────────────────────────────────────── ║
|
| 147 |
+
║ • Always report in anomalies/Z-scores, not raw values ║
|
| 148 |
+
║ • Z > 2σ means statistically significant extreme ║
|
| 149 |
+
║ • Use diverging colormaps (RdBu_r) centered at 0 for anomalies ║
|
| 150 |
+
║ • Add stippling for p < 0.05 significance ║
|
| 151 |
+
║ ║
|
| 152 |
+
╚═══════════════════════════════════════════════════════════════════════════╝
|
| 153 |
+
"""
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
def clear_data_directory(data_dir: Path = None) -> tuple[int, float]:
|
| 158 |
+
"""
|
| 159 |
+
Remove all downloaded ERA5 datasets (zarr directories) from the data folder.
|
| 160 |
+
|
| 161 |
+
Args:
|
| 162 |
+
data_dir: Data directory path. Defaults to DATA_DIR from config.
|
| 163 |
+
|
| 164 |
+
Returns:
|
| 165 |
+
Tuple of (datasets_removed, total_size_mb_freed)
|
| 166 |
+
"""
|
| 167 |
+
import shutil
|
| 168 |
+
|
| 169 |
+
if data_dir is None:
|
| 170 |
+
data_dir = DATA_DIR
|
| 171 |
+
|
| 172 |
+
datasets_removed = 0
|
| 173 |
+
total_bytes = 0
|
| 174 |
+
|
| 175 |
+
if not data_dir.exists():
|
| 176 |
+
return 0, 0.0
|
| 177 |
+
|
| 178 |
+
# Find and remove all .zarr directories
|
| 179 |
+
for zarr_dir in data_dir.glob('*.zarr'):
|
| 180 |
+
if zarr_dir.is_dir():
|
| 181 |
+
# Calculate size before removing
|
| 182 |
+
dir_size = sum(f.stat().st_size for f in zarr_dir.rglob('*') if f.is_file())
|
| 183 |
+
total_bytes += dir_size
|
| 184 |
+
shutil.rmtree(zarr_dir)
|
| 185 |
+
datasets_removed += 1
|
| 186 |
+
logger.debug(f"Removed dataset: {zarr_dir}")
|
| 187 |
+
|
| 188 |
+
total_mb = total_bytes / (1024 * 1024)
|
| 189 |
+
return datasets_removed, total_mb
|
| 190 |
+
|
| 191 |
+
|
| 192 |
+
# ============================================================================
|
| 193 |
+
# COMMAND HANDLERS
|
| 194 |
+
# ============================================================================
|
| 195 |
+
|
| 196 |
+
def handle_command(command: str, memory: MemoryManager) -> tuple[bool, str | None]:
    """
    Handle slash commands typed at the REPL prompt.

    Args:
        command: Raw user input (may or may not be a command).
        memory: MemoryManager holding conversation/dataset/analysis state.

    Returns:
        (should_continue, response_message)
        ``should_continue`` is False only for quit-style commands.
        ``response_message`` is None when the input was not a command at all,
        which signals the caller to forward the input to the agent.
    """
    cmd = command.lower().strip()

    # Quit aliases: both slash-prefixed and bare forms are accepted.
    if cmd in ('/quit', '/exit', '/q', 'quit', 'exit', 'q'):
        return False, "Goodbye! Your conversation has been saved."

    elif cmd == '/help':
        return True, HELP_TEXT

    elif cmd == '/clear':
        # Wipes conversation history only; cached datasets are kept.
        memory.clear_conversation()
        return True, "Conversation history cleared. Starting fresh!"

    elif cmd == '/cache':
        cache_info = memory.list_datasets()
        return True, f"\n{cache_info}\n"

    elif cmd == '/memory':
        summary = memory.get_context_summary()
        # Count only dataset records whose files still exist on disk.
        datasets = len([p for p in memory.datasets if os.path.exists(p)])
        analyses = len(memory.analyses)
        convos = len(memory.conversations)

        response = f"""
╔═══════════════════════════════════════════════════════════════════════════╗
║                              MEMORY SUMMARY                               ║
╠═══════════════════════════════════════════════════════════════════════════╣
║  Conversation messages: {convos:<5}                                             ║
║  Cached datasets:       {datasets:<5}                                             ║
║  Recorded analyses:     {analyses:<5}                                             ║
╚═══════════════════════════════════════════════════════════════════════════╝

{summary}
"""
        return True, response

    elif cmd == '/cleardata':
        datasets_removed, size_freed = clear_data_directory(DATA_DIR)
        # Also clear memory references
        memory.datasets.clear()
        memory._save_datasets()
        response = f"""
╔═══════════════════════════════════════════════════════════════════════════╗
║                             ERA5 DATA CLEARED                             ║
╠═══════════════════════════════════════════════════════════════════════════╣
║  Datasets removed: {datasets_removed:<5}                                                  ║
║  Space freed:      {size_freed:>8.2f} MB                                            ║
╚═══════════════════════════════════════════════════════════════════════════╝
"""
        return True, response

    elif cmd.startswith('/'):
        # Anything else starting with '/' is an unrecognized command.
        return True, f"Unknown command: {cmd}\nType /help for available commands."

    return True, None  # Not a command
|
| 257 |
+
|
| 258 |
+
|
| 259 |
+
# ============================================================================
|
| 260 |
+
# CALLBACK FOR TOOL PROGRESS
|
| 261 |
+
# ============================================================================
|
| 262 |
+
|
| 263 |
+
from langchain_core.callbacks import BaseCallbackHandler
|
| 264 |
+
|
| 265 |
+
|
| 266 |
+
class ToolProgressCallback(BaseCallbackHandler):
    """Print tool calls in real-time during agent execution.

    Attached via the agent invocation's ``callbacks`` config so the user sees
    which tool is running instead of a silent pause.
    """

    def on_tool_start(self, serialized, input_str, **kwargs):
        """Announce a tool invocation as soon as it starts."""
        # LangChain may pass serialized=None for some tool wrappers, so guard
        # the lookup; fall back to the kwarg-provided name, then a placeholder.
        tool_name = (serialized or {}).get('name', kwargs.get('name', 'unknown'))
        print(f"🔧 Calling: {tool_name}...", flush=True)

    def on_tool_end(self, output, name=None, **kwargs):
        """Report completion of the tool that just finished."""
        display_name = name or "tool"
        print(f"  ✓ {display_name} done", flush=True)
|
| 275 |
+
|
| 276 |
+
|
| 277 |
+
# ============================================================================
|
| 278 |
+
# MAIN AGENT LOOP
|
| 279 |
+
# ============================================================================
|
| 280 |
+
|
| 281 |
+
def main() -> None:
    """Main entry point for the Eurus agent.

    Boots the interactive REPL: validates required API keys, restores
    persisted memory, wires up tools/LLM/agent, then loops on user input
    until quit/EOF/interrupt. Session state is saved on the way out.
    """

    # Print banner
    print(BANNER)

    # Check for required API keys — fail fast with setup instructions.
    if not os.environ.get("ARRAYLAKE_API_KEY"):
        print("ERROR: ARRAYLAKE_API_KEY not found in environment.")
        print("Please add it to your .env file:")
        print("  ARRAYLAKE_API_KEY=your_api_key_here")
        sys.exit(1)

    if not os.environ.get("OPENAI_API_KEY"):
        print("ERROR: OPENAI_API_KEY not found in environment.")
        print("Please add it to your .env file:")
        print("  OPENAI_API_KEY=your_api_key_here")
        sys.exit(1)

    # Initialize memory
    print("Initializing memory system...")
    memory = get_memory()

    # Load recent conversation context so the agent remembers prior sessions.
    recent_messages = memory.get_langchain_messages(n_messages=10)
    logger.info(f"Loaded {len(recent_messages)} messages from history")

    # Initialize tools
    print("Starting Python kernel...")

    # All capabilities enabled by default (including maritime routing)
    tools = get_all_tools(enable_routing=True, enable_guide=True)
    logger.info(f"Loaded {len(tools)} tools")

    # Initialize LLM
    print("Connecting to LLM...")
    llm = ChatOpenAI(
        model=CONFIG.model_name,
        temperature=CONFIG.temperature,
        streaming=True  # Enable streaming for real-time output
    )

    # Create enhanced system prompt with context
    context_summary = memory.get_context_summary()
    enhanced_prompt = AGENT_SYSTEM_PROMPT

    # Only append context when there is something real to add.
    if context_summary and context_summary != "No context available.":
        enhanced_prompt += f"\n\n## CURRENT CONTEXT\n{context_summary}"

    # Create agent
    print("Creating agent...")
    agent = create_agent(
        model=llm,
        tools=tools,
        system_prompt=enhanced_prompt,
        debug=False
    )

    # Initialize messages with history
    messages = recent_messages.copy()

    print("\n" + "=" * 75)
    print("READY! Type your question or /help for commands.")
    print("=" * 75 + "\n")

    # Main interaction loop
    try:
        while True:
            # Get user input
            try:
                user_input = input(">> You: ").strip()
            except EOFError:
                # Ctrl-D / closed stdin ends the session cleanly.
                break

            if not user_input:
                continue

            # Handle commands (returns response=None for non-command input).
            should_continue, response = handle_command(user_input, memory)

            if response:
                print(response)

            if not should_continue:
                break

            if response:  # Command was handled, skip agent
                continue

            # Save user message to memory
            memory.add_message("user", user_input)
            messages.append({"role": "user", "content": user_input})

            # Get agent response
            print("\nThinking...\n")

            try:
                print("\n" + "─" * 75)

                # Use invoke() with callback handler for real-time tool progress
                config = {"recursion_limit": 35, "callbacks": [ToolProgressCallback()]}
                result = agent.invoke({"messages": messages}, config=config)

                # Update messages from result (keep as LangChain messages)
                messages = list(result["messages"])
                last_message = messages[-1]

                # The final entry may be a LangChain message object or a dict;
                # handle both shapes and fall back to str() for anything else.
                if hasattr(last_message, 'content') and last_message.content:
                    response_text = last_message.content
                elif isinstance(last_message, dict) and last_message.get('content'):
                    response_text = last_message['content']
                else:
                    response_text = str(last_message)

                print(f"\n📝 Eurus:\n{response_text}", flush=True)
                print("─" * 75 + "\n")
                memory.add_message("assistant", response_text)

            except KeyboardInterrupt:
                # Ctrl-C during a response aborts that turn, not the session.
                print("\n\nInterrupted. Type /quit to exit or continue with a new question.")

            except Exception as e:
                error_msg = f"Error: {str(e)}"
                logger.error(error_msg, exc_info=True)
                print(f"\nError during processing: {error_msg}")
                print("Please try again or rephrase your question.\n")

    except KeyboardInterrupt:
        # Ctrl-C at the input prompt exits the whole loop.
        print("\n\nReceived interrupt signal.")

    finally:
        # Cleanup
        print("\nShutting down...")

        # Clean up missing dataset records
        removed = memory.cleanup_missing_datasets()
        if removed:
            logger.info(f"Cleaned up {removed} missing dataset records")

        print("Session saved. Goodbye!")
|
| 421 |
+
|
| 422 |
+
|
| 423 |
+
# ============================================================================
|
| 424 |
+
# ENTRY POINT
|
| 425 |
+
# ============================================================================
|
| 426 |
+
|
| 427 |
+
# Run the interactive agent only when executed as a script.
if __name__ == "__main__":
    main()
|
pyproject.toml
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[build-system]
|
| 2 |
+
requires = ["hatchling"]
|
| 3 |
+
build-backend = "hatchling.build"
|
| 4 |
+
|
| 5 |
+
[project]
|
| 6 |
+
name = "eurus"
|
| 7 |
+
version = "1.0.0"
|
| 8 |
+
description = "Eurus Climate Agent - Access ERA5 reanalysis data through Model Context Protocol"
|
| 9 |
+
readme = "README.md"
|
| 10 |
+
license = {text = "MIT"}
|
| 11 |
+
requires-python = ">=3.10"
|
| 12 |
+
authors = [
|
| 13 |
+
{name = "Eurus Team", email = "eurus@example.com"}
|
| 14 |
+
]
|
| 15 |
+
keywords = [
|
| 16 |
+
"era5",
|
| 17 |
+
"climate",
|
| 18 |
+
"mcp",
|
| 19 |
+
"model-context-protocol",
|
| 20 |
+
"oceanography",
|
| 21 |
+
"reanalysis",
|
| 22 |
+
"weather",
|
| 23 |
+
"xarray",
|
| 24 |
+
"zarr"
|
| 25 |
+
]
|
| 26 |
+
classifiers = [
|
| 27 |
+
"Development Status :: 4 - Beta",
|
| 28 |
+
"Environment :: Console",
|
| 29 |
+
"Intended Audience :: Science/Research",
|
| 30 |
+
"License :: OSI Approved :: MIT License",
|
| 31 |
+
"Operating System :: OS Independent",
|
| 32 |
+
"Programming Language :: Python :: 3",
|
| 33 |
+
"Programming Language :: Python :: 3.10",
|
| 34 |
+
"Programming Language :: Python :: 3.11",
|
| 35 |
+
"Programming Language :: Python :: 3.12",
|
| 36 |
+
"Topic :: Scientific/Engineering :: Atmospheric Science",
|
| 37 |
+
"Topic :: Scientific/Engineering :: GIS",
|
| 38 |
+
]
|
| 39 |
+
|
| 40 |
+
dependencies = [
|
| 41 |
+
"mcp>=1.0.0",
|
| 42 |
+
"arraylake>=0.10.0",
|
| 43 |
+
"xarray>=2024.10.0",
|
| 44 |
+
"zarr>=3.0.0",
|
| 45 |
+
"pandas>=2.0.0",
|
| 46 |
+
"numpy>=1.24.0",
|
| 47 |
+
"pydantic>=2.0.0",
|
| 48 |
+
"python-dotenv>=1.0.0",
|
| 49 |
+
]
|
| 50 |
+
|
| 51 |
+
[project.optional-dependencies]
|
| 52 |
+
agent = [
|
| 53 |
+
"langchain>=0.3.0",
|
| 54 |
+
"langchain-openai>=0.2.0",
|
| 55 |
+
"langchain-core>=0.3.0",
|
| 56 |
+
"openai>=1.0.0",
|
| 57 |
+
"jupyter_client>=8.0.0",
|
| 58 |
+
"ipykernel>=6.0.0",
|
| 59 |
+
"matplotlib>=3.7.0",
|
| 60 |
+
"scipy>=1.10.0",
|
| 61 |
+
"seaborn>=0.12.0",
|
| 62 |
+
]
|
| 63 |
+
web = [
|
| 64 |
+
"fastapi>=0.109.0",
|
| 65 |
+
"uvicorn[standard]>=0.27.0",
|
| 66 |
+
"jinja2>=3.1.0",
|
| 67 |
+
"python-multipart>=0.0.6",
|
| 68 |
+
"websockets>=12.0",
|
| 69 |
+
]
|
| 70 |
+
dev = [
|
| 71 |
+
"pytest>=7.0.0",
|
| 72 |
+
"pytest-asyncio>=0.21.0",
|
| 73 |
+
"pytest-cov>=4.0.0",
|
| 74 |
+
"black>=23.0.0",
|
| 75 |
+
"ruff>=0.1.0",
|
| 76 |
+
"mypy>=1.0.0",
|
| 77 |
+
"pre-commit>=3.0.0",
|
| 78 |
+
]
|
| 79 |
+
docs = [
|
| 80 |
+
"mkdocs>=1.5.0",
|
| 81 |
+
"mkdocs-material>=9.0.0",
|
| 82 |
+
"mkdocstrings[python]>=0.24.0",
|
| 83 |
+
]
|
| 84 |
+
|
| 85 |
+
[project.urls]
|
| 86 |
+
Homepage = "https://github.com/yourusername/era5-mcp"
|
| 87 |
+
Documentation = "https://github.com/yourusername/era5-mcp#readme"
|
| 88 |
+
Repository = "https://github.com/yourusername/era5-mcp"
|
| 89 |
+
Issues = "https://github.com/yourusername/era5-mcp/issues"
|
| 90 |
+
|
| 91 |
+
[project.scripts]
|
| 92 |
+
eurus-mcp = "eurus.server:main"
|
| 93 |
+
eurus-agent = "eurus.agent:main"
|
| 94 |
+
eurus-web = "web.app:main"
|
| 95 |
+
|
| 96 |
+
[tool.hatch.build.targets.wheel]
|
| 97 |
+
packages = ["src/eurus"]
|
| 98 |
+
|
| 99 |
+
[tool.hatch.build.targets.sdist]
|
| 100 |
+
include = [
|
| 101 |
+
"/src",
|
| 102 |
+
"/tests",
|
| 103 |
+
"/README.md",
|
| 104 |
+
"/LICENSE",
|
| 105 |
+
]
|
| 106 |
+
|
| 107 |
+
[tool.black]
|
| 108 |
+
line-length = 100
|
| 109 |
+
target-version = ['py310', 'py311', 'py312']
|
| 110 |
+
|
| 111 |
+
[tool.ruff]
|
| 112 |
+
line-length = 100
|
| 113 |
+
select = [
|
| 114 |
+
"E", # pycodestyle errors
|
| 115 |
+
"W", # pycodestyle warnings
|
| 116 |
+
"F", # pyflakes
|
| 117 |
+
"I", # isort
|
| 118 |
+
"B", # flake8-bugbear
|
| 119 |
+
"C4", # flake8-comprehensions
|
| 120 |
+
"UP", # pyupgrade
|
| 121 |
+
]
|
| 122 |
+
ignore = [
|
| 123 |
+
"E501", # line too long (handled by black)
|
| 124 |
+
"B008", # do not perform function calls in argument defaults
|
| 125 |
+
]
|
| 126 |
+
|
| 127 |
+
[tool.mypy]
|
| 128 |
+
python_version = "3.10"
|
| 129 |
+
warn_return_any = true
|
| 130 |
+
warn_unused_configs = true
|
| 131 |
+
disallow_untyped_defs = true
|
| 132 |
+
|
| 133 |
+
[tool.pytest.ini_options]
|
| 134 |
+
asyncio_mode = "auto"
|
| 135 |
+
testpaths = ["tests"]
|
| 136 |
+
addopts = "-v --cov=src/eurus --cov-report=term-missing"
|
| 137 |
+
|
| 138 |
+
[tool.coverage.run]
|
| 139 |
+
source = ["src/eurus"]
|
| 140 |
+
branch = true
|
| 141 |
+
|
| 142 |
+
[tool.coverage.report]
|
| 143 |
+
exclude_lines = [
|
| 144 |
+
"pragma: no cover",
|
| 145 |
+
"def __repr__",
|
| 146 |
+
"raise AssertionError",
|
| 147 |
+
"raise NotImplementedError",
|
| 148 |
+
]
|
requirements.txt
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ERA5 Agent Dependencies
|
| 2 |
+
# =======================
|
| 3 |
+
|
| 4 |
+
# LangChain (Agent Framework)
|
| 5 |
+
langchain>=0.3.0
|
| 6 |
+
langchain-openai>=0.2.0
|
| 7 |
+
langchain-core>=0.3.0
|
| 8 |
+
|
| 9 |
+
# OpenAI
|
| 10 |
+
openai>=1.0.0
|
| 11 |
+
|
| 12 |
+
# Data Access
|
| 13 |
+
arraylake>=0.10.0
|
| 14 |
+
icechunk>=0.1.0
|
| 15 |
+
|
| 16 |
+
# Scientific Computing
|
| 17 |
+
xarray>=2024.10.0
|
| 18 |
+
zarr>=3.0.0
|
| 19 |
+
pandas>=2.0.0
|
| 20 |
+
numpy>=1.24.0
|
| 21 |
+
scipy>=1.10.0
|
| 22 |
+
scikit-learn>=1.3.0 # For EOF/PCA climate pattern analysis
|
| 23 |
+
statsmodels>=0.14.0 # For Granger Causality & trend analysis
|
| 24 |
+
bottleneck>=1.3.0 # Fast rolling windows for time series
|
| 25 |
+
|
| 26 |
+
# Visualization
|
| 27 |
+
matplotlib>=3.7.0
|
| 28 |
+
seaborn>=0.12.0
|
| 29 |
+
geopandas
|
| 30 |
+
|
| 31 |
+
# Validation & Config
|
| 32 |
+
pydantic>=2.0.0
|
| 33 |
+
python-dotenv>=1.0.0
|
| 34 |
+
|
| 35 |
+
# Jupyter Kernel
|
| 36 |
+
jupyter_client>=8.0.0
|
| 37 |
+
ipykernel>=6.0.0
|
| 38 |
+
|
| 39 |
+
# MCP Server
|
| 40 |
+
mcp>=1.0.0
|
| 41 |
+
|
| 42 |
+
# Maritime Routing (Optional Extended Features)
|
| 43 |
+
scgraph>=1.0.0
|
| 44 |
+
global-land-mask>=1.0.0
|
| 45 |
+
|
| 46 |
+
# Web
|
| 47 |
+
fastapi
|
| 48 |
+
uvicorn[standard]
|
| 49 |
+
websockets
|
| 50 |
+
jinja2
|
requirements_full.txt
ADDED
|
@@ -0,0 +1,190 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Eurus Environment - Thu Feb 12 22:56:45 CET 2026
|
| 2 |
+
# Python: Python 3.12.2
|
| 3 |
+
|
| 4 |
+
affine==2.4.0
|
| 5 |
+
aiohappyeyeballs==2.6.1
|
| 6 |
+
aiohttp==3.13.3
|
| 7 |
+
aiosignal==1.4.0
|
| 8 |
+
annotated-doc==0.0.4
|
| 9 |
+
annotated-types==0.7.0
|
| 10 |
+
anyio==4.12.1
|
| 11 |
+
appnope==0.1.4
|
| 12 |
+
arraylake==0.28.1
|
| 13 |
+
asttokens==3.0.1
|
| 14 |
+
attrs==25.4.0
|
| 15 |
+
cachetools==7.0.0
|
| 16 |
+
cachey==0.2.1
|
| 17 |
+
Cartopy==0.25.0
|
| 18 |
+
certifi==2026.1.4
|
| 19 |
+
cf_xarray==0.10.11
|
| 20 |
+
cffi==2.0.0
|
| 21 |
+
cftime==1.6.5
|
| 22 |
+
charset-normalizer==3.4.4
|
| 23 |
+
click==8.3.1
|
| 24 |
+
cligj==0.7.2
|
| 25 |
+
cloudpickle==3.1.2
|
| 26 |
+
cmocean==4.0.3
|
| 27 |
+
colorcet==3.1.0
|
| 28 |
+
comm==0.2.3
|
| 29 |
+
contourpy==1.3.3
|
| 30 |
+
coverage==7.13.2
|
| 31 |
+
cryptography==46.0.4
|
| 32 |
+
cycler==0.12.1
|
| 33 |
+
dask==2026.1.2
|
| 34 |
+
datashader==0.18.2
|
| 35 |
+
debugpy==1.8.19
|
| 36 |
+
decorator==5.2.1
|
| 37 |
+
distro==1.9.0
|
| 38 |
+
dnspython==2.8.0
|
| 39 |
+
donfig==0.8.1.post1
|
| 40 |
+
email-validator==2.3.0
|
| 41 |
+
executing==2.2.1
|
| 42 |
+
fastapi==0.128.0
|
| 43 |
+
fonttools==4.61.1
|
| 44 |
+
frozenlist==1.8.0
|
| 45 |
+
fsspec==2026.2.0
|
| 46 |
+
geographiclib==2.1
|
| 47 |
+
geopandas==1.1.2
|
| 48 |
+
geopy==2.4.1
|
| 49 |
+
global-land-mask==1.0.0
|
| 50 |
+
google-crc32c==1.8.0
|
| 51 |
+
h11==0.16.0
|
| 52 |
+
HeapDict==1.0.1
|
| 53 |
+
httpcore==1.0.9
|
| 54 |
+
httptools==0.7.1
|
| 55 |
+
httpx==0.27.2
|
| 56 |
+
httpx-sse==0.4.3
|
| 57 |
+
icechunk==1.1.17
|
| 58 |
+
idna==3.11
|
| 59 |
+
iniconfig==2.3.0
|
| 60 |
+
ipykernel==7.1.0
|
| 61 |
+
ipython==9.9.0
|
| 62 |
+
ipython_pygments_lexers==1.1.1
|
| 63 |
+
jedi==0.19.2
|
| 64 |
+
Jinja2==3.1.6
|
| 65 |
+
jiter==0.12.0
|
| 66 |
+
joblib==1.5.3
|
| 67 |
+
jsonpatch==1.33
|
| 68 |
+
jsonpointer==3.0.0
|
| 69 |
+
jsonschema==4.26.0
|
| 70 |
+
jsonschema-specifications==2025.9.1
|
| 71 |
+
jupyter_client==8.8.0
|
| 72 |
+
jupyter_core==5.9.1
|
| 73 |
+
kiwisolver==1.4.9
|
| 74 |
+
langchain==1.2.7
|
| 75 |
+
langchain-core==1.2.7
|
| 76 |
+
langchain-openai==1.1.7
|
| 77 |
+
langgraph==1.0.7
|
| 78 |
+
langgraph-checkpoint==4.0.0
|
| 79 |
+
langgraph-prebuilt==1.0.7
|
| 80 |
+
langgraph-sdk==0.3.3
|
| 81 |
+
langsmith==0.6.6
|
| 82 |
+
llvmlite==0.46.0
|
| 83 |
+
locket==1.0.0
|
| 84 |
+
markdown-it-py==4.0.0
|
| 85 |
+
MarkupSafe==3.0.3
|
| 86 |
+
matplotlib==3.10.8
|
| 87 |
+
matplotlib-inline==0.2.1
|
| 88 |
+
mcp==1.26.0
|
| 89 |
+
mdurl==0.1.2
|
| 90 |
+
morecantile==7.0.3
|
| 91 |
+
multidict==6.7.1
|
| 92 |
+
multipledispatch==1.0.0
|
| 93 |
+
nest-asyncio==1.6.0
|
| 94 |
+
numba==0.63.1
|
| 95 |
+
numba_celltree==0.4.1
|
| 96 |
+
numbagg==0.9.4
|
| 97 |
+
numcodecs==0.16.5
|
| 98 |
+
numpy==2.3.5
|
| 99 |
+
openai==2.16.0
|
| 100 |
+
orjson==3.11.5
|
| 101 |
+
ormsgpack==1.12.2
|
| 102 |
+
packaging==25.0
|
| 103 |
+
pandas==3.0.0
|
| 104 |
+
param==2.3.2
|
| 105 |
+
parso==0.8.5
|
| 106 |
+
partd==1.4.2
|
| 107 |
+
pexpect==4.9.0
|
| 108 |
+
pillow==12.1.0
|
| 109 |
+
platformdirs==4.5.1
|
| 110 |
+
pluggy==1.6.0
|
| 111 |
+
pooch==1.9.0
|
| 112 |
+
prompt_toolkit==3.0.52
|
| 113 |
+
propcache==0.4.1
|
| 114 |
+
psutil==7.2.2
|
| 115 |
+
ptyprocess==0.7.0
|
| 116 |
+
pure_eval==0.2.3
|
| 117 |
+
pycparser==3.0
|
| 118 |
+
pyct==0.6.0
|
| 119 |
+
pydantic==2.12.5
|
| 120 |
+
pydantic-settings==2.12.0
|
| 121 |
+
pydantic-xml==2.18.0
|
| 122 |
+
pydantic_core==2.41.5
|
| 123 |
+
Pygments==2.19.2
|
| 124 |
+
PyJWT==2.10.1
|
| 125 |
+
PyMuPDF==1.26.7
|
| 126 |
+
pyogrio==0.12.1
|
| 127 |
+
pyparsing==3.3.2
|
| 128 |
+
pyproj==3.7.2
|
| 129 |
+
pyshp==3.0.3
|
| 130 |
+
PySide6==6.10.1
|
| 131 |
+
PySide6_Addons==6.10.1
|
| 132 |
+
PySide6_Essentials==6.10.1
|
| 133 |
+
pytest==9.0.2
|
| 134 |
+
pytest-cov==7.0.0
|
| 135 |
+
python-dateutil==2.9.0.post0
|
| 136 |
+
python-dotenv==1.2.1
|
| 137 |
+
python-multipart==0.0.22
|
| 138 |
+
PyYAML==6.0.3
|
| 139 |
+
pyzmq==27.1.0
|
| 140 |
+
rasterio==1.5.0
|
| 141 |
+
rasterix==0.2.0
|
| 142 |
+
referencing==0.37.0
|
| 143 |
+
regex==2026.1.15
|
| 144 |
+
requests==2.32.5
|
| 145 |
+
requests-toolbelt==1.0.0
|
| 146 |
+
rich==14.3.1
|
| 147 |
+
rioxarray==0.21.0
|
| 148 |
+
rpds-py==0.30.0
|
| 149 |
+
ruamel.yaml==0.19.1
|
| 150 |
+
scgraph==2.15.0
|
| 151 |
+
scikit-learn==1.8.0
|
| 152 |
+
scipy==1.17.0
|
| 153 |
+
seaborn==0.13.2
|
| 154 |
+
shapely==2.1.2
|
| 155 |
+
shellingham==1.5.4
|
| 156 |
+
shiboken6==6.10.1
|
| 157 |
+
six==1.17.0
|
| 158 |
+
sniffio==1.3.1
|
| 159 |
+
sse-starlette==3.2.0
|
| 160 |
+
stack-data==0.6.3
|
| 161 |
+
starlette==0.50.0
|
| 162 |
+
structlog==25.5.0
|
| 163 |
+
tenacity==9.1.2
|
| 164 |
+
term-image==0.7.2
|
| 165 |
+
threadpoolctl==3.6.0
|
| 166 |
+
tiktoken==0.12.0
|
| 167 |
+
toolz==1.1.0
|
| 168 |
+
tornado==6.5.4
|
| 169 |
+
tqdm==4.67.1
|
| 170 |
+
traitlets==5.14.3
|
| 171 |
+
triangle==20250106
|
| 172 |
+
typer==0.21.1
|
| 173 |
+
typing-inspection==0.4.2
|
| 174 |
+
typing_extensions==4.15.0
|
| 175 |
+
urllib3==2.6.3
|
| 176 |
+
uuid_utils==0.14.0
|
| 177 |
+
uvicorn==0.40.0
|
| 178 |
+
uvloop==0.22.1
|
| 179 |
+
-e git+https://github.com/dmpantiu/Eurus.git@9a6d481226f01ea0cc61969659907827cc0933d1#egg=vostok
|
| 180 |
+
watchfiles==1.1.1
|
| 181 |
+
wcwidth==0.5.0
|
| 182 |
+
websockets==16.0
|
| 183 |
+
xarray==2025.11.0
|
| 184 |
+
xproj==0.2.1
|
| 185 |
+
xpublish==0.4.2
|
| 186 |
+
xpublish-tiles==0.4.0
|
| 187 |
+
xxhash==3.6.0
|
| 188 |
+
yarl==1.22.0
|
| 189 |
+
zarr==3.1.5
|
| 190 |
+
zstandard==0.25.0
|
scripts/qa_image_review.py
ADDED
|
@@ -0,0 +1,369 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
QA Image Reviewer — Uses Gemini 3 Pro Preview (Vertex AI Express) to review
|
| 4 |
+
all generated plots from a QA run and checks whether each plot matches its
|
| 5 |
+
task requirements.
|
| 6 |
+
|
| 7 |
+
Usage:
|
| 8 |
+
python scripts/qa_image_review.py [--run RUN_DIR] [--query N] [--output FILE]
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
import argparse
|
| 12 |
+
import json
|
| 13 |
+
import os
|
| 14 |
+
import sys
|
| 15 |
+
import time
|
| 16 |
+
from datetime import datetime
|
| 17 |
+
from pathlib import Path
|
| 18 |
+
|
| 19 |
+
from dotenv import load_dotenv
|
| 20 |
+
from google import genai
|
| 21 |
+
from google.genai import types
|
| 22 |
+
|
| 23 |
+
# ── project root ────────────────────────────────────────────────────
# This script lives in scripts/, so the repo root is two levels up.
# API keys are loaded from the top-level .env before any client is built.
PROJECT_ROOT = Path(__file__).resolve().parent.parent
load_dotenv(PROJECT_ROOT / ".env")

# ── Gemini Config ───────────────────────────────────────────────────
# Default model used for plot review (overridable via --model).
PRIMARY_MODEL = "gemini-3-pro-preview"
# NOTE(review): FALLBACK_MODEL is not referenced anywhere else in this
# script — presumably intended as a manual --model alternative; confirm.
FALLBACK_MODEL = "gemini-2.0-flash"
|
| 30 |
+
|
| 31 |
+
# ── The query definitions (mirrored from qa_runner.py) ──────────────
|
| 32 |
+
# Query definitions mirrored from scripts/qa_runner.py: maps question ID
# -> {slug, task}.  The slug must match the qNN_<slug> run folder names;
# the task text is sent to the reviewer model as the grading criterion.
QA_QUERIES = {
    1: {"slug": "europe_heatwave_anomaly",
        "task": "Spatial map of 2m temperature anomalies across Europe during June 2023 heatwave vs June 2022."},
    2: {"slug": "storm_isha_mslp_wind",
        "task": "MSLP isobars and 10m wind vectors over the North Atlantic for 2024-01-22 showing Storm Isha."},
    3: {"slug": "atmospheric_river_jan2023",
        "task": "Total column water vapour for the US West Coast, Jan 2023, showing the atmospheric river event around Jan 9th."},
    4: {"slug": "sahara_heat_july2024",
        "task": "Daily mean 2m temperature time series over the Sahara for July 2024 vs July 2023 on the same chart."},
    5: {"slug": "great_plains_wind_may2024",
        "task": "Map of mean 10m wind speed over US Great Plains for May 2024, highlighting areas >5 m/s."},
    6: {"slug": "nino34_index",
        "task": "Niño 3.4 index from ERA5 SST for 2015-2024 classifying El Niño / La Niña episodes."},
    7: {"slug": "elnino_vs_lanina_tropical_belt",
        "task": "SST anomaly difference map: Dec 2023 (El Niño) minus Dec 2022 (La Niña) across the tropical belt."},
    8: {"slug": "nao_index",
        "task": "NAO index from MSLP (Azores minus Iceland) for 2000-2024 with 3-month rolling mean."},
    9: {"slug": "australia_enso_rainfall",
        "task": "Two-panel map of annual total precipitation over Eastern Australia for La Niña 2022 vs El Niño 2023, plus difference map."},
    10: {"slug": "med_eof_sst",
         "task": "EOF analysis on Mediterranean SST anomalies for 2019-2024: first 3 modes with variance explained."},
    11: {"slug": "arctic_polar_amplification",
         "task": "January mean 2m temperature maps for the Arctic (>70°N): 2024 vs 2000 side by side, with polar amplification quantification."},
    12: {"slug": "med_marine_heatwave_2023",
         "task": "Summer JJA 2023 SST anomaly map over the Mediterranean vs 2018-2022 mean, highlighting marine heatwave hotspots >+2°C."},
    13: {"slug": "paris_decadal_comparison",
         "task": "Average summer (JJA) temperature difference map for Paris: 2014-2023 vs 2000-2009, plus time series."},
    14: {"slug": "alps_snow_trend",
         "task": "December-February snow depth trend over the Alps for the last 30 years."},
    15: {"slug": "uk_precip_anomaly_winter2024",
         "task": "Total precipitation anomaly map over the British Isles for January 2024 vs 2019-2023 January mean, highlighting >150% normal."},
    16: {"slug": "delhi_heatwave_detection",
         "task": "Heatwave events in Delhi 2010-2024 using 90th percentile threshold with 3-day criterion; frequency change analysis."},
    17: {"slug": "horn_africa_drought",
         "task": "3-month SPI proxy for the Horn of Africa 2020-2024, identifying worst drought periods."},
    18: {"slug": "baghdad_hot_days",
         "task": "Bar chart of days per year >35°C in Baghdad from 1980-2024 with trend line."},
    19: {"slug": "sea_p95_precip",
         "task": "95th percentile daily precipitation map for Southeast Asia 2010-2023."},
    20: {"slug": "scandinavia_blocking_2018",
         "task": "Blocking event over Scandinavia July 2018: MSLP anomalies persisting 5+ days."},
    21: {"slug": "rotterdam_shanghai_route",
         "task": "Maritime route from Rotterdam to Shanghai with wind risk analysis for December."},
    22: {"slug": "indian_ocean_sst_dipole",
         "task": "SST anomaly map across the Indian Ocean for October 2023 relative to 2019-2022 October mean, showing IOD pattern."},
    23: {"slug": "japan_typhoon_season_wind",
         "task": "Mean and maximum 10m wind speed maps around Japan during typhoon season (Aug-Oct) 2023, highlighting areas >8 m/s."},
    24: {"slug": "south_atlantic_sst_gradient",
         "task": "Mean SST field across the South Atlantic for March 2024 with SST isotherms and Brazil-Malvinas confluence zone."},
    25: {"slug": "north_sea_wind_power",
         "task": "Mean 100m wind power density map across the North Sea for 2020-2024 identifying best offshore wind sites."},
    26: {"slug": "german_bight_weibull",
         "task": "Weibull distribution fit to 100m wind speed at German Bight for 2023 with histogram and fit overlay."},
    27: {"slug": "solar_sahara_vs_germany",
         "task": "Monthly mean incoming solar radiation (SSRD) comparison: Sahara vs Northern Germany for 2023."},
    28: {"slug": "persian_gulf_sst_summer",
         "task": "Mean SST map across Persian Gulf and Arabian Sea for August 2023, highlighting areas where SST >32°C."},
    29: {"slug": "sahara_diurnal_t2_blh",
         "task": "Diurnal cycle of 2m temperature and boundary layer height in the Sahara for July 2024, dual-axis plot."},
    30: {"slug": "amazon_convective_peak",
         "task": "Hourly climatology of convective precipitation peak over the Amazon basin during DJF."},
    31: {"slug": "europe_rh_august",
         "task": "Relative humidity map from 2m temperature and dewpoint for central Europe, August 2023."},
    32: {"slug": "hovmoller_equator_skt",
         "task": "Hovmöller diagram of skin temperature along the equator for 2023 to visualize MJO."},
    33: {"slug": "hurricane_otis_dashboard",
         "task": "Summary dashboard for Hurricane Otis (Oct 2023): SST map, wind speed time series, TCWV distribution in one figure."},
    34: {"slug": "california_sst_jan",
         "task": "Average SST off California coast in January 2024 with spatial map of the SST field."},
    35: {"slug": "berlin_monthly_temp",
         "task": "2023 monthly mean temperature for Berlin as a seasonal curve."},
    36: {"slug": "biscay_wind_stats",
         "task": "10m wind speed stats for Bay of Biscay (last 3 years) with histogram or time series plot."},
}
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
REVIEW_SYSTEM_PROMPT = """\
|
| 109 |
+
You are a senior scientific visualization reviewer for a climate/weather data agent.
|
| 110 |
+
You will receive one or more PNG plots generated by an AI agent and the TASK that the agent was asked to complete.
|
| 111 |
+
|
| 112 |
+
Review each plot against the task and provide a structured assessment:
|
| 113 |
+
|
| 114 |
+
1. **Task Compliance** (1-10): Does the plot address what was asked?
|
| 115 |
+
2. **Scientific Accuracy** (1-10): Are axes labeled, units correct, colorbar present, projections reasonable?
|
| 116 |
+
3. **Visual Quality** (1-10): Is the plot publication-quality? Good resolution, readable labels, professional aesthetics?
|
| 117 |
+
4. **Spatial/Map Quality** (1-10): If it's a map — does it have coastlines, proper projection, geographic labels? If not a map, rate the chart type appropriateness.
|
| 118 |
+
5. **Overall Score** (1-10): Weighted average considering all factors.
|
| 119 |
+
|
| 120 |
+
Also provide:
|
| 121 |
+
- **Summary**: 1-2 sentence summary of what the plot shows.
|
| 122 |
+
- **Strengths**: Key things done well.
|
| 123 |
+
- **Issues**: Any problems, missing elements, or improvements needed.
|
| 124 |
+
|
| 125 |
+
Respond ONLY in valid JSON with this exact structure:
|
| 126 |
+
{
|
| 127 |
+
"task_compliance": <int>,
|
| 128 |
+
"scientific_accuracy": <int>,
|
| 129 |
+
"visual_quality": <int>,
|
| 130 |
+
"spatial_quality": <int>,
|
| 131 |
+
"overall_score": <int>,
|
| 132 |
+
"summary": "<string>",
|
| 133 |
+
"strengths": ["<string>", ...],
|
| 134 |
+
"issues": ["<string>", ...]
|
| 135 |
+
}
|
| 136 |
+
"""
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
def create_client() -> genai.Client:
    """Create a Gemini API client using Vertex AI Express (API-key auth).

    Reads the ``vertex_api_key`` value that ``load_dotenv`` placed in the
    environment.  This is a CLI script, so a missing key terminates the
    process with exit status 1 rather than raising.

    Returns:
        An authenticated ``genai.Client`` configured for Vertex AI.
    """
    api_key = os.environ.get("vertex_api_key")
    if not api_key:
        print("❌ vertex_api_key not found in .env!")
        sys.exit(1)
    # Plain string literal: the original used an f-string with no
    # placeholders (ruff F541).
    print(" Using Vertex AI Express (API key auth)")
    return genai.Client(vertexai=True, api_key=api_key)
|
| 147 |
+
|
| 148 |
+
|
| 149 |
+
def review_single_question(client: genai.Client, qid: int, task: str,
                           image_paths: list[Path], model: str) -> dict:
    """Send plots plus the task description to Gemini and parse the review.

    Builds one multimodal request (task text followed by each PNG inline)
    and retries up to 4 times: rate-limit errors (429/RESOURCE_EXHAUSTED)
    back off exponentially (capped at 60 s); other errors pause 2 s and
    retry, returning an error dict on the final attempt.

    Args:
        client: Authenticated Gemini client.
        qid: Question number (used only in the prompt header).
        task: Task description the agent was originally given.
        image_paths: PNG files to attach to the request.
        model: Gemini model name to query.

    Returns:
        The parsed review dict (schema per REVIEW_SYSTEM_PROMPT), or
        ``{"error": "<reason>"}`` when parsing/retries fail.
    """
    import re  # stdlib; hoisted from the except-branch of the original

    prompt_text = (
        f"**TASK (Q{qid:02d}):** {task}\n\n"
        f"Below are {len(image_paths)} plot(s) generated by the agent. "
        f"Review them against the task."
    )
    parts = [types.Part.from_text(text=prompt_text)]

    # Attach every plot as inline PNG bytes.
    for img_path in image_paths:
        with open(img_path, "rb") as f:
            img_bytes = f.read()
        parts.append(types.Part.from_bytes(data=img_bytes, mime_type="image/png"))

    for attempt in range(4):
        try:
            response = client.models.generate_content(
                model=model,
                contents=parts,
                config=types.GenerateContentConfig(
                    system_instruction=REVIEW_SYSTEM_PROMPT,
                    temperature=0.2,
                    max_output_tokens=1000,
                ),
            )
            raw = response.text.strip()
            # Strip markdown code fences if present (e.g. ```json ... ```).
            if raw.startswith("```"):
                raw = raw.split("\n", 1)[1] if "\n" in raw else raw[3:]
            if raw.endswith("```"):
                raw = raw[:-3]
            raw = raw.strip()
            return json.loads(raw)
        except json.JSONDecodeError:
            # Fallback: extract the outermost {...} span from the reply.
            # FIX: the original pattern r'\{[^{}]*\}' forbids braces inside
            # the match, so it failed whenever the summary/issues text
            # contained a '{' or '}'.  A greedy match from the first '{'
            # to the last '}' captures the whole JSON object.
            match = re.search(r'\{.*\}', raw, re.DOTALL)
            if match:
                try:
                    return json.loads(match.group())
                except json.JSONDecodeError:
                    pass
            return {"error": f"Failed to parse JSON: {raw[:500]}"}
        except Exception as e:
            err_str = str(e)
            if "429" in err_str or "RESOURCE_EXHAUSTED" in err_str:
                # Exponential backoff: 5, 10, 20, 40 s — capped at 60 s.
                wait = min(2 ** attempt * 5, 60)
                print(f"\n Rate limited, waiting {wait}s (attempt {attempt+1}/4)...", end="", flush=True)
                time.sleep(wait)
            else:
                if attempt < 3:
                    time.sleep(2)
                    continue
                return {"error": str(e)[:300]}

    return {"error": "Max retries exceeded"}
|
| 208 |
+
|
| 209 |
+
|
| 210 |
+
def main() -> None:
    """CLI entry point: review every question folder in a QA run.

    Locates the run directory (explicit --run, or the lexically latest
    ``run_*`` under data/qa_runs/), sends each question's PNGs to Gemini
    via review_single_question(), prints a per-question score table plus
    aggregate stats, and writes the full results to image_review.json.
    """
    parser = argparse.ArgumentParser(description="QA Image Reviewer using Gemini 3 Pro Preview")
    parser.add_argument("--run", type=str, default=None,
                        help="Path to QA run directory (default: latest in data/qa_runs/)")
    parser.add_argument("--query", type=int, default=None,
                        help="Review only a specific query ID")
    parser.add_argument("--output", type=str, default=None,
                        help="Output JSON file (default: <run_dir>/image_review.json)")
    parser.add_argument("--model", type=str, default=PRIMARY_MODEL,
                        help=f"Gemini model to use (default: {PRIMARY_MODEL})")
    args = parser.parse_args()

    # Find run directory.  Without --run, "latest" means the last run_*
    # folder in lexical sort order (works because names embed timestamps).
    if args.run:
        run_dir = Path(args.run)
    else:
        qa_runs = PROJECT_ROOT / "data" / "qa_runs"
        runs = sorted(qa_runs.glob("run_*"))
        if not runs:
            print("❌ No QA runs found in data/qa_runs/")
            sys.exit(1)
        run_dir = runs[-1]

    if not run_dir.exists():
        print(f"❌ Run directory not found: {run_dir}")
        sys.exit(1)

    # Gemini client (Vertex AI Express); exits the process if no API key.
    client = create_client()

    print(f"""
╔══════════════════════════════════════════════════════╗
║ QA Image Reviewer (Gemini 3 Pro Preview) ║
║ {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} ║
╚══════════════════════════════════════════════════════╝
Run directory: {run_dir}
Model: {args.model}
""")

    # Collect questions to review.  Folder naming convention: qNN_<slug>.
    all_reviews = {}
    question_dirs = sorted(run_dir.glob("q*_*"))

    for qdir in question_dirs:
        # Extract question ID from folder name (e.g., q01_xxx -> 1)
        try:
            qid = int(qdir.name.split("_")[0][1:])
        except (ValueError, IndexError):
            continue

        # --query N restricts the review to a single question.
        if args.query and qid != args.query:
            continue

        if qid not in QA_QUERIES:
            print(f"⚠️ Q{qid:02d}: Unknown query ID, skipping")
            continue

        # Find PNG files; a question with no plots is recorded but not sent.
        pngs = sorted(qdir.glob("*.png"))
        if not pngs:
            print(f"⏭️ Q{qid:02d} ({QA_QUERIES[qid]['slug']}): No PNG files, skipping")
            all_reviews[qid] = {"status": "no_images", "slug": QA_QUERIES[qid]["slug"]}
            continue

        task_desc = QA_QUERIES[qid]["task"]
        png_names = [p.name for p in pngs]

        print(f"🔍 Q{qid:02d} ({QA_QUERIES[qid]['slug']}): Reviewing {len(pngs)} image(s)...", end=" ", flush=True)

        try:
            start = time.time()
            review = review_single_question(client, qid, task_desc, pngs, args.model)
            elapsed = time.time() - start

            # Annotate the model's review with bookkeeping fields.
            review["slug"] = QA_QUERIES[qid]["slug"]
            review["task"] = task_desc
            review["images"] = png_names
            review["status"] = "reviewed"
            review["review_time_s"] = round(elapsed, 1)

            # Non-int score (e.g. an "error" dict) gets the ❓ icon.
            score = review.get("overall_score", "?")
            if isinstance(score, int):
                icon = "✅" if score >= 7 else "⚠️" if score >= 5 else "❌"
            else:
                icon = "❓"
            print(f"{icon} Score: {score}/10 ({elapsed:.1f}s)")

            all_reviews[qid] = review

        except Exception as e:
            print(f"❌ Error: {e}")
            all_reviews[qid] = {
                "status": "error",
                "slug": QA_QUERIES[qid]["slug"],
                "error": str(e),
            }

        # Rate limit: pause between calls
        time.sleep(1)

    # ── Summary ──────────────────────────────────────────────────────
    reviewed = [v for v in all_reviews.values() if v.get("status") == "reviewed"]
    scores = [v["overall_score"] for v in reviewed if isinstance(v.get("overall_score"), int)]

    print(f"\n{'='*70}")
    print("REVIEW SUMMARY")
    print(f"{'='*70}")

    # Score table: one line per question, icon keyed to overall score.
    for qid in sorted(all_reviews.keys()):
        r = all_reviews[qid]
        if r.get("status") == "reviewed":
            s = r.get("overall_score", 0)
            if isinstance(s, int):
                icon = "✅" if s >= 7 else "⚠️" if s >= 5 else "❌"
            else:
                icon = "❓"
            tc = r.get("task_compliance", "?")
            sa = r.get("scientific_accuracy", "?")
            vq = r.get("visual_quality", "?")
            sq = r.get("spatial_quality", "?")
            print(f" {icon} Q{qid:02d} {r['slug']:35s} | Overall: {s:>2}/10 | "
                  f"Task:{tc} Sci:{sa} Vis:{vq} Spa:{sq}")
        elif r.get("status") == "no_images":
            print(f" ⏭️ Q{qid:02d} {r['slug']:35s} | No images")
        else:
            print(f" ❌ Q{qid:02d} {r['slug']:35s} | Error: {r.get('error', 'unknown')[:50]}")

    # Aggregate stats only make sense when at least one plot was scored.
    if scores:
        avg = sum(scores) / len(scores)
        excellent = sum(1 for s in scores if s >= 8)
        good = sum(1 for s in scores if 6 <= s < 8)
        needs_work = sum(1 for s in scores if s < 6)

        print(f"\n📊 Average score: {avg:.1f}/10 across {len(scores)} reviewed plots")
        print(f" 🟢 Excellent (8-10): {excellent}")
        print(f" 🟡 Good (6-7): {good}")
        print(f" 🔴 Needs work (<6): {needs_work}")

    # ── Save results ─────────────────────────────────────────────────
    output_path = Path(args.output) if args.output else run_dir / "image_review.json"

    # Convert int keys to strings for JSON
    output_data = {
        "timestamp": datetime.now().isoformat(),
        "run_directory": str(run_dir),
        "model": args.model,
        "total_reviewed": len(reviewed),
        # avg only exists when scores is non-empty; guarded conditional.
        "average_score": round(avg, 2) if scores else None,
        "reviews": {f"q{k:02d}": v for k, v in sorted(all_reviews.items())},
    }

    with open(output_path, "w") as f:
        json.dump(output_data, f, indent=2, ensure_ascii=False)

    print(f"\n💾 Full review saved to: {output_path}")
|
| 366 |
+
|
| 367 |
+
|
| 368 |
+
# Script entry point.
if __name__ == "__main__":
    main()
|
scripts/qa_runner.py
ADDED
|
@@ -0,0 +1,738 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
QA Runner — Automated End-to-End Agent Testing
|
| 4 |
+
===============================================
|
| 5 |
+
Runs test queries through the Eurus agent, captures ALL intermediate steps
|
| 6 |
+
(tool calls, tool outputs, reasoning, plots) and saves structured results
|
| 7 |
+
to data/qa_results/q{NN}_{slug}/.
|
| 8 |
+
|
| 9 |
+
Usage:
|
| 10 |
+
PYTHONPATH=src OPENAI_API_KEY=... python3 scripts/qa_runner.py
|
| 11 |
+
|
| 12 |
+
Or run a single query:
|
| 13 |
+
PYTHONPATH=src OPENAI_API_KEY=... python3 scripts/qa_runner.py --query 2
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
import os
|
| 17 |
+
import sys
|
| 18 |
+
import json
|
| 19 |
+
import shutil
|
| 20 |
+
import base64
|
| 21 |
+
import time
|
| 22 |
+
import argparse
|
| 23 |
+
from pathlib import Path
|
| 24 |
+
from datetime import datetime
|
| 25 |
+
from typing import Optional
|
| 26 |
+
|
| 27 |
+
# Ensure eurus package is importable
|
| 28 |
+
PROJECT_ROOT = Path(__file__).parent.parent
|
| 29 |
+
sys.path.insert(0, str(PROJECT_ROOT / "src"))
|
| 30 |
+
sys.path.insert(0, str(PROJECT_ROOT))
|
| 31 |
+
|
| 32 |
+
# Load .env (API keys)
|
| 33 |
+
from dotenv import load_dotenv
|
| 34 |
+
load_dotenv(PROJECT_ROOT / ".env")
|
| 35 |
+
|
| 36 |
+
from langchain_openai import ChatOpenAI
|
| 37 |
+
from langchain.agents import create_agent
|
| 38 |
+
from langchain_core.messages import HumanMessage, AIMessage, ToolMessage
|
| 39 |
+
|
| 40 |
+
from eurus.config import AGENT_SYSTEM_PROMPT, CONFIG, get_plots_dir
|
| 41 |
+
from eurus.tools import get_all_tools
|
| 42 |
+
|
| 43 |
+
# ============================================================================
|
| 44 |
+
# QA TEST QUERIES — 36 research-grade demo queries
|
| 45 |
+
#
|
| 46 |
+
# §1 Synoptic Meteorology & Case Studies (Q01–Q05)
|
| 47 |
+
# §2 Climate Variability & Teleconnections (Q06–Q10)
|
| 48 |
+
# §3 Trends & Climate Change Signals (Q11–Q15)
|
| 49 |
+
# §4 Extreme Events & Risk (Q16–Q20)
|
| 50 |
+
# §5 Maritime & Shipping (Q21–Q24)
|
| 51 |
+
# §6 Energy Assessment (Q25–Q28)
|
| 52 |
+
# §7 Diurnal & Sub-Daily Processes (Q29–Q30)
|
| 53 |
+
# §8 Multi-Variable & Diagnostics (Q31–Q33)
|
| 54 |
+
# §9 Quick Lookups (Q34–Q36)
|
| 55 |
+
# ============================================================================
|
| 56 |
+
|
| 57 |
+
QA_QUERIES = [
|
| 58 |
+
# ═══════════════════════════════════════════════════════════════
|
| 59 |
+
# §1 — Synoptic Meteorology & Case Studies
|
| 60 |
+
# ═══════════════════════════════════════════════════════════════
|
| 61 |
+
{
|
| 62 |
+
"id": 1,
|
| 63 |
+
"slug": "europe_heatwave_anomaly",
|
| 64 |
+
"query": "Show me a spatial map of 2m temperature anomalies across Europe "
|
| 65 |
+
"during the June 2023 heatwave compared to June 2022.",
|
| 66 |
+
"type": "anomaly_map",
|
| 67 |
+
"variables": ["t2"],
|
| 68 |
+
"region": "Europe",
|
| 69 |
+
},
|
| 70 |
+
{
|
| 71 |
+
"id": 2,
|
| 72 |
+
"slug": "storm_isha_mslp_wind",
|
| 73 |
+
"query": "Plot MSLP isobars and 10m wind vectors over the North Atlantic "
|
| 74 |
+
"for 2024-01-22 — I want to see Storm Isha's structure.",
|
| 75 |
+
"type": "contour_quiver",
|
| 76 |
+
"variables": ["mslp", "u10", "v10"],
|
| 77 |
+
"region": "North Atlantic",
|
| 78 |
+
},
|
| 79 |
+
{
|
| 80 |
+
"id": 3,
|
| 81 |
+
"slug": "atmospheric_river_jan2023",
|
| 82 |
+
"query": "Download total column water vapour for the US West Coast, Jan 2023, "
|
| 83 |
+
"and show the atmospheric river event around Jan 9th.",
|
| 84 |
+
"type": "ar_detection",
|
| 85 |
+
"variables": ["tcwv"],
|
| 86 |
+
"region": "US West Coast",
|
| 87 |
+
},
|
| 88 |
+
{
|
| 89 |
+
"id": 4,
|
| 90 |
+
"slug": "sahara_heat_july2024",
|
| 91 |
+
"query": "Plot the daily mean 2m temperature time series averaged over "
|
| 92 |
+
"the Sahara (20-30°N, 0 to 15°E) for July 2024 and compare "
|
| 93 |
+
"it to July 2023 on the same chart.",
|
| 94 |
+
"type": "time_series",
|
| 95 |
+
"variables": ["t2"],
|
| 96 |
+
"region": "Sahara",
|
| 97 |
+
},
|
| 98 |
+
{
|
| 99 |
+
"id": 5,
|
| 100 |
+
"slug": "great_plains_wind_may2024",
|
| 101 |
+
"query": "Plot a map of mean 10m wind speed over the US Great Plains "
|
| 102 |
+
"(30-45°N, -105 to -90°W) for May 2024 and highlight areas exceeding 5 m/s.",
|
| 103 |
+
"type": "threshold_map",
|
| 104 |
+
"variables": ["u10", "v10"],
|
| 105 |
+
"region": "US Great Plains",
|
| 106 |
+
},
|
| 107 |
+
|
| 108 |
+
# ═══════════════════════════════════════════════════════════════
|
| 109 |
+
# §2 — Climate Variability & Teleconnections
|
| 110 |
+
# ═══════════════════════════════════════════════════════════════
|
| 111 |
+
{
|
| 112 |
+
"id": 6,
|
| 113 |
+
"slug": "nino34_index",
|
| 114 |
+
"query": "Calculate the Niño 3.4 index from ERA5 SST for 2015-2024 and "
|
| 115 |
+
"classify El Niño / La Niña episodes.",
|
| 116 |
+
"type": "climate_index",
|
| 117 |
+
"variables": ["sst"],
|
| 118 |
+
"region": "Tropical Pacific",
|
| 119 |
+
},
|
| 120 |
+
{
|
| 121 |
+
"id": 7,
|
| 122 |
+
"slug": "elnino_vs_lanina_tropical_belt",
|
| 123 |
+
"query": "Compare SST anomalies across the entire tropical belt "
|
| 124 |
+
"(30°S-30°N, global) for December 2023 (peak El Niño) vs December 2022 "
|
| 125 |
+
"(La Niña). Show the full basin-wide pattern across the Pacific, "
|
| 126 |
+
"Atlantic, and Indian oceans in a single anomaly difference map.",
|
| 127 |
+
"type": "anomaly_comparison",
|
| 128 |
+
"variables": ["sst"],
|
| 129 |
+
"region": "Tropical Belt (global)",
|
| 130 |
+
},
|
| 131 |
+
{
|
| 132 |
+
"id": 8,
|
| 133 |
+
"slug": "nao_index",
|
| 134 |
+
"query": "Compute the NAO index from MSLP (Azores minus Iceland) for 2000-2024 "
|
| 135 |
+
"and plot it with a 3-month rolling mean.",
|
| 136 |
+
"type": "climate_index",
|
| 137 |
+
"variables": ["mslp"],
|
| 138 |
+
"region": "North Atlantic",
|
| 139 |
+
},
|
| 140 |
+
{
|
| 141 |
+
"id": 9,
|
| 142 |
+
"slug": "australia_enso_rainfall",
|
| 143 |
+
"query": "Compare precipitation over Eastern Australia (25-45°S, 145-155°E) "
|
| 144 |
+
"between the La Niña year 2022 and El Niño year 2023. "
|
| 145 |
+
"Show a two-panel map of annual total precipitation for each year "
|
| 146 |
+
"and a difference map (2023 minus 2022).",
|
| 147 |
+
"type": "multi_year_anomaly",
|
| 148 |
+
"variables": ["tp"],
|
| 149 |
+
"region": "Australia",
|
| 150 |
+
},
|
| 151 |
+
{
|
| 152 |
+
"id": 10,
|
| 153 |
+
"slug": "med_eof_sst",
|
| 154 |
+
"query": "Perform an EOF analysis on Mediterranean SST anomalies "
|
| 155 |
+
"(30-46°N, -6 to 36°E) for 2019-2024 and show the first 3 modes "
|
| 156 |
+
"with variance explained. Interpret the dominant patterns.",
|
| 157 |
+
"type": "eof_analysis",
|
| 158 |
+
"variables": ["sst"],
|
| 159 |
+
"region": "Mediterranean",
|
| 160 |
+
},
|
| 161 |
+
|
| 162 |
+
# ═══════════════════════════════════════════════════════════════
|
| 163 |
+
# §3 — Trends & Climate Change Signals
|
| 164 |
+
# ═══════════════════════════════════════════════════════════════
|
| 165 |
+
{
|
| 166 |
+
"id": 11,
|
| 167 |
+
"slug": "arctic_polar_amplification",
|
| 168 |
+
"query": "Compare January mean 2m temperature across the entire Arctic "
|
| 169 |
+
"(north of 70°N) for 2024 vs 2000. Show both maps side by side, "
|
| 170 |
+
"compute the area-weighted temperature difference, and quantify "
|
| 171 |
+
"polar amplification.",
|
| 172 |
+
"type": "decadal_comparison",
|
| 173 |
+
"variables": ["t2"],
|
| 174 |
+
"region": "Arctic (>70°N)",
|
| 175 |
+
},
|
| 176 |
+
{
|
| 177 |
+
"id": 12,
|
| 178 |
+
"slug": "med_marine_heatwave_2023",
|
| 179 |
+
"query": "Map the summer (JJA) 2023 mean SST anomaly across the entire "
|
| 180 |
+
"Mediterranean basin (30-46°N, -6 to 36°E) compared to the 2018-2022 "
|
| 181 |
+
"summer mean. Identify marine heatwave hotspots where SST exceeded "
|
| 182 |
+
"+2°C above normal.",
|
| 183 |
+
"type": "marine_heatwave",
|
| 184 |
+
"variables": ["sst"],
|
| 185 |
+
"region": "Mediterranean",
|
| 186 |
+
},
|
| 187 |
+
{
|
| 188 |
+
"id": 13,
|
| 189 |
+
"slug": "paris_decadal_comparison",
|
| 190 |
+
"query": "Compare the average summer (JJA) temperature in Paris between the "
|
| 191 |
+
"decades 2000-2009 and 2014-2023 — show a difference map and time series.",
|
| 192 |
+
"type": "multi_panel_comparison",
|
| 193 |
+
"variables": ["t2"],
|
| 194 |
+
"region": "Paris",
|
| 195 |
+
},
|
| 196 |
+
{
|
| 197 |
+
"id": 14,
|
| 198 |
+
"slug": "alps_snow_trend",
|
| 199 |
+
"query": "Has the snow depth over the Alps decreased over the last 30 years? "
|
| 200 |
+
"Show me the December-February trend.",
|
| 201 |
+
"type": "trend_analysis",
|
| 202 |
+
"variables": ["sd"],
|
| 203 |
+
"region": "Alps",
|
| 204 |
+
},
|
| 205 |
+
{
|
| 206 |
+
"id": 15,
|
| 207 |
+
"slug": "uk_precip_anomaly_winter2024",
|
| 208 |
+
"query": "Map the total precipitation anomaly over the British Isles "
|
| 209 |
+
"(49-60°N, 11°W-2°E) for January 2024 compared to the 2019-2023 "
|
| 210 |
+
"January mean. Highlight regions receiving more than 150% of normal "
|
| 211 |
+
"rainfall. Save the map as a PNG file.",
|
| 212 |
+
"type": "anomaly_map",
|
| 213 |
+
"variables": ["tp"],
|
| 214 |
+
"region": "British Isles",
|
| 215 |
+
},
|
| 216 |
+
|
| 217 |
+
# ═══════════════════════════════════════════════════════════════
|
| 218 |
+
# §4 — Extreme Events & Risk
|
| 219 |
+
# ═══════════════════════════════════════════════════════════════
|
| 220 |
+
{
|
| 221 |
+
"id": 16,
|
| 222 |
+
"slug": "delhi_heatwave_detection",
|
| 223 |
+
"query": "Detect heatwave events in Delhi from 2010-2024 using the 90th "
|
| 224 |
+
"percentile threshold with a 3-day duration criterion — how has the "
|
| 225 |
+
"frequency changed?",
|
| 226 |
+
"type": "heatwave_detection",
|
| 227 |
+
"variables": ["t2"],
|
| 228 |
+
"region": "Delhi",
|
| 229 |
+
},
|
| 230 |
+
{
|
| 231 |
+
"id": 17,
|
| 232 |
+
"slug": "horn_africa_drought",
|
| 233 |
+
"query": "Calculate a 3-month SPI proxy for the Horn of Africa "
|
| 234 |
+
"(Ethiopia/Somalia) for 2020-2024 — when were the worst drought periods?",
|
| 235 |
+
"type": "drought_analysis",
|
| 236 |
+
"variables": ["tp"],
|
| 237 |
+
"region": "Horn of Africa",
|
| 238 |
+
},
|
| 239 |
+
{
|
| 240 |
+
"id": 18,
|
| 241 |
+
"slug": "baghdad_hot_days",
|
| 242 |
+
"query": "How many days per year exceeded 35°C in Baghdad from 1980 to 2024? "
|
| 243 |
+
"Plot as a bar chart with a trend line.",
|
| 244 |
+
"type": "exceedance_frequency",
|
| 245 |
+
"variables": ["t2"],
|
| 246 |
+
"region": "Baghdad",
|
| 247 |
+
},
|
| 248 |
+
{
|
| 249 |
+
"id": 19,
|
| 250 |
+
"slug": "sea_p95_precip",
|
| 251 |
+
"query": "Show me the 95th percentile daily precipitation map for Southeast Asia "
|
| 252 |
+
"for 2010-2023.",
|
| 253 |
+
"type": "extreme_percentile",
|
| 254 |
+
"variables": ["tp"],
|
| 255 |
+
"region": "Southeast Asia",
|
| 256 |
+
},
|
| 257 |
+
{
|
| 258 |
+
"id": 20,
|
| 259 |
+
"slug": "scandinavia_blocking_2018",
|
| 260 |
+
"query": "Analyse the blocking event over Scandinavia in July 2018 — show MSLP "
|
| 261 |
+
"anomalies persisting for 5+ days.",
|
| 262 |
+
"type": "blocking_detection",
|
| 263 |
+
"variables": ["mslp"],
|
| 264 |
+
"region": "Scandinavia",
|
| 265 |
+
},
|
| 266 |
+
|
| 267 |
+
# ═══════════════════════════════════════════════════════════════
|
| 268 |
+
# §5 — Maritime & Shipping
|
| 269 |
+
# ═══════════════════════════════════════════════════════════════
|
| 270 |
+
{
|
| 271 |
+
"id": 21,
|
| 272 |
+
"slug": "rotterdam_shanghai_route",
|
| 273 |
+
"query": "Calculate the maritime route from Rotterdam to Shanghai and analyse "
|
| 274 |
+
"wind risk along the route for December.",
|
| 275 |
+
"type": "maritime_route_risk",
|
| 276 |
+
"variables": ["u10", "v10"],
|
| 277 |
+
"region": "Europe-Asia",
|
| 278 |
+
},
|
| 279 |
+
{
|
| 280 |
+
"id": 22,
|
| 281 |
+
"slug": "indian_ocean_sst_dipole",
|
| 282 |
+
"query": "Map the SST anomaly across the Indian Ocean (30°S-25°N, 30-120°E) "
|
| 283 |
+
"for October 2023 relative to the 2019-2022 October mean. "
|
| 284 |
+
"Show the Indian Ocean Dipole pattern. Save the map as PNG.",
|
| 285 |
+
"type": "anomaly_map",
|
| 286 |
+
"variables": ["sst"],
|
| 287 |
+
"region": "Indian Ocean",
|
| 288 |
+
},
|
| 289 |
+
{
|
| 290 |
+
"id": 23,
|
| 291 |
+
"slug": "japan_typhoon_season_wind",
|
| 292 |
+
"query": "Map the mean and maximum 10m wind speed over the seas around Japan "
|
| 293 |
+
"(20-45°N, 120-150°E) during typhoon season (August-October) 2023. "
|
| 294 |
+
"Show two-panel spatial maps highlighting areas where mean wind "
|
| 295 |
+
"exceeded 8 m/s. Save as PNG.",
|
| 296 |
+
"type": "multi_panel_map",
|
| 297 |
+
"variables": ["u10", "v10"],
|
| 298 |
+
"region": "Japan",
|
| 299 |
+
},
|
| 300 |
+
{
|
| 301 |
+
"id": 24,
|
| 302 |
+
"slug": "south_atlantic_sst_gradient",
|
| 303 |
+
"query": "Map the mean SST field across the South Atlantic (40°S-5°N, 50°W-15°E) "
|
| 304 |
+
"for March 2024. Overlay SST isotherms and highlight the "
|
| 305 |
+
"Brazil-Malvinas confluence zone. Save as PNG.",
|
| 306 |
+
"type": "sst_map",
|
| 307 |
+
"variables": ["sst"],
|
| 308 |
+
"region": "South Atlantic",
|
| 309 |
+
},
|
| 310 |
+
|
| 311 |
+
# ═══════════════════════════════════════════════════════════════
|
| 312 |
+
# §6 — Energy Assessment
|
| 313 |
+
# ═══════════════════════════════════════════════════════════════
|
| 314 |
+
{
|
| 315 |
+
"id": 25,
|
| 316 |
+
"slug": "north_sea_wind_power",
|
| 317 |
+
"query": "Map the mean 100m wind power density across the North Sea for "
|
| 318 |
+
"2020-2024 — where are the best offshore wind sites?",
|
| 319 |
+
"type": "wind_energy",
|
| 320 |
+
"variables": ["u100", "v100"],
|
| 321 |
+
"region": "North Sea",
|
| 322 |
+
},
|
| 323 |
+
{
|
| 324 |
+
"id": 26,
|
| 325 |
+
"slug": "german_bight_weibull",
|
| 326 |
+
"query": "Fit a Weibull distribution to 100m wind speed at 54°N, 7°E "
|
| 327 |
+
"(German Bight) for 2023 and estimate the capacity factor for a "
|
| 328 |
+
"3-25 m/s turbine range. Plot the histogram with Weibull fit overlay "
|
| 329 |
+
"and save as PNG.",
|
| 330 |
+
"type": "weibull_analysis",
|
| 331 |
+
"variables": ["u100", "v100"],
|
| 332 |
+
"region": "German Bight",
|
| 333 |
+
},
|
| 334 |
+
{
|
| 335 |
+
"id": 27,
|
| 336 |
+
"slug": "solar_sahara_vs_germany",
|
| 337 |
+
"query": "Compare incoming solar radiation (SSRD) between the Sahara and "
|
| 338 |
+
"northern Germany across 2023 — show monthly means.",
|
| 339 |
+
"type": "comparison_timeseries",
|
| 340 |
+
"variables": ["ssrd"],
|
| 341 |
+
"region": "Sahara / Germany",
|
| 342 |
+
},
|
| 343 |
+
{
|
| 344 |
+
"id": 28,
|
| 345 |
+
"slug": "persian_gulf_sst_summer",
|
| 346 |
+
"query": "Map the mean SST across the Persian Gulf and Arabian Sea "
|
| 347 |
+
"(12-32°N, 44-70°E) for August 2023. Highlight areas where SST "
|
| 348 |
+
"exceeded 32°C in a spatial map. Save as PNG.",
|
| 349 |
+
"type": "threshold_map",
|
| 350 |
+
"variables": ["sst"],
|
| 351 |
+
"region": "Persian Gulf",
|
| 352 |
+
},
|
| 353 |
+
|
| 354 |
+
# ═══════════════════════════════════════════════════════════════
|
| 355 |
+
# §7 — Diurnal & Sub-Daily Processes
|
| 356 |
+
# ═══════════════════════════════════════════════════════════════
|
| 357 |
+
{
|
| 358 |
+
"id": 29,
|
| 359 |
+
"slug": "sahara_diurnal_t2_blh",
|
| 360 |
+
"query": "Show the diurnal cycle of 2m temperature and boundary layer height "
|
| 361 |
+
"in the Sahara for July 2024 — dual-axis plot.",
|
| 362 |
+
"type": "diurnal_cycle",
|
| 363 |
+
"variables": ["t2", "blh"],
|
| 364 |
+
"region": "Sahara",
|
| 365 |
+
},
|
| 366 |
+
{
|
| 367 |
+
"id": 30,
|
| 368 |
+
"slug": "amazon_convective_peak",
|
| 369 |
+
"query": "When does convective precipitation peak over the Amazon basin during "
|
| 370 |
+
"DJF? Hourly climatology please.",
|
| 371 |
+
"type": "diurnal_cycle",
|
| 372 |
+
"variables": ["cp"],
|
| 373 |
+
"region": "Amazon",
|
| 374 |
+
},
|
| 375 |
+
|
| 376 |
+
# ═══════════════════════════════════════════════════════════════
|
| 377 |
+
# §8 — Multi-Variable & Diagnostics
|
| 378 |
+
# ═══════════════════════════════════════════════════════════════
|
| 379 |
+
{
|
| 380 |
+
"id": 31,
|
| 381 |
+
"slug": "europe_rh_august",
|
| 382 |
+
"query": "Compute relative humidity from 2m temperature and dewpoint for "
|
| 383 |
+
"central Europe, August 2023, and map the spatial mean.",
|
| 384 |
+
"type": "derived_variable",
|
| 385 |
+
"variables": ["t2", "d2"],
|
| 386 |
+
"region": "Central Europe",
|
| 387 |
+
},
|
| 388 |
+
{
|
| 389 |
+
"id": 32,
|
| 390 |
+
"slug": "hovmoller_equator_skt",
|
| 391 |
+
"query": "Create a Hovmöller diagram of 850 hPa equivalent — use skin "
|
| 392 |
+
"temperature as proxy — along the equator for 2023 to visualise the MJO.",
|
| 393 |
+
"type": "hovmoller",
|
| 394 |
+
"variables": ["skt"],
|
| 395 |
+
"region": "Equatorial",
|
| 396 |
+
},
|
| 397 |
+
{
|
| 398 |
+
"id": 33,
|
| 399 |
+
"slug": "hurricane_otis_dashboard",
|
| 400 |
+
"query": "Plot a summary dashboard for Hurricane Otis (Oct 2023, Acapulco): "
|
| 401 |
+
"SST map, wind speed time series, and TCWV distribution in one figure.",
|
| 402 |
+
"type": "dashboard",
|
| 403 |
+
"variables": ["sst", "u10", "v10", "tcwv"],
|
| 404 |
+
"region": "East Pacific / Mexico",
|
| 405 |
+
},
|
| 406 |
+
|
| 407 |
+
# ═══════════════════════════════════════════════════════════════
|
| 408 |
+
# §9 — Quick Lookups
|
| 409 |
+
# ═══════════════════════════════════════════════════════════════
|
| 410 |
+
{
|
| 411 |
+
"id": 34,
|
| 412 |
+
"slug": "california_sst_jan",
|
| 413 |
+
"query": "What was the average SST off the coast of California in January 2024? "
|
| 414 |
+
"Also plot a spatial map of the SST field for that month and save as PNG.",
|
| 415 |
+
"type": "point_retrieval",
|
| 416 |
+
"variables": ["sst"],
|
| 417 |
+
"region": "California",
|
| 418 |
+
},
|
| 419 |
+
{
|
| 420 |
+
"id": 35,
|
| 421 |
+
"slug": "berlin_monthly_temp",
|
| 422 |
+
"query": "Plot the 2023 monthly mean temperature for Berlin as a seasonal curve.",
|
| 423 |
+
"type": "time_series",
|
| 424 |
+
"variables": ["t2"],
|
| 425 |
+
"region": "Berlin",
|
| 426 |
+
},
|
| 427 |
+
{
|
| 428 |
+
"id": 36,
|
| 429 |
+
"slug": "biscay_wind_stats",
|
| 430 |
+
"query": "Download 10m wind speed for the Bay of Biscay, last 3 years, and "
|
| 431 |
+
"give me basic statistics. Also plot a wind speed histogram or time "
|
| 432 |
+
"series and save as PNG.",
|
| 433 |
+
"type": "stats_retrieval",
|
| 434 |
+
"variables": ["u10", "v10"],
|
| 435 |
+
"region": "Bay of Biscay",
|
| 436 |
+
},
|
| 437 |
+
]
|
| 438 |
+
|
| 439 |
+
|
| 440 |
+
# ============================================================================
|
| 441 |
+
# AGENT SETUP (mirrors main.py exactly)
|
| 442 |
+
# ============================================================================
|
| 443 |
+
|
| 444 |
+
def build_agent():
    """Construct the Eurus LangChain agent with the standard tool suite.

    Mirrors the setup in main.py: same model configuration, same system
    prompt, routing disabled and the analysis guide enabled.
    """
    chat_model = ChatOpenAI(
        model=CONFIG.model_name,
        temperature=CONFIG.temperature,
    )
    toolset = get_all_tools(enable_routing=False, enable_guide=True)
    return create_agent(
        model=chat_model,
        tools=toolset,
        system_prompt=AGENT_SYSTEM_PROMPT,
        debug=False,
    )
|
| 461 |
+
|
| 462 |
+
|
| 463 |
+
# ============================================================================
|
| 464 |
+
# STEP CAPTURE
|
| 465 |
+
# ============================================================================
|
| 466 |
+
|
| 467 |
+
def extract_steps(messages) -> list:
    """
    Extract ALL intermediate steps from agent message history.

    Walks the LangChain message list in order and converts each message into
    a JSON-serializable step dict:

    - HumanMessage -> {"type": "user_query", ...}
    - AIMessage    -> one {"type": "tool_call", ...} per requested tool call,
                      or {"type": "ai_response", ...} for plain content
    - ToolMessage  -> {"type": "tool_output", ...}

    Long payloads are truncated (query 2000, tool args 5000, tool output 3000,
    AI content 5000 chars) so steps.json stays a reasonable size.

    Args:
        messages: Sequence of LangChain messages from agent.invoke().

    Returns:
        List of step dicts with 1-based "step" numbering.
    """
    steps = []

    for msg in messages:
        if isinstance(msg, HumanMessage):
            steps.append({
                "step": len(steps) + 1,
                "type": "user_query",
                "content": msg.content[:2000],
            })
        elif isinstance(msg, AIMessage):
            # AI thinking / tool calls
            if msg.tool_calls:
                for tc in msg.tool_calls:
                    # Serialize args once; default=str covers non-JSON values.
                    args_str = json.dumps(tc.get("args", {}), indent=2, default=str)
                    if len(args_str) > 5000:
                        # Too large to keep structured — store a truncated string.
                        arguments = args_str[:5000] + "\n... [TRUNCATED]"
                    else:
                        # Round-trip so non-JSON values appear as their str() form.
                        arguments = json.loads(args_str)

                    steps.append({
                        "step": len(steps) + 1,
                        "type": "tool_call",
                        "tool_name": tc.get("name", "unknown"),
                        "tool_id": tc.get("id", ""),
                        "arguments": arguments,
                        "reasoning": msg.content[:1000] if msg.content else "",
                    })
            elif msg.content:
                # Final response or intermediate reasoning
                steps.append({
                    "step": len(steps) + 1,
                    "type": "ai_response",
                    "content": msg.content[:5000],
                })
        elif isinstance(msg, ToolMessage):
            # Tool output; content may be a non-str payload (e.g. list of blocks).
            content = msg.content if isinstance(msg.content, str) else str(msg.content)
            if len(content) > 3000:
                content = content[:3000] + "\n... [TRUNCATED]"

            steps.append({
                "step": len(steps) + 1,
                "type": "tool_output",
                "tool_name": getattr(msg, "name", "unknown"),
                "tool_call_id": getattr(msg, "tool_call_id", ""),
                "content": content,
            })

    return steps
|
| 522 |
+
|
| 523 |
+
|
| 524 |
+
# ============================================================================
|
| 525 |
+
# QA RUNNER
|
| 526 |
+
# ============================================================================
|
| 527 |
+
|
| 528 |
+
def _write_json(path: Path, payload) -> None:
    """Write payload to path as pretty-printed UTF-8 JSON (default=str fallback)."""
    with open(path, "w") as f:
        json.dump(payload, f, indent=2, default=str, ensure_ascii=False)


def _base_metadata(query_def: dict, elapsed: float) -> dict:
    """Build the metadata fields common to success and error results."""
    return {
        "query_id": query_def["id"],
        "slug": query_def["slug"],
        "query": query_def["query"],
        "type": query_def.get("type", "unknown"),
        "variables": query_def.get("variables", []),
        "region": query_def.get("region", ""),
        "timestamp": datetime.now().isoformat(),
        "elapsed_seconds": round(elapsed, 1),
    }


def run_single_query(agent, query_def: dict, output_dir: Path) -> dict:
    """
    Run a single QA query and capture everything.

    Invokes the agent, records all intermediate steps (steps.json), the final
    answer (response.md), any NEW plots produced during the run, and a
    metadata.json summary in output_dir/q{NN}_{slug}/.

    Args:
        agent: Agent built by build_agent().
        query_def: One entry from QA_QUERIES.
        output_dir: Root directory for QA result folders.

    Returns: metadata dict (status "success" or "error")
    """
    qid = query_def["id"]
    slug = query_def["slug"]
    query = query_def["query"]

    folder = output_dir / f"q{qid:02d}_{slug}"
    folder.mkdir(parents=True, exist_ok=True)

    print(f"\n{'='*70}")
    print(f" Q{qid:02d}: {query[:70]}...")
    print(f"{'='*70}")

    start_time = time.time()

    try:
        # Snapshot existing plots BEFORE running so we only copy NEW ones
        plots_dir = get_plots_dir()
        existing_plots = set()
        if plots_dir.exists():
            existing_plots = {f.name for f in plots_dir.glob("*.png")}

        # Invoke agent
        config = {"recursion_limit": 35}
        messages = [HumanMessage(content=query)]

        result = agent.invoke({"messages": messages}, config=config)

        elapsed = time.time() - start_time
        result_messages = result["messages"]

        # Extract intermediate steps
        steps = extract_steps(result_messages)

        # Final answer = last AI message with content and no pending tool calls
        final_response = ""
        for msg in reversed(result_messages):
            if isinstance(msg, AIMessage) and msg.content and not msg.tool_calls:
                final_response = msg.content
                break

        # Save steps.json
        _write_json(folder / "steps.json", steps)

        # Save final response
        with open(folder / "response.md", "w") as f:
            f.write(f"# Q{qid:02d}: {slug}\n\n")
            f.write(f"**Query:** {query}\n\n")
            f.write(f"**Elapsed:** {elapsed:.1f}s\n\n")
            f.write("---\n\n")
            f.write(final_response)

        # Copy only NEW plots (diff against pre-query snapshot)
        plot_files = []
        if plots_dir.exists():
            for f_path in sorted(plots_dir.glob("*.png")):
                if f_path.name not in existing_plots:
                    shutil.copy2(f_path, folder / f_path.name)
                    plot_files.append(f_path.name)
                    print(f" 📊 Plot saved: {f_path.name}")

        # Count tool calls
        tool_calls = [s for s in steps if s["type"] == "tool_call"]
        tools_used = list(set(s["tool_name"] for s in tool_calls))

        # Build and save metadata
        metadata = _base_metadata(query_def, elapsed)
        metadata.update({
            "status": "success",
            "tools_used": tools_used,
            "num_tool_calls": len(tool_calls),
            "num_steps": len(steps),
            "plot_files": plot_files,
            "notes": "",
        })
        _write_json(folder / "metadata.json", metadata)

        print(f" ✅ SUCCESS in {elapsed:.1f}s | Tools: {', '.join(tools_used)} | Steps: {len(steps)}")

        return metadata

    except Exception as e:
        # Top-level QA boundary: record the failure and keep the batch going.
        elapsed = time.time() - start_time
        print(f" ❌ FAILED in {elapsed:.1f}s: {e}")

        metadata = _base_metadata(query_def, elapsed)
        metadata.update({
            "status": "error",
            "error": str(e),
            "tools_used": [],
            "num_tool_calls": 0,
            "num_steps": 0,
            "plot_files": [],
            "notes": f"Error: {e}",
        })
        _write_json(folder / "metadata.json", metadata)

        return metadata
|
| 655 |
+
|
| 656 |
+
|
| 657 |
+
def main():
    """CLI entry point: run the selected QA queries and write a summary."""
    parser = argparse.ArgumentParser(description="Eurus QA Runner")
    parser.add_argument("--query", type=int, help="Run a single query by ID (1-36)")
    parser.add_argument("--start", type=int, default=1, help="Start from query ID")
    parser.add_argument("--end", type=int, default=36, help="End at query ID (inclusive)")
    parser.add_argument("--output", type=str, default=None, help="Output directory (default: data/qa_results)")
    parser.add_argument("--skip-existing", action="store_true", help="Skip if folder already has metadata.json")
    args = parser.parse_args()

    # Refuse to start without credentials
    if not os.environ.get("OPENAI_API_KEY"):
        print("❌ OPENAI_API_KEY not set!")
        sys.exit(1)

    output_dir = Path(args.output) if args.output else PROJECT_ROOT / "data" / "qa_results"
    output_dir.mkdir(parents=True, exist_ok=True)

    print(f"""
╔══════════════════════════════════════════════════════╗
║ Eurus QA Runner v1.0 ║
║ {datetime.now().strftime('%Y-%m-%d %H:%M:%S')} ║
╚══════════════════════════════════════════════════════╝
Output: {output_dir}
""")

    # One agent instance is reused across all queries
    print("🏗️ Building agent...")
    agent = build_agent()
    print("✅ Agent ready\n")

    # Select which queries to run: a single ID, or the [start, end] range
    if args.query:
        selected = [q for q in QA_QUERIES if q["id"] == args.query]
    else:
        selected = [q for q in QA_QUERIES if args.start <= q["id"] <= args.end]

    results = []
    for q in selected:
        folder = output_dir / f"q{q['id']:02d}_{q['slug']}"
        if args.skip_existing and (folder / "metadata.json").exists():
            print(f"⏭️ Skipping Q{q['id']:02d} (already exists)")
            continue

        results.append(run_single_query(agent, q, output_dir))

    # Per-query summary table
    print(f"\n{'='*70}")
    print("QA SUMMARY")
    print(f"{'='*70}")

    passed = [r for r in results if r["status"] == "success"]
    errored = [r for r in results if r["status"] == "error"]
    total_time = sum(r["elapsed_seconds"] for r in results)

    for r in results:
        marker = "✅" if r["status"] == "success" else "❌"
        print(f" {marker} Q{r['query_id']:02d} ({r['slug']:20s}) | "
              f"{r['elapsed_seconds']:5.1f}s | Tools: {', '.join(r['tools_used'])}")

    print(f"\nTotal: {len(passed)} passed, {len(errored)} failed, {total_time:.1f}s total")

    # Persist machine-readable summary next to the per-query folders
    summary = {
        "timestamp": datetime.now().isoformat(),
        "total_queries": len(results),
        "passed": len(passed),
        "failed": len(errored),
        "total_time_seconds": round(total_time, 1),
        "results": results,
    }
    summary_path = output_dir / "qa_summary.json"
    with open(summary_path, "w") as f:
        json.dump(summary, f, indent=2, ensure_ascii=False)

    print(f"\nSummary saved to: {summary_path}")
|
| 735 |
+
|
| 736 |
+
|
| 737 |
+
# Script entry point: only run the QA batch when executed directly,
# not when imported as a module.
if __name__ == "__main__":
    main()
|
setup_env.sh
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
# Bootstrap the Eurus development environment:
#   1. create a template .env (if missing) for the required API keys
#   2. install Python dependencies and the package itself (editable)
#
# Fail fast: without this, a failed `pip install` would still end with
# "Setup complete!" and mask a broken environment.
set -e

echo "Setting up Eurus environment..."

# Create .env file if it doesn't exist
if [ ! -f .env ]; then
    echo "Creating .env file..."
    cat > .env << EOL
OPENAI_API_KEY=your_openai_api_key
ARRAYLAKE_API_KEY=your_arraylake_api_key
EOL
    echo ".env file created. Please update it with your API keys."
else
    echo ".env file already exists."
fi

# Install dependencies
echo "Installing dependencies..."
pip install -r requirements.txt
pip install -e .

echo "Setup complete!"
|
src/eurus/__init__.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""
Eurus - ERA5 Climate Analysis Agent
====================================

A scientific climate analysis platform powered by ERA5 reanalysis data from
Earthmover's cloud-optimized archive via Icechunk.

Features:
- ERA5 reanalysis data retrieval (SST, temperature, wind, pressure, etc.)
- Interactive Python REPL with pre-loaded scientific libraries
- Maritime route calculation with weather risk assessment
- Analysis methodology guides for climate science
- Intelligent caching with persistent memory
- Predefined geographic regions (El Niño, Atlantic, Pacific, etc.)
- Full MCP protocol support for Claude and other AI assistants

Example usage as MCP server:
    # In .mcp.json
    {
        "mcpServers": {
            "era5": {
                "command": "era5-mcp",
                "env": {"ARRAYLAKE_API_KEY": "your_key"}
            }
        }
    }

Example usage as Python library:
    from eurus import retrieve_era5_data, list_available_variables
    from eurus.tools import get_all_tools

    # Download SST data
    result = retrieve_era5_data(
        query_type="temporal",
        variable_id="sst",
        start_date="2024-01-01",
        end_date="2024-01-07",
        region="california_coast"
    )

    # Get all tools for agent (only core tools, no science clutter)
    tools = get_all_tools(enable_routing=True)
"""

__version__ = "1.1.0"
__author__ = "Eurus Team"

# Re-export the package's public surface so callers can simply
# `from eurus import ...` without knowing the submodule layout.
from eurus.config import (
    ERA5_VARIABLES,
    GEOGRAPHIC_REGIONS,
    AGENT_SYSTEM_PROMPT,
    get_variable_info,
    get_short_name,
    list_available_variables,
)
from eurus.retrieval import retrieve_era5_data
from eurus.memory import MemoryManager, get_memory
from eurus.tools import get_all_tools

# Explicit public API; keep in sync with the imports above.
__all__ = [
    # Version
    "__version__",
    # Config
    "ERA5_VARIABLES",
    "GEOGRAPHIC_REGIONS",
    "AGENT_SYSTEM_PROMPT",
    "get_variable_info",
    "get_short_name",
    "list_available_variables",
    # Retrieval
    "retrieve_era5_data",
    # Memory
    "MemoryManager",
    "get_memory",
    # Tools
    "get_all_tools",
]
|
src/eurus/config.py
ADDED
|
@@ -0,0 +1,751 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ERA5 MCP Configuration
|
| 3 |
+
======================
|
| 4 |
+
|
| 5 |
+
Centralized configuration including ERA5 variable catalog, geographic regions,
|
| 6 |
+
and runtime settings.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
from __future__ import annotations
|
| 10 |
+
|
| 11 |
+
import os
|
| 12 |
+
from dataclasses import dataclass, field
|
| 13 |
+
from pathlib import Path
|
| 14 |
+
from typing import Dict, Optional, List
|
| 15 |
+
from datetime import datetime
|
| 16 |
+
|
| 17 |
+
# =============================================================================
|
| 18 |
+
# PATHS
|
| 19 |
+
# =============================================================================
|
| 20 |
+
|
| 21 |
+
def get_data_dir() -> Path:
    """Return the data directory, creating it on first use.

    The location may be overridden with the ``ERA5_DATA_DIR`` environment
    variable; otherwise ``<cwd>/data`` is used.
    """
    target = Path(os.environ.get("ERA5_DATA_DIR", Path.cwd() / "data"))
    target.mkdir(parents=True, exist_ok=True)
    return target
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def get_plots_dir() -> Path:
    """Return the ``plots`` subdirectory of the data dir, creating it if needed."""
    target = get_data_dir() / "plots"
    target.mkdir(parents=True, exist_ok=True)
    return target
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def get_memory_dir() -> Path:
    """Return the persistent-memory directory, creating it on first use.

    The location may be overridden with the ``ERA5_MEMORY_DIR`` environment
    variable; otherwise ``<cwd>/.memory`` is used.
    """
    target = Path(os.environ.get("ERA5_MEMORY_DIR", Path.cwd() / ".memory"))
    target.mkdir(parents=True, exist_ok=True)
    return target
| 40 |
+
|
| 41 |
+
|
| 42 |
+
# =============================================================================
|
| 43 |
+
# ERA5 VARIABLE CATALOG
|
| 44 |
+
# =============================================================================
|
| 45 |
+
|
| 46 |
+
@dataclass(frozen=True)
|
| 47 |
+
class ERA5Variable:
|
| 48 |
+
"""Metadata for an ERA5 variable."""
|
| 49 |
+
|
| 50 |
+
short_name: str
|
| 51 |
+
long_name: str
|
| 52 |
+
units: str
|
| 53 |
+
description: str
|
| 54 |
+
category: str
|
| 55 |
+
typical_range: tuple[float | None, float | None] = (None, None)
|
| 56 |
+
colormap: str = "viridis"
|
| 57 |
+
|
| 58 |
+
def __str__(self) -> str:
|
| 59 |
+
return f"{self.short_name}: {self.long_name} ({self.units})"
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
# Comprehensive ERA5 variable mapping — ALL 22 Arraylake variables
|
| 63 |
+
# Source: earthmover-public/era5-surface-aws Icechunk store
|
| 64 |
+
ERA5_VARIABLES: Dict[str, ERA5Variable] = {
|
| 65 |
+
# ── Ocean ──────────────────────────────────────────────────────────────
|
| 66 |
+
"sst": ERA5Variable(
|
| 67 |
+
short_name="sst",
|
| 68 |
+
long_name="Sea Surface Temperature",
|
| 69 |
+
units="K",
|
| 70 |
+
description="Temperature of sea water near the surface",
|
| 71 |
+
category="ocean",
|
| 72 |
+
typical_range=(270, 310),
|
| 73 |
+
colormap="RdYlBu_r"
|
| 74 |
+
),
|
| 75 |
+
# ── Temperature ────────────────────────────────────────────────────────
|
| 76 |
+
"t2": ERA5Variable(
|
| 77 |
+
short_name="t2",
|
| 78 |
+
long_name="2m Temperature",
|
| 79 |
+
units="K",
|
| 80 |
+
description="Air temperature at 2 meters above the surface",
|
| 81 |
+
category="atmosphere",
|
| 82 |
+
typical_range=(220, 330),
|
| 83 |
+
colormap="RdYlBu_r"
|
| 84 |
+
),
|
| 85 |
+
"d2": ERA5Variable(
|
| 86 |
+
short_name="d2",
|
| 87 |
+
long_name="2m Dewpoint Temperature",
|
| 88 |
+
units="K",
|
| 89 |
+
description="Temperature to which air at 2m must cool to reach saturation; indicates humidity",
|
| 90 |
+
category="atmosphere",
|
| 91 |
+
typical_range=(220, 310),
|
| 92 |
+
colormap="RdYlBu_r"
|
| 93 |
+
),
|
| 94 |
+
"skt": ERA5Variable(
|
| 95 |
+
short_name="skt",
|
| 96 |
+
long_name="Skin Temperature",
|
| 97 |
+
units="K",
|
| 98 |
+
description="Temperature of the Earth's uppermost surface layer (land, ocean, or ice)",
|
| 99 |
+
category="surface",
|
| 100 |
+
typical_range=(220, 340),
|
| 101 |
+
colormap="RdYlBu_r"
|
| 102 |
+
),
|
| 103 |
+
# ── Wind 10 m ──────────────────────────────────────────────────────────
|
| 104 |
+
"u10": ERA5Variable(
|
| 105 |
+
short_name="u10",
|
| 106 |
+
long_name="10m U-Wind Component",
|
| 107 |
+
units="m/s",
|
| 108 |
+
description="Eastward component of wind at 10 meters above surface",
|
| 109 |
+
category="atmosphere",
|
| 110 |
+
typical_range=(-30, 30),
|
| 111 |
+
colormap="RdBu_r"
|
| 112 |
+
),
|
| 113 |
+
"v10": ERA5Variable(
|
| 114 |
+
short_name="v10",
|
| 115 |
+
long_name="10m V-Wind Component",
|
| 116 |
+
units="m/s",
|
| 117 |
+
description="Northward component of wind at 10 meters above surface",
|
| 118 |
+
category="atmosphere",
|
| 119 |
+
typical_range=(-30, 30),
|
| 120 |
+
colormap="RdBu_r"
|
| 121 |
+
),
|
| 122 |
+
# ── Wind 100 m (hub-height for wind energy) ───────────────────────────
|
| 123 |
+
"u100": ERA5Variable(
|
| 124 |
+
short_name="u100",
|
| 125 |
+
long_name="100m U-Wind Component",
|
| 126 |
+
units="m/s",
|
| 127 |
+
description="Eastward component of wind at 100 meters above surface (wind-turbine hub height)",
|
| 128 |
+
category="atmosphere",
|
| 129 |
+
typical_range=(-40, 40),
|
| 130 |
+
colormap="RdBu_r"
|
| 131 |
+
),
|
| 132 |
+
"v100": ERA5Variable(
|
| 133 |
+
short_name="v100",
|
| 134 |
+
long_name="100m V-Wind Component",
|
| 135 |
+
units="m/s",
|
| 136 |
+
description="Northward component of wind at 100 meters above surface (wind-turbine hub height)",
|
| 137 |
+
category="atmosphere",
|
| 138 |
+
typical_range=(-40, 40),
|
| 139 |
+
colormap="RdBu_r"
|
| 140 |
+
),
|
| 141 |
+
# ── Pressure ───────────────────────────────────────────────────────────
|
| 142 |
+
"sp": ERA5Variable(
|
| 143 |
+
short_name="sp",
|
| 144 |
+
long_name="Surface Pressure",
|
| 145 |
+
units="Pa",
|
| 146 |
+
description="Pressure at the Earth's surface",
|
| 147 |
+
category="atmosphere",
|
| 148 |
+
typical_range=(85000, 108000),
|
| 149 |
+
colormap="viridis"
|
| 150 |
+
),
|
| 151 |
+
"mslp": ERA5Variable(
|
| 152 |
+
short_name="mslp",
|
| 153 |
+
long_name="Mean Sea Level Pressure",
|
| 154 |
+
units="Pa",
|
| 155 |
+
description="Atmospheric pressure reduced to mean sea level",
|
| 156 |
+
category="atmosphere",
|
| 157 |
+
typical_range=(96000, 105000),
|
| 158 |
+
colormap="viridis"
|
| 159 |
+
),
|
| 160 |
+
# ── Boundary Layer ─────────────────────────────────────────────────────
|
| 161 |
+
"blh": ERA5Variable(
|
| 162 |
+
short_name="blh",
|
| 163 |
+
long_name="Boundary Layer Height",
|
| 164 |
+
units="m",
|
| 165 |
+
description="Height of the planetary boundary layer above ground",
|
| 166 |
+
category="atmosphere",
|
| 167 |
+
typical_range=(50, 3000),
|
| 168 |
+
colormap="viridis"
|
| 169 |
+
),
|
| 170 |
+
"cape": ERA5Variable(
|
| 171 |
+
short_name="cape",
|
| 172 |
+
long_name="Convective Available Potential Energy",
|
| 173 |
+
units="J/kg",
|
| 174 |
+
description="Instability indicator for convection/thunderstorm potential",
|
| 175 |
+
category="atmosphere",
|
| 176 |
+
typical_range=(0, 5000),
|
| 177 |
+
colormap="YlOrRd"
|
| 178 |
+
),
|
| 179 |
+
# ── Cloud & Precipitation ──────────────────────────────────────────────
|
| 180 |
+
"tcc": ERA5Variable(
|
| 181 |
+
short_name="tcc",
|
| 182 |
+
long_name="Total Cloud Cover",
|
| 183 |
+
units="fraction (0-1)",
|
| 184 |
+
description="Fraction of sky covered by clouds",
|
| 185 |
+
category="atmosphere",
|
| 186 |
+
typical_range=(0, 1),
|
| 187 |
+
colormap="gray_r"
|
| 188 |
+
),
|
| 189 |
+
"cp": ERA5Variable(
|
| 190 |
+
short_name="cp",
|
| 191 |
+
long_name="Convective Precipitation",
|
| 192 |
+
units="m",
|
| 193 |
+
description="Accumulated precipitation from convective processes",
|
| 194 |
+
category="precipitation",
|
| 195 |
+
typical_range=(0, 0.1),
|
| 196 |
+
colormap="Blues"
|
| 197 |
+
),
|
| 198 |
+
"lsp": ERA5Variable(
|
| 199 |
+
short_name="lsp",
|
| 200 |
+
long_name="Large-scale Precipitation",
|
| 201 |
+
units="m",
|
| 202 |
+
description="Accumulated precipitation from large-scale weather systems",
|
| 203 |
+
category="precipitation",
|
| 204 |
+
typical_range=(0, 0.1),
|
| 205 |
+
colormap="Blues"
|
| 206 |
+
),
|
| 207 |
+
"tp": ERA5Variable(
|
| 208 |
+
short_name="tp",
|
| 209 |
+
long_name="Total Precipitation",
|
| 210 |
+
units="m",
|
| 211 |
+
description="Total accumulated precipitation (convective + large-scale)",
|
| 212 |
+
category="precipitation",
|
| 213 |
+
typical_range=(0, 0.2),
|
| 214 |
+
colormap="Blues"
|
| 215 |
+
),
|
| 216 |
+
# ── Radiation ──────────────────────────────────────────────────────────
|
| 217 |
+
"ssr": ERA5Variable(
|
| 218 |
+
short_name="ssr",
|
| 219 |
+
long_name="Surface Net Solar Radiation",
|
| 220 |
+
units="J/m²",
|
| 221 |
+
description="Net balance of downward minus reflected shortwave radiation at the surface",
|
| 222 |
+
category="radiation",
|
| 223 |
+
typical_range=(0, 3e7),
|
| 224 |
+
colormap="YlOrRd"
|
| 225 |
+
),
|
| 226 |
+
"ssrd": ERA5Variable(
|
| 227 |
+
short_name="ssrd",
|
| 228 |
+
long_name="Surface Solar Radiation Downwards",
|
| 229 |
+
units="J/m²",
|
| 230 |
+
description="Total incoming shortwave (solar) radiation reaching the surface (direct + diffuse)",
|
| 231 |
+
category="radiation",
|
| 232 |
+
typical_range=(0, 3.5e7),
|
| 233 |
+
colormap="YlOrRd"
|
| 234 |
+
),
|
| 235 |
+
# ── Moisture Columns ───────────────────────────────────────────────────
|
| 236 |
+
"tcw": ERA5Variable(
|
| 237 |
+
short_name="tcw",
|
| 238 |
+
long_name="Total Column Water",
|
| 239 |
+
units="kg/m²",
|
| 240 |
+
description="Total water (vapour + liquid + ice) in the atmospheric column",
|
| 241 |
+
category="atmosphere",
|
| 242 |
+
typical_range=(0, 80),
|
| 243 |
+
colormap="Blues"
|
| 244 |
+
),
|
| 245 |
+
"tcwv": ERA5Variable(
|
| 246 |
+
short_name="tcwv",
|
| 247 |
+
long_name="Total Column Water Vapour",
|
| 248 |
+
units="kg/m²",
|
| 249 |
+
description="Total water vapour in the atmospheric column (precipitable water)",
|
| 250 |
+
category="atmosphere",
|
| 251 |
+
typical_range=(0, 70),
|
| 252 |
+
colormap="Blues"
|
| 253 |
+
),
|
| 254 |
+
# ── Land Surface ───────────────────────────────────────────────────────
|
| 255 |
+
"sd": ERA5Variable(
|
| 256 |
+
short_name="sd",
|
| 257 |
+
long_name="Snow Depth",
|
| 258 |
+
units="m water equiv.",
|
| 259 |
+
description="Depth of snow expressed as meters of water equivalent",
|
| 260 |
+
category="land_surface",
|
| 261 |
+
typical_range=(0, 2),
|
| 262 |
+
colormap="Blues"
|
| 263 |
+
),
|
| 264 |
+
"stl1": ERA5Variable(
|
| 265 |
+
short_name="stl1",
|
| 266 |
+
long_name="Soil Temperature Level 1",
|
| 267 |
+
units="K",
|
| 268 |
+
description="Temperature of the topmost soil layer (0-7 cm depth)",
|
| 269 |
+
category="land_surface",
|
| 270 |
+
typical_range=(220, 330),
|
| 271 |
+
colormap="RdYlBu_r"
|
| 272 |
+
),
|
| 273 |
+
"swvl1": ERA5Variable(
|
| 274 |
+
short_name="swvl1",
|
| 275 |
+
long_name="Volumetric Soil Water Layer 1",
|
| 276 |
+
units="m³/m³",
|
| 277 |
+
description="Volume fraction of water in the topmost soil layer (0-7 cm depth)",
|
| 278 |
+
category="land_surface",
|
| 279 |
+
typical_range=(0, 0.5),
|
| 280 |
+
colormap="YlGnBu"
|
| 281 |
+
),
|
| 282 |
+
}
|
| 283 |
+
|
| 284 |
+
# Aliases for long variable names → short names
|
| 285 |
+
# Aliases for long (CDS-style) variable names → dataset short names.
# Keys must be lowercase: lookups in get_variable_info/get_short_name
# lowercase the query before consulting this table.
VARIABLE_ALIASES: Dict[str, str] = {
    # Ocean
    "sea_surface_temperature": "sst",
    # Temperature
    "2m_temperature": "t2",
    "temperature": "t2",
    "2m_dewpoint_temperature": "d2",
    "dewpoint_temperature": "d2",
    "dewpoint": "d2",
    "skin_temperature": "skt",
    # Wind 10m
    "10m_u_component_of_wind": "u10",
    "10m_v_component_of_wind": "v10",
    # Wind 100m
    "100m_u_component_of_wind": "u100",
    "100m_v_component_of_wind": "v100",
    # Pressure
    "surface_pressure": "sp",
    "mean_sea_level_pressure": "mslp",
    # Boundary layer
    "boundary_layer_height": "blh",
    "convective_available_potential_energy": "cape",
    # Cloud & precipitation
    "total_cloud_cover": "tcc",
    "convective_precipitation": "cp",
    "large_scale_precipitation": "lsp",
    "total_precipitation": "tp",
    # Radiation
    "surface_net_solar_radiation": "ssr",
    "surface_solar_radiation_downwards": "ssrd",
    # Moisture columns
    "total_column_water": "tcw",
    "total_column_water_vapour": "tcwv",
    # Land surface
    "snow_depth": "sd",
    "soil_temperature": "stl1",
    "soil_temperature_level_1": "stl1",
    "soil_moisture": "swvl1",
    "volumetric_soil_water_layer_1": "swvl1",
}
|
| 325 |
+
|
| 326 |
+
|
| 327 |
+
def get_variable_info(variable_id: str) -> Optional[ERA5Variable]:
    """Look up ERA5 variable metadata.

    Case-insensitive; long-form aliases (see VARIABLE_ALIASES) are resolved
    to short names first. Returns None for unknown identifiers.
    """
    normalized = variable_id.lower()
    # Resolve an alias if present, otherwise try the key as-is.
    return ERA5_VARIABLES.get(VARIABLE_ALIASES.get(normalized, normalized))
|
| 334 |
+
|
| 335 |
+
|
| 336 |
+
def get_short_name(variable_id: str) -> str:
    """Resolve a variable identifier to its dataset short name.

    Case-insensitive; long-form aliases are resolved via VARIABLE_ALIASES.
    Unknown identifiers are returned unchanged (lowercased).
    """
    normalized = variable_id.lower()
    alias_target = VARIABLE_ALIASES.get(normalized)
    if alias_target is not None:
        return alias_target
    entry = ERA5_VARIABLES.get(normalized)
    return entry.short_name if entry else normalized
|
| 346 |
+
|
| 347 |
+
|
| 348 |
+
def list_available_variables() -> str:
    """Return a human-readable table of the ERA5 variable catalog.

    Each unique short name appears once, formatted as
    ``short | long name | units``.
    """
    lines = ["Available ERA5 Variables:", "=" * 50]
    listed: set[str] = set()

    for meta in ERA5_VARIABLES.values():
        if meta.short_name in listed:
            continue  # skip duplicate catalog entries pointing at one variable
        listed.add(meta.short_name)
        lines.append(
            f"  {meta.short_name:8} | {meta.long_name:30} | {meta.units}"
        )

    return "\n".join(lines)
|
| 361 |
+
|
| 362 |
+
|
| 363 |
+
def get_all_short_names() -> list[str]:
    """Return all unique short variable names, sorted alphabetically.

    The previous implementation returned ``list(set(...))``, whose ordering
    is not guaranteed and could differ between runs; sorting makes the
    output deterministic (stable logs/tests) while still being a list of
    the same unique names.
    """
    return sorted({v.short_name for v in ERA5_VARIABLES.values()})
|
| 366 |
+
|
| 367 |
+
|
| 368 |
+
# =============================================================================
|
| 369 |
+
# GEOGRAPHIC REGIONS (Common oceanographic areas)
|
| 370 |
+
# =============================================================================
|
| 371 |
+
|
| 372 |
+
@dataclass(frozen=True)
class GeographicRegion:
    """A named latitude/longitude bounding box.

    Longitudes follow the dataset's convention; a box whose min_lon exceeds
    max_lon wraps across the 0°/360° seam (e.g. the Mediterranean entry).
    """

    name: str
    min_lat: float
    max_lat: float
    min_lon: float
    max_lon: float
    description: str = ""

    def to_dict(self) -> dict:
        """Return only the four bounding-box coordinates as a plain dict."""
        coordinate_fields = ("min_lat", "max_lat", "min_lon", "max_lon")
        return {key: getattr(self, key) for key in coordinate_fields}
|
| 390 |
+
|
| 391 |
+
|
| 392 |
+
GEOGRAPHIC_REGIONS: Dict[str, GeographicRegion] = {
|
| 393 |
+
"global": GeographicRegion(
|
| 394 |
+
"global", -90, 90, 0, 359.75,
|
| 395 |
+
"Entire globe"
|
| 396 |
+
),
|
| 397 |
+
"north_atlantic": GeographicRegion(
|
| 398 |
+
"north_atlantic", 0, 65, 280, 360,
|
| 399 |
+
"North Atlantic Ocean"
|
| 400 |
+
),
|
| 401 |
+
"south_atlantic": GeographicRegion(
|
| 402 |
+
"south_atlantic", -60, 0, 280, 20,
|
| 403 |
+
"South Atlantic Ocean"
|
| 404 |
+
),
|
| 405 |
+
"north_pacific": GeographicRegion(
|
| 406 |
+
"north_pacific", 0, 65, 100, 260,
|
| 407 |
+
"North Pacific Ocean"
|
| 408 |
+
),
|
| 409 |
+
"south_pacific": GeographicRegion(
|
| 410 |
+
"south_pacific", -60, 0, 150, 290,
|
| 411 |
+
"South Pacific Ocean"
|
| 412 |
+
),
|
| 413 |
+
"indian_ocean": GeographicRegion(
|
| 414 |
+
"indian_ocean", -60, 30, 20, 120,
|
| 415 |
+
"Indian Ocean"
|
| 416 |
+
),
|
| 417 |
+
"arctic": GeographicRegion(
|
| 418 |
+
"arctic", 65, 90, 0, 359.75,
|
| 419 |
+
"Arctic Ocean and surrounding areas"
|
| 420 |
+
),
|
| 421 |
+
"antarctic": GeographicRegion(
|
| 422 |
+
"antarctic", -90, -60, 0, 359.75,
|
| 423 |
+
"Antarctic and Southern Ocean"
|
| 424 |
+
),
|
| 425 |
+
"mediterranean": GeographicRegion(
|
| 426 |
+
"mediterranean", 30, 46, 354, 42,
|
| 427 |
+
"Mediterranean Sea"
|
| 428 |
+
),
|
| 429 |
+
"gulf_of_mexico": GeographicRegion(
|
| 430 |
+
"gulf_of_mexico", 18, 31, 262, 282,
|
| 431 |
+
"Gulf of Mexico"
|
| 432 |
+
),
|
| 433 |
+
"caribbean": GeographicRegion(
|
| 434 |
+
"caribbean", 8, 28, 255, 295,
|
| 435 |
+
"Caribbean Sea"
|
| 436 |
+
),
|
| 437 |
+
"california_coast": GeographicRegion(
|
| 438 |
+
"california_coast", 32, 42, 235, 250,
|
| 439 |
+
"California coastal waters"
|
| 440 |
+
),
|
| 441 |
+
"east_coast_us": GeographicRegion(
|
| 442 |
+
"east_coast_us", 25, 45, 280, 295,
|
| 443 |
+
"US East Coast"
|
| 444 |
+
),
|
| 445 |
+
"europe": GeographicRegion(
|
| 446 |
+
"europe", 35, 72, 350, 40,
|
| 447 |
+
"Europe"
|
| 448 |
+
),
|
| 449 |
+
"asia_east": GeographicRegion(
|
| 450 |
+
"asia_east", 15, 55, 100, 145,
|
| 451 |
+
"East Asia"
|
| 452 |
+
),
|
| 453 |
+
"australia": GeographicRegion(
|
| 454 |
+
"australia", -45, -10, 110, 155,
|
| 455 |
+
"Australia and surrounding waters"
|
| 456 |
+
),
|
| 457 |
+
# El Niño regions
|
| 458 |
+
"nino34": GeographicRegion(
|
| 459 |
+
"nino34", -5, 5, 190, 240,
|
| 460 |
+
"El Niño 3.4 region (central Pacific)"
|
| 461 |
+
),
|
| 462 |
+
"nino3": GeographicRegion(
|
| 463 |
+
"nino3", -5, 5, 210, 270,
|
| 464 |
+
"El Niño 3 region (eastern Pacific)"
|
| 465 |
+
),
|
| 466 |
+
"nino4": GeographicRegion(
|
| 467 |
+
"nino4", -5, 5, 160, 210,
|
| 468 |
+
"El Niño 4 region (western Pacific)"
|
| 469 |
+
),
|
| 470 |
+
"nino12": GeographicRegion(
|
| 471 |
+
"nino12", -10, 0, 270, 280,
|
| 472 |
+
"El Niño 1+2 region (far eastern Pacific)"
|
| 473 |
+
),
|
| 474 |
+
}
|
| 475 |
+
|
| 476 |
+
|
| 477 |
+
def get_region(name: str) -> Optional[GeographicRegion]:
    """Get a geographic region by name (case-insensitive).

    Returns None when the name is not a key of GEOGRAPHIC_REGIONS.
    """
    return GEOGRAPHIC_REGIONS.get(name.lower())
|
| 480 |
+
|
| 481 |
+
|
| 482 |
+
def list_regions() -> str:
    """Return a human-readable table of the predefined geographic regions."""
    rows = ["Available Geographic Regions:", "=" * 70]
    for key, box in GEOGRAPHIC_REGIONS.items():
        rows.append(
            f"  {key:20} | lat: [{box.min_lat:6.1f}, {box.max_lat:6.1f}] "
            f"| lon: [{box.min_lon:6.1f}, {box.max_lon:6.1f}]"
        )
    return "\n".join(rows)
|
| 491 |
+
|
| 492 |
+
|
| 493 |
+
# =============================================================================
|
| 494 |
+
# AGENT CONFIGURATION
|
| 495 |
+
# =============================================================================
|
| 496 |
+
|
| 497 |
+
@dataclass
class AgentConfig:
    """Runtime configuration for the ERA5 Agent.

    All attributes are plain defaults; construct with keyword overrides to
    customize a run, e.g. ``AgentConfig(model_name=...)``.
    """

    # LLM Settings
    model_name: str = "gpt-5.2"
    temperature: float = 0  # 0 = deterministic sampling
    max_tokens: int = 4096
    
    # Data Settings
    data_source: str = "earthmover-public/era5-surface-aws"  # Icechunk store id
    default_query_type: str = "temporal"  # "temporal" or "spatial"
    max_download_size_gb: float = 2.0  # cap on a single download — presumably enforced in retrieval; verify

    # Retrieval Settings
    max_retries: int = 5
    retry_delay: float = 2.0  # seconds between retry attempts

    # Memory Settings
    enable_memory: bool = True
    max_conversation_history: int = 100  # messages retained
    memory_file: str = "conversation_history.json"

    # Visualization Settings
    default_figure_size: tuple = (12, 8)  # inches (matplotlib figsize)
    default_dpi: int = 150
    save_plots: bool = True
    plot_format: str = "png"

    # Kernel Settings
    kernel_timeout: float = 300.0  # seconds before a REPL execution times out
    # Packages imported automatically into the REPL kernel at startup.
    auto_import_packages: List[str] = field(default_factory=lambda: [
        "pandas", "numpy", "xarray",
        "matplotlib", "matplotlib.pyplot", "datetime"
    ])

    # Logging
    log_level: str = "INFO"
    log_to_file: bool = True
    log_file: str = "era5_agent.log"
|
| 537 |
+
|
| 538 |
+
|
| 539 |
+
# Global config instance
|
| 540 |
+
CONFIG = AgentConfig()
|
| 541 |
+
|
| 542 |
+
# Convenience path variables (for backward compatibility)
|
| 543 |
+
DATA_DIR = get_data_dir()
|
| 544 |
+
PLOTS_DIR = get_plots_dir()
|
| 545 |
+
|
| 546 |
+
|
| 547 |
+
# =============================================================================
|
| 548 |
+
# SYSTEM PROMPTS
|
| 549 |
+
# =============================================================================
|
| 550 |
+
|
| 551 |
+
AGENT_SYSTEM_PROMPT = """You are Eurus, an AI Climate Physicist conducting research for high-impact scientific publications.
|
| 552 |
+
|
| 553 |
+
## ⚠️ CRITICAL: RESPECT USER INTENT FIRST
|
| 554 |
+
|
| 555 |
+
**Your PRIMARY directive is to do EXACTLY what the user asks.**
|
| 556 |
+
|
| 557 |
+
### TOOL USAGE RULES:
|
| 558 |
+
1. **`python_repl`**: Use for:
|
| 559 |
+
- Custom analysis (anomalies, trends, statistics)
|
| 560 |
+
- Visualization with matplotlib
|
| 561 |
+
- Any computation not directly provided by other tools
|
| 562 |
+
|
| 563 |
+
2. **`retrieve_era5_data`**: Use for downloading climate data
|
| 564 |
+
|
| 565 |
+
3. **`calculate_maritime_route`**: Use for ship routing
|
| 566 |
+
|
| 567 |
+
4. **`get_analysis_guide`/`get_visualization_guide`**: Use for methodology help
|
| 568 |
+
|
| 569 |
+
### EXAMPLES:
|
| 570 |
+
- "Get temperature for Berlin and plot it" → Retrieve data, plot RAW temperature time series
|
| 571 |
+
- "Show temperature anomalies for Berlin" → Retrieve data, use python_repl to compute anomalies
|
| 572 |
+
- "Analyze temperature trends" → Retrieve data, use python_repl for trend calculation
|
| 573 |
+
- "Why was 2023 so hot?" → Retrieve data, analyze with python_repl
|
| 574 |
+
|
| 575 |
+
## YOUR CAPABILITIES
|
| 576 |
+
|
| 577 |
+
### 1. DATA RETRIEVAL: `retrieve_era5_data`
|
| 578 |
+
Downloads ERA5 reanalysis data from Earthmover's cloud-optimized archive.
|
| 579 |
+
|
| 580 |
+
**⚠️ STRICT QUERY TYPE RULE (WRONG = 10-100x SLOWER!):**
|
| 581 |
+
┌─────────────────────────────────────────────────────────────────┐
|
| 582 |
+
│ TEMPORAL: (time > 1 day) AND (area < 30°×30°) │
|
| 583 |
+
│ SPATIAL: (time ≤ 1 day) OR (area ≥ 30°×30°) │
|
| 584 |
+
└─────────────────────────────────────────────────────────────────┘
|
| 585 |
+
|
| 586 |
+
**COORDINATES - USE ROUTE BOUNDING BOX:**
|
| 587 |
+
- Latitude: -90 to 90
|
| 588 |
+
- Longitude: Use values from route tool's bounding box DIRECTLY!
|
| 589 |
+
- For Europe/Atlantic: Use -10 to 15 (NOT 0 to 360!)
|
| 590 |
+
- For Pacific crossing dateline: Use 0-360 system
|
| 591 |
+
|
| 592 |
+
**⚠️ CRITICAL:** When `calculate_maritime_route` returns a bounding box,
|
| 593 |
+
USE THOSE EXACT VALUES for min/max longitude. Do NOT convert to 0-360!
|
| 594 |
+
|
| 595 |
+
**DATA AVAILABILITY:** 1975 to present (updated regularly)
|
| 596 |
+
|
| 597 |
+
**Available Variables (22 total):**
|
| 598 |
+
| Variable | Description | Units | Category |
|
| 599 |
+
|----------|-------------|-------|----------|
|
| 600 |
+
| sst | Sea Surface Temperature | K | Ocean |
|
| 601 |
+
| t2 | 2m Air Temperature | K | Temperature |
|
| 602 |
+
| d2 | 2m Dewpoint Temperature | K | Temperature |
|
| 603 |
+
| skt | Skin Temperature | K | Surface |
|
| 604 |
+
| u10 | 10m U-Wind (Eastward) | m/s | Wind |
|
| 605 |
+
| v10 | 10m V-Wind (Northward) | m/s | Wind |
|
| 606 |
+
| u100 | 100m U-Wind (Eastward) | m/s | Wind |
|
| 607 |
+
| v100 | 100m V-Wind (Northward) | m/s | Wind |
|
| 608 |
+
| sp | Surface Pressure | Pa | Pressure |
|
| 609 |
+
| mslp | Mean Sea Level Pressure | Pa | Pressure |
|
| 610 |
+
| blh | Boundary Layer Height | m | Atmosphere |
|
| 611 |
+
| cape | Convective Available Potential Energy | J/kg | Atmosphere |
|
| 612 |
+
| tcc | Total Cloud Cover | 0-1 | Cloud |
|
| 613 |
+
| cp | Convective Precipitation | m | Precipitation |
|
| 614 |
+
| lsp | Large-scale Precipitation | m | Precipitation |
|
| 615 |
+
| tp | Total Precipitation | m | Precipitation |
|
| 616 |
+
| ssr | Surface Net Solar Radiation | J/m² | Radiation |
|
| 617 |
+
| ssrd | Surface Solar Radiation Downwards | J/m² | Radiation |
|
| 618 |
+
| tcw | Total Column Water | kg/m² | Moisture |
|
| 619 |
+
| tcwv | Total Column Water Vapour | kg/m² | Moisture |
|
| 620 |
+
| sd | Snow Depth | m water eq. | Land |
|
| 621 |
+
| stl1 | Soil Temperature Level 1 | K | Land |
|
| 622 |
+
| swvl1 | Volumetric Soil Water Layer 1 | m³/m³ | Land |
|
| 623 |
+
|
| 624 |
+
### 2. CUSTOM ANALYSIS: `python_repl`
|
| 625 |
+
Persistent Python kernel for custom analysis and visualization.
|
| 626 |
+
**Pre-loaded:** pandas (pd), numpy (np), xarray (xr), matplotlib.pyplot (plt)
|
| 627 |
+
|
| 628 |
+
#### What you can do with python_repl:
|
| 629 |
+
- **Anomalies**: `anomaly = data - data.mean('time')`
|
| 630 |
+
- **Z-Scores**: `z = (data - clim.mean('time')) / clim.std('time')`
|
| 631 |
+
- **Trends**: Use `scipy.stats.linregress` or numpy polyfit
|
| 632 |
+
- **Extremes**: Filter data where values exceed thresholds
|
| 633 |
+
- **Visualizations**: Any matplotlib plot saved to PLOTS_DIR
|
| 634 |
+
|
| 635 |
+
### 4. MEMORY
|
| 636 |
+
Remembers conversation history and previous analyses.
|
| 637 |
+
|
| 638 |
+
### 5. MARITIME LOGISTICS: `calculate_maritime_route` (Captain Mode)
|
| 639 |
+
Plans shipping routes and assesses climatological hazards.
|
| 640 |
+
|
| 641 |
+
**WORKFLOW (Mandatory Protocol):**
|
| 642 |
+
1. **ROUTE**: Call `calculate_maritime_route(origin_lat, origin_lon, dest_lat, dest_lon, month)`
|
| 643 |
+
- Returns waypoints avoiding land via global shipping lane graph
|
| 644 |
+
- Returns bounding box for data download
|
| 645 |
+
- Returns STEP-BY-STEP INSTRUCTIONS
|
| 646 |
+
|
| 647 |
+
2. **DATA**: Download ERA5 climatology for the route region
|
| 648 |
+
- Variables: `u10`, `v10` (10m wind components) → compute wind speed
|
| 649 |
+
- NOTE: `swh` (wave height) is NOT available in this dataset!
|
| 650 |
+
- Period: Target month over LAST 3 YEARS (e.g., July 2021-2023)
|
| 651 |
+
- Why 3 years? To compute climatological statistics, not just a forecast
|
| 652 |
+
|
| 653 |
+
3. **METHODOLOGY**: Call `get_visualization_guide(viz_type='maritime_risk_assessment')`
|
| 654 |
+
- Returns mathematical formulas for Lagrangian risk analysis
|
| 655 |
+
- Defines hazard thresholds (e.g., wind speed > 15 m/s = DANGER)
|
| 656 |
+
- Explains how to compute route risk score
|
| 657 |
+
|
| 658 |
+
4. **ANALYSIS**: Execute in `python_repl` following the methodology:
|
| 659 |
+
- Extract data at each waypoint (nearest neighbor)
|
| 660 |
+
- Compute wind speed: `wspd = sqrt(u10² + v10²)`
|
| 661 |
+
- Compute max/mean/p95 statistics
|
| 662 |
+
- Identify danger zones (wind > threshold)
|
| 663 |
+
- Calculate route-level risk score
|
| 664 |
+
|
| 665 |
+
5. **DECISION**:
|
| 666 |
+
- If danger zones found → Recommend route deviation
|
| 667 |
+
- If route safe → Confirm with confidence level
|
| 668 |
+
|
| 669 |
+
**Key Formulas (from methodology):**
|
| 670 |
+
- Wind speed: `wspd = sqrt(u10² + v10²)`
|
| 671 |
+
- Exceedance probability: `P = count(wspd > threshold) / N_total`
|
| 672 |
+
- Route risk: `max(wspd_i)` for all waypoints i
|
| 673 |
+
|
| 674 |
+
## SCIENTIFIC PROTOCOL (For Publication-Grade Analysis)
|
| 675 |
+
|
| 676 |
+
When the user requests scientific analysis:
|
| 677 |
+
|
| 678 |
+
1. **ANOMALY ANALYSIS**: Report:
|
| 679 |
+
- Anomalies: "2.5°C above normal"
|
| 680 |
+
- Z-Scores: "+2.5σ (statistically significant)"
|
| 681 |
+
- Use `python_repl` to compute anomalies from downloaded data
|
| 682 |
+
|
| 683 |
+
2. **MECHANISM**: Explain WHY:
|
| 684 |
+
- Use `python_repl` to look for patterns in the data
|
| 685 |
+
- Consider atmospheric blocking, ENSO teleconnections, etc.
|
| 686 |
+
|
| 687 |
+
3. **COMPOUND EVENTS**: Look for dangerous combinations with python_repl:
|
| 688 |
+
- High heat + Low wind = "Ocean Oven"
|
| 689 |
+
- Filter data where multiple thresholds are exceeded
|
| 690 |
+
|
| 691 |
+
4. **STATISTICAL RIGOR**: Always test significance:
|
| 692 |
+
- Use Z > 2σ for "extreme"
|
| 693 |
+
- Use p < 0.05 for trends
|
| 694 |
+
- Report confidence intervals when possible
|
| 695 |
+
|
| 696 |
+
## VISUALIZATION STANDARDS
|
| 697 |
+
|
| 698 |
+
**Publication-grade light-theme rcParams are pre-set** — figures get white background,
|
| 699 |
+
black text, grid, 300 DPI on save, and a high-contrast color cycle. Do NOT override unless necessary.
|
| 700 |
+
|
| 701 |
+
### Mandatory Rules
|
| 702 |
+
1. **DPI**: Saved at 300 (print-quality) — do not lower it
|
| 703 |
+
2. **Figure size**: Default 10×6 for time series, use `figsize=(12, 8)` for map plots
|
| 704 |
+
3. **Unit conversions in labels**:
|
| 705 |
+
- Temperature → always show °C (`- 273.15`)
|
| 706 |
+
- Pressure → show hPa (`/ 100`)
|
| 707 |
+
- Precipitation → show mm (`* 1000`)
|
| 708 |
+
4. **Colormaps**:
|
| 709 |
+
- SST/Temperature: `'RdYlBu_r'` or `'coolwarm'`
|
| 710 |
+
- Wind speed: `'YlOrRd'`
|
| 711 |
+
- Anomalies: `'RdBu_r'` (diverging, centered at zero via `TwoSlopeNorm`)
|
| 712 |
+
- Precipitation: `'YlGnBu'`
|
| 713 |
+
- Cloud cover: `'Greys'`
|
| 714 |
+
- **NEVER** use `'jet'`
|
| 715 |
+
5. **Colorbar**: Always include `label=` with units:
|
| 716 |
+
```python
|
| 717 |
+
cbar = plt.colorbar(mesh, label='SST (°C)', shrink=0.8)
|
| 718 |
+
```
|
| 719 |
+
6. **Maritime maps**: Call `get_analysis_guide(topic='maritime_visualization')` for the full template
|
| 720 |
+
|
| 721 |
+
### Available in REPL Namespace
|
| 722 |
+
`pd, np, xr, plt, mcolors, cm, datetime, timedelta, PLOTS_DIR`
|
| 723 |
+
|
| 724 |
+
|
| 725 |
+
## RESPONSE STYLE
|
| 726 |
+
- Be precise and scientific
|
| 727 |
+
- Follow user intent exactly
|
| 728 |
+
- Include statistical significance when doing scientific analysis
|
| 729 |
+
- Reference specific dates/locations
|
| 730 |
+
- Acknowledge limitations and uncertainty
|
| 731 |
+
- **NEVER list file paths** of saved plots in your response — plots are displayed automatically in the UI
|
| 732 |
+
- Do NOT say "you can view it here" or similar — the user already sees the plot inline
|
| 733 |
+
"""
|
| 734 |
+
|
| 735 |
+
|
| 736 |
+
# =============================================================================
|
| 737 |
+
# UTILITY FUNCTIONS
|
| 738 |
+
# =============================================================================
|
| 739 |
+
|
| 740 |
+
def format_file_size(size_bytes: int) -> str:
    """Render a byte count as a human-readable string (e.g. '1.50 KB').

    Walks the unit ladder B → KB → MB → GB → TB, dividing by 1024 at each
    step; anything past TB is reported in PB.
    """
    value = float(size_bytes)
    for suffix in ('B', 'KB', 'MB', 'GB', 'TB'):
        if value < 1024:
            return f"{value:.2f} {suffix}"
        value /= 1024
    # Exhausted the ladder: value is already scaled to petabytes.
    return f"{value:.2f} PB"
|
| 747 |
+
|
| 748 |
+
|
| 749 |
+
def get_timestamp() -> str:
    """Return the current local time as 'YYYY-MM-DD HH:MM:SS'."""
    # datetime.__format__ delegates to strftime, so this is equivalent
    # to datetime.now().strftime("%Y-%m-%d %H:%M:%S").
    return f"{datetime.now():%Y-%m-%d %H:%M:%S}"
|
src/eurus/logging_config.py
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Eurus Logging Configuration
|
| 3 |
+
============================
|
| 4 |
+
Centralized logging setup for both web and CLI modes.
|
| 5 |
+
Logs are saved to PROJECT_ROOT/logs/ with timestamps.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import os
|
| 9 |
+
import sys
|
| 10 |
+
import logging
|
| 11 |
+
from pathlib import Path
|
| 12 |
+
from datetime import datetime
|
| 13 |
+
|
| 14 |
+
# Project root
|
| 15 |
+
PROJECT_ROOT = Path(__file__).parent.parent.parent
|
| 16 |
+
|
| 17 |
+
# Logs directory
|
| 18 |
+
LOGS_DIR = PROJECT_ROOT / "logs"
|
| 19 |
+
LOGS_DIR.mkdir(exist_ok=True)
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def setup_logging(mode: str = "web", level: int = logging.DEBUG) -> logging.Logger:
    """
    Configure logging for Eurus.

    Installs two handlers on the root logger:
      - a file handler writing full DEBUG logs to a timestamped file in LOGS_DIR
      - a stdout handler whose level is taken from the ERA5_LOG_LEVEL env var
        (default INFO)

    Existing root handlers are removed first, so calling this repeatedly is safe.

    Args:
        mode: 'web' or 'cli' - determines log file prefix
        level: logging level for the root logger (default: DEBUG for full logs)

    Returns:
        Root logger configured with file and console handlers
    """
    # One log file per run, named eurus_<mode>_<timestamp>.log
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    log_file = LOGS_DIR / f"eurus_{mode}_{timestamp}.log"

    # Detailed format for the file; compact format for the console
    detailed_formatter = logging.Formatter(
        fmt="%(asctime)s | %(levelname)-8s | %(name)-30s | %(funcName)-20s | %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S"
    )
    console_formatter = logging.Formatter(
        fmt="%(asctime)s | %(levelname)-5s | %(name)s | %(message)s",
        datefmt="%H:%M:%S"
    )

    # Reconfigure the root logger from scratch
    root_logger = logging.getLogger()
    root_logger.setLevel(level)
    root_logger.handlers.clear()

    # File handler - always FULL DEBUG logs, regardless of console level
    file_handler = logging.FileHandler(log_file, encoding='utf-8')
    file_handler.setLevel(logging.DEBUG)
    file_handler.setFormatter(detailed_formatter)
    root_logger.addHandler(file_handler)

    # Console handler - respects ERA5_LOG_LEVEL env var (default: INFO).
    # getattr falls back to INFO when the env var names an unknown level.
    console_level_name = os.environ.get("ERA5_LOG_LEVEL", "INFO").upper()
    console_level = getattr(logging, console_level_name, logging.INFO)
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setLevel(console_level)
    console_handler.setFormatter(console_formatter)
    root_logger.addHandler(console_handler)

    # Startup banner. Plain strings / lazy %-args instead of the original
    # pointless f-strings (f"=" * 80 is just "=" * 80).
    logger = logging.getLogger("eurus.logging")
    logger.info("=" * 80)
    logger.info("EURUS %s STARTING", mode.upper())
    logger.info("Log file: %s", log_file)
    logger.info("=" * 80)

    # Reduce noise from chatty external libraries
    logging.getLogger("httpx").setLevel(logging.WARNING)
    logging.getLogger("httpcore").setLevel(logging.WARNING)
    logging.getLogger("urllib3").setLevel(logging.WARNING)
    logging.getLogger("asyncio").setLevel(logging.WARNING)
    logging.getLogger("uvicorn.access").setLevel(logging.INFO)

    return root_logger
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
def get_logger(name: str) -> logging.Logger:
    """Return the logger registered under *name* (thin logging.getLogger wrapper)."""
    return logging.getLogger(name)
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
# Cleanup old logs (keep last 20)
|
| 92 |
+
def cleanup_old_logs(keep: int = 20):
    """Delete old eurus_*.log files in LOGS_DIR, keeping the *keep* newest."""
    try:
        # Oldest first, ordered by modification time.
        history = sorted(LOGS_DIR.glob("eurus_*.log"), key=os.path.getmtime)
        # Negative slice mirrors the original: with keep=0 nothing is removed.
        stale = history[:-keep] if len(history) > keep else []
        for path in stale:
            path.unlink()
    except Exception:
        pass  # Best-effort: cleanup must never raise
|
src/eurus/memory.py
ADDED
|
@@ -0,0 +1,508 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ERA5 MCP Memory System
|
| 3 |
+
======================
|
| 4 |
+
|
| 5 |
+
Session-based memory with smart compression for conversation history.
|
| 6 |
+
Dataset cache persists across sessions, but conversations are fresh each session.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
from __future__ import annotations
|
| 10 |
+
|
| 11 |
+
import json
|
| 12 |
+
import logging
|
| 13 |
+
import os
|
| 14 |
+
import tiktoken
|
| 15 |
+
from dataclasses import asdict, dataclass, field
|
| 16 |
+
from datetime import datetime
|
| 17 |
+
from pathlib import Path
|
| 18 |
+
from typing import Any, Dict, List, Optional
|
| 19 |
+
|
| 20 |
+
from eurus.config import get_memory_dir, CONFIG
|
| 21 |
+
|
| 22 |
+
logger = logging.getLogger(__name__)
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
# ============================================================================
|
| 26 |
+
# CONFIGURATION
|
| 27 |
+
# ============================================================================
|
| 28 |
+
|
| 29 |
+
# Token limits for smart memory management
|
| 30 |
+
MAX_CONTEXT_TOKENS = 8000 # Max tokens to keep in active memory
|
| 31 |
+
COMPRESSION_THRESHOLD = 6000 # Start compressing when we hit this
|
| 32 |
+
SUMMARY_TARGET_TOKENS = 500 # Target tokens for compressed summary
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
# ============================================================================
|
| 36 |
+
# DATA STRUCTURES
|
| 37 |
+
# ============================================================================
|
| 38 |
+
|
| 39 |
+
@dataclass
class DatasetRecord:
    """Record of a single downloaded dataset in the cache registry."""

    path: str
    variable: str
    query_type: str
    start_date: str
    end_date: str
    lat_bounds: tuple[float, float]
    lon_bounds: tuple[float, float]
    file_size_bytes: int
    download_timestamp: str
    shape: Optional[tuple[int, ...]] = None

    def to_dict(self) -> dict:
        """Serialize this record to a plain dict (JSON-friendly)."""
        return asdict(self)

    @classmethod
    def from_dict(cls, data: dict) -> "DatasetRecord":
        """Rebuild a record from a dict, coercing JSON lists back to tuples."""
        # JSON round-trips tuples as lists; restore tuple types for the
        # fields that are declared as tuples.
        for key in ("lat_bounds", "lon_bounds", "shape"):
            value = data.get(key)
            if isinstance(value, list):
                data[key] = tuple(value)
        return cls(**data)
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
@dataclass
class Message:
    """A single conversation message (role + content + metadata)."""

    role: str
    content: str
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())
    is_compressed: bool = False  # marks auto-generated compression summaries

    def to_dict(self) -> dict:
        """Serialize to a plain dict."""
        return asdict(self)

    @classmethod
    def from_dict(cls, data: dict) -> "Message":
        """Build a Message from a dict, silently dropping unknown keys."""
        accepted = ('role', 'content', 'timestamp', 'is_compressed')
        return cls(**{key: data[key] for key in accepted if key in data})

    def to_langchain(self) -> dict:
        """Convert to LangChain message format."""
        return {"role": self.role, "content": self.content}
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
@dataclass
class AnalysisRecord:
    """Record of one analysis run (code, output, and artifacts produced)."""

    description: str
    code: str
    output: str
    timestamp: str
    datasets_used: List[str] = field(default_factory=list)
    plots_generated: List[str] = field(default_factory=list)

    def to_dict(self) -> dict:
        """Serialize to a plain dict."""
        return asdict(self)

    @classmethod
    def from_dict(cls, data: dict) -> "AnalysisRecord":
        """Rebuild a record from its dict form."""
        return cls(**data)
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
# ============================================================================
|
| 111 |
+
# TOKEN COUNTER
|
| 112 |
+
# ============================================================================
|
| 113 |
+
|
| 114 |
+
class TokenCounter:
    """Token counting via tiktoken, with a cheap heuristic fallback."""

    _encoder = None  # lazily-created encoder shared by all callers

    @classmethod
    def get_encoder(cls):
        """Return the shared tiktoken encoder, creating it on first use."""
        if cls._encoder is None:
            try:
                cls._encoder = tiktoken.encoding_for_model("gpt-4")
            except Exception:
                # Model-specific lookup failed; fall back to the generic encoding
                cls._encoder = tiktoken.get_encoding("cl100k_base")
        return cls._encoder

    @classmethod
    def count(cls, text: str) -> int:
        """Count tokens in *text*; on any encoder failure estimate ~4 chars/token."""
        try:
            tokens = cls.get_encoder().encode(text)
        except Exception:
            return len(text) // 4
        return len(tokens)
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
# ============================================================================
|
| 139 |
+
# SMART CONVERSATION MEMORY
|
| 140 |
+
# ============================================================================
|
| 141 |
+
|
| 142 |
+
class SmartConversationMemory:
    """
    Session-based conversation memory with smart compression.

    Features:
    - Fresh start each session (no persistent history)
    - Automatic compression when context gets too long
    - Preserves recent messages in full, compresses older ones
    - Token-aware memory management
    """

    def __init__(self):
        # Full message list; a compressed summary message may replace the
        # oldest entries once COMPRESSION_THRESHOLD is exceeded.
        self.messages: List[Message] = []
        self.compressed_summary: Optional[str] = None
        # Running total of tokens across message contents (via TokenCounter).
        self._token_count = 0
        logger.info("SmartConversationMemory initialized (fresh session)")

    def add_message(self, role: str, content: str) -> Message:
        """Add a message and check if compression is needed."""
        msg = Message(role=role, content=content)
        self.messages.append(msg)

        # Update token count
        self._token_count += TokenCounter.count(content)

        # Check if we need to compress
        if self._token_count > COMPRESSION_THRESHOLD:
            self._compress_history()

        return msg

    def _compress_history(self) -> None:
        """Compress older messages into a summary."""
        if len(self.messages) < 6:
            return  # Not enough messages to compress

        # Keep the last 4 messages in full
        keep_count = 4
        to_compress = self.messages[:-keep_count]
        to_keep = self.messages[-keep_count:]

        if not to_compress:
            return

        # Create a concise summary of compressed messages
        summary_parts = []
        for msg in to_compress:
            role = msg.role.upper()
            # Truncate long content for summary
            # (parses as: (content[:200] + "...") if too long, else content)
            content = msg.content[:200] + "..." if len(msg.content) > 200 else msg.content
            summary_parts.append(f"[{role}]: {content}")

        summary = "[Previous conversation summary]\n" + "\n".join(summary_parts)

        # Truncate summary to target token size
        while TokenCounter.count(summary) > SUMMARY_TARGET_TOKENS and summary:
            # Trim from the oldest messages in the summary
            # (drops line index 1 — the oldest entry — keeping the header line)
            lines = summary.split('\n')
            if len(lines) <= 2:
                break
            summary = lines[0] + '\n' + '\n'.join(lines[2:])

        # Compressed history becomes a single system message flagged
        # is_compressed=True so it can be told apart from real messages.
        summary_msg = Message(
            role="system",
            content=summary,
            is_compressed=True
        )

        self.messages = [summary_msg] + to_keep

        # Recalculate token count
        self._token_count = sum(
            TokenCounter.count(m.content) for m in self.messages
        )

        logger.info(f"Compressed {len(to_compress)} messages. Current tokens: {self._token_count}")

    def get_messages(self, n_messages: Optional[int] = None) -> List[Message]:
        """Get conversation messages (all when n_messages is None, else the last n)."""
        if n_messages is None:
            return list(self.messages)
        return list(self.messages)[-n_messages:]

    def get_langchain_messages(self, n_messages: Optional[int] = None) -> List[dict]:
        """Get messages in LangChain format."""
        messages = self.get_messages(n_messages)
        return [m.to_langchain() for m in messages]

    def clear(self) -> None:
        """Clear all messages."""
        self.messages.clear()
        self.compressed_summary = None
        self._token_count = 0
        logger.info("Conversation memory cleared")

    def get_token_count(self) -> int:
        """Get current token count."""
        return self._token_count
|
| 240 |
+
|
| 241 |
+
|
| 242 |
+
# ============================================================================
|
| 243 |
+
# MEMORY MANAGER
|
| 244 |
+
# ============================================================================
|
| 245 |
+
|
| 246 |
+
class MemoryManager:
    """
    Manages memory for ERA5 MCP.

    Features:
    - Dataset cache registry (persists across sessions)
    - Session-based conversation history (fresh each restart)
    - Smart compression for long conversations
    - NO persistent conversation history to avoid stale context
    """

    def __init__(self, memory_dir: Optional[Path] = None, persist_conversations: bool = False):
        # NOTE(review): persist_conversations is stored but never read in
        # this class — conversations are always session-only here.
        self.memory_dir = memory_dir or get_memory_dir()
        self.memory_dir.mkdir(parents=True, exist_ok=True)
        self.persist_conversations = persist_conversations

        # File paths (only datasets persist)
        self.datasets_file = self.memory_dir / "datasets.json"
        self.analyses_file = self.memory_dir / "analyses.json"

        # In-memory storage
        self.datasets: Dict[str, DatasetRecord] = {}
        self.analyses: List[AnalysisRecord] = []

        # Session-based conversation memory (FRESH each time!)
        self.conversation_memory = SmartConversationMemory()

        # Load persistent data (only datasets)
        self._load_datasets()
        self._load_analyses()

        logger.info(
            f"MemoryManager initialized: {len(self.datasets)} datasets, "
            f"FRESH conversation (session-based)"
        )

    # ========================================================================
    # PERSISTENCE (Datasets only)
    # ========================================================================

    def _load_datasets(self) -> None:
        """Load dataset registry from disk."""
        # Best-effort: a corrupt/unreadable registry just starts empty.
        if self.datasets_file.exists():
            try:
                with open(self.datasets_file, "r") as f:
                    data = json.load(f)
                for path, record_data in data.items():
                    self.datasets[path] = DatasetRecord.from_dict(record_data)
            except Exception as e:
                logger.warning(f"Failed to load datasets: {e}")

    def _save_datasets(self) -> None:
        """Save dataset registry to disk."""
        try:
            with open(self.datasets_file, "w") as f:
                json.dump({p: r.to_dict() for p, r in self.datasets.items()}, f, indent=2)
        except Exception as e:
            logger.error(f"Failed to save datasets: {e}")

    def _load_analyses(self) -> None:
        """Load analysis history from disk."""
        if self.analyses_file.exists():
            try:
                with open(self.analyses_file, "r") as f:
                    data = json.load(f)
                self.analyses = [AnalysisRecord.from_dict(r) for r in data[-20:]]  # Keep last 20
            except Exception as e:
                logger.warning(f"Failed to load analyses: {e}")

    def _save_analyses(self) -> None:
        """Save analysis history to disk."""
        try:
            with open(self.analyses_file, "w") as f:
                json.dump([a.to_dict() for a in self.analyses[-20:]], f, indent=2)
        except Exception as e:
            logger.error(f"Failed to save analyses: {e}")

    # ========================================================================
    # DATASET MANAGEMENT
    # ========================================================================

    def register_dataset(
        self,
        path: str,
        variable: str,
        query_type: str,
        start_date: str,
        end_date: str,
        lat_bounds: tuple[float, float],
        lon_bounds: tuple[float, float],
        file_size_bytes: int = 0,
        shape: Optional[tuple[int, ...]] = None,
    ) -> DatasetRecord:
        """Register a downloaded dataset.

        Overwrites any existing record at the same path and persists the
        registry immediately.
        """
        record = DatasetRecord(
            path=path,
            variable=variable,
            query_type=query_type,
            start_date=start_date,
            end_date=end_date,
            lat_bounds=lat_bounds,
            lon_bounds=lon_bounds,
            file_size_bytes=file_size_bytes,
            download_timestamp=datetime.now().isoformat(),
            shape=shape,
        )
        self.datasets[path] = record
        self._save_datasets()
        logger.info(f"Registered dataset: {path}")
        return record

    def get_dataset(self, path: str) -> Optional[DatasetRecord]:
        """Get dataset record by path."""
        return self.datasets.get(path)

    def list_datasets(self) -> str:
        """Return formatted list of cached datasets.

        Entries whose file no longer exists on disk are shown as [MISSING].
        """
        if not self.datasets:
            return "No datasets in cache."

        lines = ["Cached Datasets:", "=" * 70]
        for path, record in self.datasets.items():
            if os.path.exists(path):
                size_str = self._format_size(record.file_size_bytes)
                lines.append(
                    f" {record.variable:5} | {record.start_date} to {record.end_date} | "
                    f"{record.query_type:8} | {size_str:>10}"
                )
                lines.append(f" Path: {path}")
            else:
                lines.append(f" [MISSING] {path}")

        return "\n".join(lines)

    def cleanup_missing_datasets(self) -> int:
        """Remove records for datasets that no longer exist.

        Returns:
            Number of records removed.
        """
        missing = [p for p in self.datasets if not os.path.exists(p)]
        for path in missing:
            del self.datasets[path]
            logger.info(f"Removed missing dataset: {path}")
        if missing:
            self._save_datasets()
        return len(missing)

    # ========================================================================
    # CONVERSATION MANAGEMENT (Session-based)
    # ========================================================================

    def add_message(self, role: str, content: str) -> Message:
        """Add a message to conversation history."""
        return self.conversation_memory.add_message(role, content)

    def get_conversation_history(self, n_messages: Optional[int] = None) -> List[Message]:
        """Get recent conversation history."""
        return self.conversation_memory.get_messages(n_messages)

    def clear_conversation(self) -> None:
        """Clear conversation history."""
        self.conversation_memory.clear()
        logger.info("Conversation history cleared")

    def get_langchain_messages(self, n_messages: Optional[int] = None) -> List[dict]:
        """Get messages in LangChain format."""
        return self.conversation_memory.get_langchain_messages(n_messages)

    # Legacy property for compatibility
    @property
    def conversations(self) -> List[Message]:
        # Exposes the live message list (not a copy) for older callers.
        return self.conversation_memory.messages

    # ========================================================================
    # ANALYSIS TRACKING
    # ========================================================================

    def record_analysis(
        self,
        description: str,
        code: str,
        output: str,
        datasets_used: Optional[List[str]] = None,
        plots_generated: Optional[List[str]] = None,
    ) -> AnalysisRecord:
        """Record an analysis for history.

        Output is truncated to 2000 characters before storage; the history
        file keeps only the most recent 20 analyses.
        """
        record = AnalysisRecord(
            description=description,
            code=code,
            output=output[:2000],  # Truncate long output
            timestamp=datetime.now().isoformat(),
            datasets_used=datasets_used or [],
            plots_generated=plots_generated or [],
        )
        self.analyses.append(record)
        self._save_analyses()
        return record

    def get_recent_analyses(self, n: int = 10) -> List[AnalysisRecord]:
        """Get recent analyses."""
        return self.analyses[-n:]

    # ========================================================================
    # CONTEXT SUMMARY
    # ========================================================================

    def get_context_summary(self) -> str:
        """Get a summary of current context for the agent."""
        lines = []

        # Token usage
        tokens = self.conversation_memory.get_token_count()
        if tokens > 0:
            lines.append(f"Session tokens: {tokens}/{MAX_CONTEXT_TOKENS}")

        # Recent conversation (brief)
        recent = self.get_conversation_history(3)
        if recent:
            lines.append("\nRecent in this session:")
            for msg in recent:
                preview = msg.content[:80] + "..." if len(msg.content) > 80 else msg.content
                lines.append(f" [{msg.role}]: {preview}")

        # Available datasets (only those whose file still exists; show first 5)
        valid_datasets = {p: r for p, r in self.datasets.items() if os.path.exists(p)}
        if valid_datasets:
            lines.append(f"\nCached Datasets ({len(valid_datasets)}):")
            for path, record in list(valid_datasets.items())[:5]:
                lines.append(f" - {record.variable}: {record.start_date} to {record.end_date}")

        return "\n".join(lines) if lines else "Fresh session - no context yet."

    # ========================================================================
    # UTILITIES
    # ========================================================================

    @staticmethod
    def _format_size(size_bytes: int) -> str:
        """Format file size in human-readable format (B through TB)."""
        for unit in ["B", "KB", "MB", "GB"]:
            if size_bytes < 1024:
                return f"{size_bytes:.1f} {unit}"
            size_bytes /= 1024
        return f"{size_bytes:.1f} TB"
|
| 487 |
+
|
| 488 |
+
|
| 489 |
+
# ============================================================================
|
| 490 |
+
# GLOBAL INSTANCE
|
| 491 |
+
# ============================================================================
|
| 492 |
+
|
| 493 |
+
# Lazily-created process-wide singleton; access only via get_memory()/reset_memory().
_memory_instance: Optional[MemoryManager] = None
|
| 494 |
+
|
| 495 |
+
|
| 496 |
+
def get_memory() -> MemoryManager:
    """Get the global memory manager instance.

    Creates the singleton on first use; later calls return the same object.
    """
    global _memory_instance
    if _memory_instance is not None:
        return _memory_instance
    _memory_instance = MemoryManager()
    return _memory_instance
|
| 502 |
+
|
| 503 |
+
|
| 504 |
+
def reset_memory() -> None:
    """Reset the global memory instance (new session).

    Drops the singleton so the next get_memory() call constructs a fresh
    MemoryManager; does not persist or flush anything itself.
    """
    global _memory_instance
    _memory_instance = None
    logger.info("Memory reset - next get_memory() will create fresh session")
|
src/eurus/retrieval.py
ADDED
|
@@ -0,0 +1,536 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ERA5 Data Retrieval
|
| 3 |
+
===================
|
| 4 |
+
|
| 5 |
+
Cloud-optimized data retrieval from Earthmover's ERA5 archive.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import json
|
| 11 |
+
import logging
|
| 12 |
+
import os
|
| 13 |
+
import shutil
|
| 14 |
+
import threading
|
| 15 |
+
import time
|
| 16 |
+
from datetime import datetime, timedelta
|
| 17 |
+
from pathlib import Path
|
| 18 |
+
from typing import Optional
|
| 19 |
+
from urllib.request import Request, urlopen
|
| 20 |
+
|
| 21 |
+
from eurus.config import (
|
| 22 |
+
CONFIG,
|
| 23 |
+
get_data_dir,
|
| 24 |
+
get_region,
|
| 25 |
+
get_short_name,
|
| 26 |
+
get_variable_info,
|
| 27 |
+
list_available_variables,
|
| 28 |
+
)
|
| 29 |
+
from eurus.memory import get_memory
|
| 30 |
+
|
| 31 |
+
logger = logging.getLogger(__name__)
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def _format_coord(value: float) -> str:
|
| 35 |
+
"""Format coordinates for stable, filename-safe identifiers."""
|
| 36 |
+
if abs(value) < 0.005:
|
| 37 |
+
value = 0.0
|
| 38 |
+
return f"{value:.2f}"
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def generate_filename(
    variable: str,
    query_type: str,
    start: str,
    end: str,
    min_latitude: float,
    max_latitude: float,
    min_longitude: float,
    max_longitude: float,
    region: Optional[str] = None,
) -> str:
    """Generate a descriptive filename for the dataset.

    When a named region is supplied its lowercase name is used as the
    area tag; otherwise the tag is built from the four bounding
    coordinates via _format_coord.
    """
    var_part = variable.replace("_", "")
    start_part = start.replace("-", "")
    end_part = end.replace("-", "")
    if region:
        area_tag = region.lower()
    else:
        lat_part = f"lat{_format_coord(min_latitude)}_{_format_coord(max_latitude)}"
        lon_part = f"_lon{_format_coord(min_longitude)}_{_format_coord(max_longitude)}"
        area_tag = lat_part + lon_part
    return f"era5_{var_part}_{query_type}_{start_part}_{end_part}_{area_tag}.zarr"
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def format_file_size(size_bytes: int) -> str:
    """Format file size in human-readable format (B/KB/MB/GB/TB, 2 decimals)."""
    remaining = float(size_bytes)
    for suffix in ("B", "KB", "MB", "GB"):
        if remaining < 1024:
            return f"{remaining:.2f} {suffix}"
        remaining /= 1024
    # Fell through all four units: report in terabytes.
    return f"{remaining:.2f} TB"
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
# Guards the one-time AWS region auto-detection in _ensure_aws_region.
_aws_region_lock = threading.Lock()
# Process-wide flag: once True, _ensure_aws_region becomes a no-op.
_aws_region_set = False
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
def _ensure_aws_region(api_key: str, repo_name: Optional[str] = None) -> None:
    """
    Populate AWS S3 region/endpoint env vars from Arraylake repo metadata.

    Some environments fail S3 resolution unless region/endpoint are explicit.

    Args:
        api_key: Arraylake bearer token used to fetch the repo metadata.
        repo_name: Repository to inspect; defaults to CONFIG.data_source.

    Runs at most once per process: success, failure, and missing/malformed
    metadata all mark completion via the module-level _aws_region_set flag.
    """
    global _aws_region_set
    if _aws_region_set:
        return  # Only run once per process

    with _aws_region_lock:
        if _aws_region_set:
            return  # Double-checked locking

        # Bug fix: the early returns for malformed metadata below previously
        # skipped setting _aws_region_set, so every later call re-issued the
        # HTTP metadata request. try/finally now marks completion on all paths.
        try:
            repo = repo_name or CONFIG.data_source
            try:
                req = Request(
                    f"https://api.earthmover.io/repos/{repo}",
                    headers={"Authorization": f"Bearer {api_key}"},
                )
                with urlopen(req, timeout=30) as resp:
                    payload = resp.read().decode("utf-8")
                repo_meta = json.loads(payload)
            except Exception as exc:
                # Best-effort: missing metadata just means no auto-configuration.
                logger.debug("Could not auto-detect AWS region from Arraylake metadata: %s", exc)
                return

            if not isinstance(repo_meta, dict):
                return

            bucket = repo_meta.get("bucket")
            if not isinstance(bucket, dict):
                return

            extra_cfg = bucket.get("extra_config")
            if not isinstance(extra_cfg, dict):
                return

            region_name = extra_cfg.get("region_name")
            if not isinstance(region_name, str) or not region_name:
                return

            endpoint = f"https://s3.{region_name}.amazonaws.com"
            desired_values = {
                "AWS_REGION": region_name,
                "AWS_DEFAULT_REGION": region_name,
                "AWS_ENDPOINT_URL": endpoint,
                "AWS_S3_ENDPOINT": endpoint,
            }
            updated = False
            for key, value in desired_values.items():
                # Never clobber values the user has already set explicitly.
                if not os.environ.get(key):
                    os.environ[key] = value
                    updated = True

            if updated:
                logger.info(
                    "Auto-set AWS region/endpoint for Arraylake: region=%s endpoint=%s",
                    region_name,
                    endpoint,
                )
        finally:
            # Mark completion on every exit path so we never retry per call.
            _aws_region_set = True
|
| 142 |
+
|
| 143 |
+
|
| 144 |
+
def retrieve_era5_data(
    query_type: str,
    variable_id: str,
    start_date: str,
    end_date: str,
    min_latitude: float = -90.0,
    max_latitude: float = 90.0,
    min_longitude: float = 0.0,
    max_longitude: float = 359.75,
    region: Optional[str] = None,
) -> str:
    """
    Retrieve ERA5 reanalysis data from Earthmover's cloud-optimized archive.

    Workflow: validate env/deps and dates, check the local Zarr cache,
    then subset the remote store (handling prime-meridian wrap) and save
    the result locally, registering it in the memory manager.

    Args:
        query_type: Either "temporal" (time series) or "spatial" (maps)
        variable_id: ERA5 variable name (e.g., "sst", "t2", "u10")
        start_date: Start date in YYYY-MM-DD format
        end_date: End date in YYYY-MM-DD format
        min_latitude: Southern bound (-90 to 90)
        max_latitude: Northern bound (-90 to 90)
        min_longitude: Western bound (0 to 360)
        max_longitude: Eastern bound (0 to 360)
        region: Optional predefined region name (overrides lat/lon)

    Returns:
        Success message with file path, or error message.

    Raises:
        No exceptions raised - errors returned as strings.
    """
    memory = get_memory()

    # Get API key
    api_key = os.environ.get("ARRAYLAKE_API_KEY")
    if not api_key:
        return (
            "Error: ARRAYLAKE_API_KEY not found in environment.\n"
            "Please set it via environment variable or .env file."
        )
    _ensure_aws_region(api_key)

    # Check dependencies (imported lazily so the module loads without them)
    try:
        import icechunk  # noqa: F401
    except ImportError:
        return (
            "Error: The 'icechunk' library is required.\n"
            "Install with: pip install icechunk"
        )

    try:
        import xarray as xr
    except ImportError:
        return (
            "Error: The 'xarray' library is required.\n"
            "Install with: pip install xarray"
        )

    # Apply region bounds if specified (named region overrides explicit bounds)
    region_tag = None
    if region:
        region_info = get_region(region)
        if region_info:
            min_latitude = region_info.min_lat
            max_latitude = region_info.max_lat
            min_longitude = region_info.min_lon
            max_longitude = region_info.max_lon
            region_tag = region.lower()
            logger.info(f"Using region '{region}'")
        else:
            logger.warning(f"Unknown region '{region}', using provided coordinates")

    # Resolve variable name
    short_var = get_short_name(variable_id)
    var_info = get_variable_info(variable_id)

    # Check for future / too-recent dates (ERA5T has a ~5-day processing lag)
    req_start = datetime.strptime(start_date, '%Y-%m-%d')
    if req_start > datetime.now() - timedelta(days=5):
        return (
            f"Error: Requested start date ({start_date}) is too recent or in the future.\n"
            f"ERA5 data has a ~5-day processing lag. Please request dates at least 5 days ago."
        )

    # Setup paths — filename encodes variable, mode, dates and area so it
    # doubles as the cache key.
    output_dir = get_data_dir()
    filename = generate_filename(
        short_var,
        query_type,
        start_date,
        end_date,
        min_latitude,
        max_latitude,
        min_longitude,
        max_longitude,
        region_tag,
    )
    local_path = str(output_dir / filename)

    # Check cache first
    if os.path.exists(local_path):
        existing = memory.get_dataset(local_path)
        if existing:
            logger.info(f"Cache hit: {local_path}")
            var_name = f"{short_var} ({var_info.long_name})" if var_info else short_var
            return (
                f"CACHE HIT - Data already downloaded\n"
                f" Variable: {var_name}\n"
                f" Period: {existing.start_date} to {existing.end_date}\n"
                f" Path: {local_path}\n\n"
                f"Load with: ds = xr.open_dataset('{local_path}', engine='zarr')"
            )
        else:
            # File exists but not registered - register it (best-effort;
            # registration failure still reports the cache hit)
            try:
                file_size = sum(f.stat().st_size for f in Path(local_path).rglob("*") if f.is_file())
                memory.register_dataset(
                    path=local_path,
                    variable=short_var,
                    query_type=query_type,
                    start_date=start_date,
                    end_date=end_date,
                    lat_bounds=(min_latitude, max_latitude),
                    lon_bounds=(min_longitude, max_longitude),
                    file_size_bytes=file_size,
                )
            except Exception as e:
                logger.warning(f"Could not register existing dataset: {e}")

            return (
                f"CACHE HIT - Found existing data\n"
                f" Variable: {short_var}\n"
                f" Path: {local_path}\n\n"
                f"Load with: ds = xr.open_dataset('{local_path}', engine='zarr')"
            )

    # Guard: spatial queries are chunked for map access — multi-year ranges
    # cause thousands of S3 chunk fetches and streaming errors.
    # Limit spatial queries to 1 year max; suggest splitting or using temporal mode.
    req_end = datetime.strptime(end_date, '%Y-%m-%d')
    date_span_days = (req_end - req_start).days
    if query_type == "spatial" and date_span_days > 366:
        return (
            f"Error: Spatial queries are limited to 1 year max ({date_span_days} days requested).\n"
            f"The spatial dataset is optimised for maps, not long time series.\n\n"
            f"Options:\n"
            f"1. Split into yearly requests (e.g. one call per year)\n"
            f"2. Use query_type='temporal' for multi-year time-series analysis\n"
            f"3. Narrow the date range to ≤ 366 days"
        )

    # Download with retry logic (exponential backoff between attempts)
    for attempt in range(CONFIG.max_retries):
        try:
            from arraylake import Client

            logger.info(f"Connecting to Earthmover (attempt {attempt + 1})...")

            client = Client(token=api_key)
            repo = client.get_repo(CONFIG.data_source)
            session = repo.readonly_session("main")

            logger.info(f"Opening {query_type} dataset...")
            # NOTE(review): zarr_format=3 / consolidated=False match the
            # Arraylake store layout; the group name equals the query type.
            ds = xr.open_dataset(
                session.store,
                engine="zarr",
                consolidated=False,
                zarr_format=3,
                chunks=None,
                group=query_type,
            )

            # Validate variable exists
            # Auto-compute tp = cp + lsp if tp is not directly available
            compute_tp = False
            if short_var not in ds:
                if short_var == "tp" and "cp" in ds and "lsp" in ds:
                    logger.info("Variable 'tp' not in store — will compute tp = cp + lsp")
                    compute_tp = True
                else:
                    available = list(ds.data_vars)
                    return (
                        f"Error: Variable '{short_var}' not found in dataset.\n"
                        f"Available variables: {', '.join(available)}\n\n"
                        f"Variable reference:\n{list_available_variables()}"
                    )

            # ERA5 latitude is stored 90 -> -90 (descending)
            lat_slice = slice(max_latitude, min_latitude)

            # Handle longitude - ERA5 uses 0-360 but we accept -180 to 180
            # CRITICAL: If coordinates are in Europe (-10 to 30), we need to
            # convert to 0-360 for ERA5's coordinate system

            # Special case: Full world range (-180 to 180)
            # Both become 180 after % 360, which creates empty slice!
            if min_longitude == -180 and max_longitude == 180:
                req_min = 0.0
                req_max = 360.0
            elif min_longitude > max_longitude and min_longitude >= 0 and max_longitude >= 0:
                # Already in 0-360 format but wraps around 0° (e.g., Mediterranean: 354 to 42)
                # This comes from predefined regions — go directly to two-slice logic
                req_min = min_longitude
                req_max = max_longitude
            elif min_longitude < 0:
                # Convert -180/+180 to 0-360 for ERA5
                # e.g., -0.9 becomes 359.1
                req_min = min_longitude % 360
                req_max = max_longitude if max_longitude >= 0 else max_longitude % 360
            else:
                req_min = min_longitude
                req_max = max_longitude if max_longitude >= 0 else max_longitude % 360

            # Now handle the actual slicing
            # If min > max after conversion, it means we span the prime meridian (0°)
            # e.g., req_min=359.1 (was -0.9) and req_max=25.9 means we need 359.1->360 + 0->25.9
            if req_min > req_max:
                # Crosses prime meridian in ERA5's 0-360 system
                # We need to get two slices and concatenate
                logger.info(f"Region spans prime meridian: {req_min:.1f}° to {req_max:.1f}° (ERA5 coords)")

                # Get western portion (from req_min to 360)
                west_slice = slice(req_min, 360.0)
                # Get eastern portion (from 0 to req_max)
                east_slice = slice(0.0, req_max)

                # Subset both portions
                logger.info("Subsetting data (two-part: west + east of prime meridian)...")
                # When computing tp we fetch both component variables.
                fetch_vars = ["cp", "lsp"] if compute_tp else [short_var]
                subsets_all = []
                for fv in fetch_vars:
                    subset_west = ds[fv].sel(
                        time=slice(start_date, end_date),
                        latitude=lat_slice,
                        longitude=west_slice,
                    )
                    subset_east = ds[fv].sel(
                        time=slice(start_date, end_date),
                        latitude=lat_slice,
                        longitude=east_slice,
                    )

                    # Convert western longitudes from 360+ to negative (for -180/+180 output)
                    # e.g., 359.1 -> -0.9
                    subset_west = subset_west.assign_coords(
                        longitude=subset_west.longitude - 360
                    )

                    # Concatenate along longitude
                    subsets_all.append(xr.concat([subset_west, subset_east], dim='longitude'))

                if compute_tp:
                    subset = (subsets_all[0] + subsets_all[1]).rename("tp")
                else:
                    subset = subsets_all[0]
            else:
                # Normal case - no prime meridian crossing
                lon_slice = slice(req_min, req_max)

                # Subset the data
                logger.info("Subsetting data...")
                fetch_vars = ["cp", "lsp"] if compute_tp else [short_var]
                subsets_all = []
                for fv in fetch_vars:
                    subsets_all.append(ds[fv].sel(
                        time=slice(start_date, end_date),
                        latitude=lat_slice,
                        longitude=lon_slice,
                    ))

                if compute_tp:
                    subset = (subsets_all[0] + subsets_all[1]).rename("tp")
                else:
                    subset = subsets_all[0]

            # Convert to dataset
            ds_out = subset.to_dataset(name=short_var)

            # Check for empty time dimension (no data in requested range)
            if ds_out.dims.get('time', 0) == 0:
                # Get actual data availability
                time_max = ds['time'].max().values
                import numpy as np
                last_available = str(np.datetime_as_string(time_max, unit='D'))
                return (
                    f"Error: No data available for the requested time range.\n"
                    f"Requested: {start_date} to {end_date}\n"
                    f"ERA5 data on Arraylake is available until {last_available}.\n\n"
                    f"Please request dates up to {last_available}."
                )

            # Check for empty data (all NaNs) — only check 1st timestep
            # Guard: skip the check for very large spatial slices to prevent OOM
            first_step = ds_out[short_var].isel(time=0)
            if first_step.size < 500_000 and first_step.isnull().all().compute():
                return (
                    f"Error: The downloaded data for '{short_var}' is entirely empty (NaNs).\n"
                    f"Possible causes:\n"
                    f"1. The requested date/region has no data (e.g., SST over land).\n"
                    f"2. The request is too recent (ERA5T has a 5-day delay).\n"
                    f"3. Region bounds might be invalid or cross the prime meridian incorrectly."
                )

            # Size guard — prevent downloading datasets larger than the configured limit
            estimated_gb = ds_out.nbytes / (1024 ** 3)
            if estimated_gb > CONFIG.max_download_size_gb:
                return (
                    f"Error: Estimated download size ({estimated_gb:.1f} GB) exceeds the "
                    f"{CONFIG.max_download_size_gb} GB limit.\n"
                    f"Try narrowing the time range or spatial area."
                )

            # Clear encoding for clean serialization
            for var in ds_out.variables:
                ds_out[var].encoding = {}

            # Add metadata
            ds_out.attrs["source"] = "ERA5 Reanalysis via Earthmover Arraylake"
            ds_out.attrs["download_date"] = datetime.now().isoformat()
            ds_out.attrs["query_type"] = query_type
            if var_info:
                ds_out[short_var].attrs["long_name"] = var_info.long_name
                ds_out[short_var].attrs["units"] = var_info.units

            # Clean up existing file (unregistered leftovers from a prior run)
            if os.path.exists(local_path):
                shutil.rmtree(local_path)

            # Save to Zarr
            logger.info(f"Saving to {local_path}...")
            start_time = time.time()
            ds_out.to_zarr(local_path, mode="w", consolidated=True, compute=True)
            download_time = time.time() - start_time

            # Get actual file size
            file_size = sum(f.stat().st_size for f in Path(local_path).rglob("*") if f.is_file())
            shape = tuple(ds_out[short_var].shape)

            # Register in memory
            memory.register_dataset(
                path=local_path,
                variable=short_var,
                query_type=query_type,
                start_date=start_date,
                end_date=end_date,
                lat_bounds=(min_latitude, max_latitude),
                lon_bounds=(min_longitude, max_longitude),
                file_size_bytes=file_size,
                shape=shape,
            )

            # Build success message
            result = f"SUCCESS - Data downloaded\n{'='*50}\n Variable: {short_var}"
            if var_info:
                result += f" ({var_info.long_name})"
            result += (
                f"\n Units: {var_info.units if var_info else 'Unknown'}\n"
                f" Period: {start_date} to {end_date}\n"
                f" Shape: {shape}\n"
                f" Size: {format_file_size(file_size)}\n"
                f" Time: {download_time:.1f}s\n"
                f" Path: {local_path}\n"
                f"{'='*50}\n\n"
                f"Load with:\n"
                f" ds = xr.open_dataset('{local_path}', engine='zarr')"
            )
            return result

        except Exception as e:
            error_msg = str(e)
            logger.error(f"Attempt {attempt + 1} failed: {error_msg}")

            # Clean up partial download
            if os.path.exists(local_path):
                shutil.rmtree(local_path, ignore_errors=True)

            if attempt < CONFIG.max_retries - 1:
                # Exponential backoff: retry_delay * 2^attempt seconds.
                wait_time = CONFIG.retry_delay * (2**attempt)
                logger.info(f"Retrying in {wait_time:.1f}s...")
                time.sleep(wait_time)
            else:
                return (
                    f"Error: Failed after {CONFIG.max_retries} attempts.\n"
                    f"Last error: {error_msg}\n\n"
                    f"Troubleshooting:\n"
                    f"1. Check your ARRAYLAKE_API_KEY\n"
                    f"2. Verify internet connection\n"
                    f"3. Try a smaller date range or region\n"
                    f"4. Check if variable '{short_var}' is available"
                )

    # Unreachable in practice: every retry path either returns or raises.
    return "Error: Unexpected failure in retrieval logic."
|
src/eurus/server.py
ADDED
|
@@ -0,0 +1,258 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
"""
ERA5 MCP Server
===============

Model Context Protocol server for ERA5 climate data retrieval.

Usage:
    eurus-mcp # If installed as package
    python -m eurus.server # Direct execution

Configuration via environment variables:
    ARRAYLAKE_API_KEY - Required for data access
    ERA5_DATA_DIR - Data storage directory (default: ./data)
    ERA5_MEMORY_DIR - Memory storage directory (default: ./.memory)
    ERA5_MAX_RETRIES - Download retry attempts (default: 3)
    ERA5_LOG_LEVEL - Logging level (default: INFO)
"""

from __future__ import annotations

import asyncio
import logging
import os
import sys
from typing import Any

from dotenv import load_dotenv

# Load environment variables early
load_dotenv()

# Configure logging (level taken from ERA5_LOG_LEVEL, default INFO)
log_level = os.environ.get("ERA5_LOG_LEVEL", "INFO").upper()
logging.basicConfig(
    level=getattr(logging, log_level),
    format="%(asctime)s | %(levelname)s | %(name)s | %(message)s",
    datefmt="%H:%M:%S",
)
logger = logging.getLogger(__name__)

# Import MCP components — the server cannot function without them, so a
# missing install is a hard exit rather than a degraded mode.
try:
    from mcp.server import Server
    from mcp.server.stdio import stdio_server
    from mcp.types import (
        CallToolResult,
        TextContent,
        Tool,
    )
except ImportError:
    logger.error("MCP library not found. Install with: pip install mcp")
    sys.exit(1)

# Import ERA5 components
from eurus.config import (
    list_available_variables,
)
from eurus.memory import get_memory
from eurus.tools.era5 import retrieve_era5_data, ERA5RetrievalArgs

# Import Maritime Routing tool (HAS_ROUTING_DEPS gates optional registration)
from eurus.tools.routing import (
    calculate_maritime_route,
    RouteArgs,
    HAS_ROUTING_DEPS,
)

# Create MCP server
server = Server("era5-climate-data")

# Alias for compatibility
app = server
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
# ============================================================================
|
| 77 |
+
# TOOL DEFINITIONS
|
| 78 |
+
# ============================================================================
|
| 79 |
+
|
| 80 |
+
@server.list_tools()
async def list_tools() -> list[Tool]:
    """List available MCP tools.

    Always exposes the three core ERA5 tools; the maritime routing tool is
    appended only when its optional dependencies are installed
    (HAS_ROUTING_DEPS). Input schemas come from pydantic-style
    model_json_schema() where an args model exists.
    """
    tools = [
        Tool(
            name="retrieve_era5_data",
            description=(
                "Retrieve ERA5 climate reanalysis data from Earthmover's cloud archive.\n\n"
                "⚠️ QUERY TYPE is AUTO-DETECTED based on time/area:\n"
                "- 'temporal': time > 1 day AND region < 30°×30° (time series, small area)\n"
                "- 'spatial': time ≤ 1 day OR region ≥ 30°×30° (maps, snapshots, large area)\n\n"
                "VARIABLES: sst, t2, u10, v10, mslp, tcc, tp\n"
                "NOTE: swh (waves) is NOT available in this dataset!\n\n"
                "COORDINATES: Always specify lat/lon bounds explicitly.\n"
                "Longitude: Use 0-360 format (e.g., -74°W = 286°E)\n\n"
                "Returns file path. Load: xr.open_dataset('PATH', engine='zarr')"
            ),
            inputSchema=ERA5RetrievalArgs.model_json_schema()
        ),
        Tool(
            name="list_era5_variables",
            description=(
                "List all available ERA5 variables with their descriptions, units, "
                "and short names for use with retrieve_era5_data."
            ),
            # No arguments accepted.
            inputSchema={
                "type": "object",
                "properties": {},
                "additionalProperties": False
            }
        ),
        Tool(
            name="list_cached_datasets",
            description=(
                "List all ERA5 datasets that have been downloaded and cached locally. "
                "Shows variable, date range, file path, and size."
            ),
            # No arguments accepted.
            inputSchema={
                "type": "object",
                "properties": {},
                "additionalProperties": False
            }
        ),
    ]

    # ========== MARITIME ROUTING TOOL (if dependencies available) ==========
    if HAS_ROUTING_DEPS:
        tools.append(
            Tool(
                name="calculate_maritime_route",
                description=(
                    "Calculate a realistic maritime shipping route between two ports. "
                    "Uses global shipping lane graph to avoid land and find optimal path.\n\n"
                    "RETURNS: Waypoint coordinates, bounding box, and INSTRUCTIONS for "
                    "climatological risk assessment protocol.\n\n"
                    "DOES NOT: Check weather itself. The Agent must follow the returned "
                    "protocol to assess route safety using ERA5 data.\n\n"
                    "WORKFLOW:\n"
                    "1. Call this tool → get waypoints + instructions\n"
                    "2. Download ERA5 wind data (u10, v10) for the region\n"
                    "3. Call get_visualization_guide(viz_type='maritime_risk_assessment')\n"
                    "4. Execute analysis in python_repl"
                ),
                inputSchema=RouteArgs.model_json_schema()
            )
        )

    return tools
|
| 148 |
+
|
| 149 |
+
|
| 150 |
+
# ============================================================================
|
| 151 |
+
# TOOL HANDLERS
|
| 152 |
+
# ============================================================================
|
| 153 |
+
|
| 154 |
+
@server.call_tool()
|
| 155 |
+
async def call_tool(name: str, arguments: dict[str, Any]) -> CallToolResult:
|
| 156 |
+
"""Handle tool calls."""
|
| 157 |
+
|
| 158 |
+
try:
|
| 159 |
+
if name == "retrieve_era5_data":
|
| 160 |
+
# Run synchronous function in thread pool (query_type auto-detected)
|
| 161 |
+
result = await asyncio.get_event_loop().run_in_executor(
|
| 162 |
+
None,
|
| 163 |
+
lambda: retrieve_era5_data(
|
| 164 |
+
variable_id=arguments["variable_id"],
|
| 165 |
+
start_date=arguments["start_date"],
|
| 166 |
+
end_date=arguments["end_date"],
|
| 167 |
+
min_latitude=arguments["min_latitude"],
|
| 168 |
+
max_latitude=arguments["max_latitude"],
|
| 169 |
+
min_longitude=arguments["min_longitude"],
|
| 170 |
+
max_longitude=arguments["max_longitude"],
|
| 171 |
+
)
|
| 172 |
+
)
|
| 173 |
+
return CallToolResult(content=[TextContent(type="text", text=result)])
|
| 174 |
+
|
| 175 |
+
elif name == "list_era5_variables":
|
| 176 |
+
result = list_available_variables()
|
| 177 |
+
return CallToolResult(content=[TextContent(type="text", text=result)])
|
| 178 |
+
|
| 179 |
+
elif name == "list_cached_datasets":
|
| 180 |
+
memory = get_memory()
|
| 181 |
+
result = memory.list_datasets()
|
| 182 |
+
return CallToolResult(content=[TextContent(type="text", text=result)])
|
| 183 |
+
|
| 184 |
+
# ========== MARITIME ROUTING HANDLER ==========
|
| 185 |
+
elif name == "calculate_maritime_route":
|
| 186 |
+
if not HAS_ROUTING_DEPS:
|
| 187 |
+
return CallToolResult(
|
| 188 |
+
content=[TextContent(
|
| 189 |
+
type="text",
|
| 190 |
+
text="Error: Maritime routing dependencies not installed.\n"
|
| 191 |
+
"Install with: pip install scgraph geopy"
|
| 192 |
+
)],
|
| 193 |
+
isError=True
|
| 194 |
+
)
|
| 195 |
+
result = await asyncio.get_event_loop().run_in_executor(
|
| 196 |
+
None,
|
| 197 |
+
lambda: calculate_maritime_route(
|
| 198 |
+
origin_lat=arguments["origin_lat"],
|
| 199 |
+
origin_lon=arguments["origin_lon"],
|
| 200 |
+
dest_lat=arguments["dest_lat"],
|
| 201 |
+
dest_lon=arguments["dest_lon"],
|
| 202 |
+
month=arguments["month"],
|
| 203 |
+
year=arguments.get("year"),
|
| 204 |
+
speed_knots=arguments.get("speed_knots", 14.0)
|
| 205 |
+
)
|
| 206 |
+
)
|
| 207 |
+
return CallToolResult(content=[TextContent(type="text", text=result)])
|
| 208 |
+
|
| 209 |
+
else:
|
| 210 |
+
return CallToolResult(
|
| 211 |
+
content=[TextContent(type="text", text=f"Unknown tool: {name}")],
|
| 212 |
+
isError=True
|
| 213 |
+
)
|
| 214 |
+
|
| 215 |
+
except Exception as e:
|
| 216 |
+
logger.exception(f"Error executing tool {name}")
|
| 217 |
+
return CallToolResult(
|
| 218 |
+
content=[TextContent(type="text", text=f"Error: {str(e)}")],
|
| 219 |
+
isError=True
|
| 220 |
+
)
|
| 221 |
+
|
| 222 |
+
|
| 223 |
+
# ============================================================================
|
| 224 |
+
# SERVER STARTUP
|
| 225 |
+
# ============================================================================
|
| 226 |
+
|
| 227 |
+
async def run_server() -> None:
|
| 228 |
+
"""Run the MCP server using stdio transport."""
|
| 229 |
+
logger.info("Starting ERA5 MCP Server...")
|
| 230 |
+
|
| 231 |
+
# Check for API key
|
| 232 |
+
if not os.environ.get("ARRAYLAKE_API_KEY"):
|
| 233 |
+
logger.warning(
|
| 234 |
+
"ARRAYLAKE_API_KEY not set. Data retrieval will fail. "
|
| 235 |
+
"Set it via environment variable or .env file."
|
| 236 |
+
)
|
| 237 |
+
|
| 238 |
+
async with stdio_server() as (read_stream, write_stream):
|
| 239 |
+
await server.run(
|
| 240 |
+
read_stream,
|
| 241 |
+
write_stream,
|
| 242 |
+
server.create_initialization_options()
|
| 243 |
+
)
|
| 244 |
+
|
| 245 |
+
|
| 246 |
+
def main() -> None:
|
| 247 |
+
"""Main entry point."""
|
| 248 |
+
try:
|
| 249 |
+
asyncio.run(run_server())
|
| 250 |
+
except KeyboardInterrupt:
|
| 251 |
+
logger.info("Server shutdown requested")
|
| 252 |
+
except Exception as e:
|
| 253 |
+
logger.exception(f"Server error: {e}")
|
| 254 |
+
sys.exit(1)
|
| 255 |
+
|
| 256 |
+
|
| 257 |
+
if __name__ == "__main__":
|
| 258 |
+
main()
|
src/eurus/tools/__init__.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Eurus Tools Registry
|
| 3 |
+
=====================
|
| 4 |
+
Central hub for all agent tools.
|
| 5 |
+
|
| 6 |
+
Tools:
|
| 7 |
+
- Data Retrieval: ERA5 data access
|
| 8 |
+
- Analysis: Python REPL for custom analysis
|
| 9 |
+
- Guides: Methodology and visualization guidance
|
| 10 |
+
- Routing: Maritime navigation (optional)
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
from typing import List
|
| 14 |
+
from langchain_core.tools import BaseTool
|
| 15 |
+
|
| 16 |
+
# Import core tools
|
| 17 |
+
from .era5 import era5_tool
|
| 18 |
+
from .repl import PythonREPLTool
|
| 19 |
+
from .routing import routing_tool
|
| 20 |
+
from .analysis_guide import analysis_guide_tool, visualization_guide_tool
|
| 21 |
+
|
| 22 |
+
# Optional dependency check for routing
|
| 23 |
+
try:
|
| 24 |
+
import scgraph
|
| 25 |
+
HAS_ROUTING_DEPS = True
|
| 26 |
+
except ImportError:
|
| 27 |
+
HAS_ROUTING_DEPS = False
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def get_all_tools(
|
| 31 |
+
enable_routing: bool = True,
|
| 32 |
+
enable_guide: bool = True
|
| 33 |
+
) -> List[BaseTool]:
|
| 34 |
+
"""
|
| 35 |
+
Return a list of all available tools for the agent.
|
| 36 |
+
|
| 37 |
+
Args:
|
| 38 |
+
enable_routing: If True, includes the maritime routing tool (default: True).
|
| 39 |
+
enable_guide: If True, includes the guide tools (default: True).
|
| 40 |
+
|
| 41 |
+
Returns:
|
| 42 |
+
List of LangChain tools for the agent.
|
| 43 |
+
"""
|
| 44 |
+
# Core tools: data retrieval + Python analysis
|
| 45 |
+
tools = [
|
| 46 |
+
era5_tool,
|
| 47 |
+
PythonREPLTool(working_dir=".")
|
| 48 |
+
]
|
| 49 |
+
|
| 50 |
+
# Guide tools: methodology and visualization guidance
|
| 51 |
+
if enable_guide:
|
| 52 |
+
tools.append(analysis_guide_tool)
|
| 53 |
+
tools.append(visualization_guide_tool)
|
| 54 |
+
|
| 55 |
+
# Routing tools: maritime navigation
|
| 56 |
+
if enable_routing:
|
| 57 |
+
if HAS_ROUTING_DEPS:
|
| 58 |
+
tools.append(routing_tool)
|
| 59 |
+
else:
|
| 60 |
+
print("WARNING: Routing tools requested but dependencies (scgraph) are missing.")
|
| 61 |
+
|
| 62 |
+
return tools
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
# Alias for backward compatibility
|
| 66 |
+
get_tools = get_all_tools
|
src/eurus/tools/analysis_guide.py
ADDED
|
@@ -0,0 +1,1191 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Analysis Guide Tool
|
| 3 |
+
====================
|
| 4 |
+
Provides methodological guidance for climate data analysis using python_repl.
|
| 5 |
+
|
| 6 |
+
This tool returns TEXT INSTRUCTIONS (not executable code!) for:
|
| 7 |
+
- What approach to take
|
| 8 |
+
- How to structure the analysis
|
| 9 |
+
- Quality checks and pitfalls
|
| 10 |
+
- Best practices for visualization
|
| 11 |
+
|
| 12 |
+
The agent uses python_repl to execute the actual analysis.
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
+
from typing import Literal
|
| 16 |
+
from pydantic import BaseModel, Field
|
| 17 |
+
from langchain_core.tools import StructuredTool
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
# =============================================================================
|
| 21 |
+
# ANALYSIS GUIDES
|
| 22 |
+
# =============================================================================
|
| 23 |
+
|
| 24 |
+
ANALYSIS_GUIDES = {
|
| 25 |
+
# -------------------------------------------------------------------------
|
| 26 |
+
# DATA OPERATIONS
|
| 27 |
+
# -------------------------------------------------------------------------
|
| 28 |
+
"load_data": """
|
| 29 |
+
## Loading ERA5 Data
|
| 30 |
+
|
| 31 |
+
### When to use
|
| 32 |
+
- Initializing any analysis
|
| 33 |
+
- Loading downloaded Zarr data
|
| 34 |
+
|
| 35 |
+
### Workflow
|
| 36 |
+
1. **Load data** — Use `xr.open_dataset('path', engine='zarr')` or `xr.open_zarr('path')`.
|
| 37 |
+
2. **Inspect dataset** — Check coordinates and available variables.
|
| 38 |
+
3. **Convert units** before any analysis:
|
| 39 |
+
- Temp (`t2`, `d2`, `skt`, `sst`, `stl1`): subtract 273.15 → °C
|
| 40 |
+
- Precip (`tp`, `cp`, `lsp`): multiply by 1000 → mm
|
| 41 |
+
- Pressure (`sp`, `mslp`): divide by 100 → hPa
|
| 42 |
+
|
| 43 |
+
### Quality Checklist
|
| 44 |
+
- [ ] Data loaded lazily (avoid `.load()` on large datasets)
|
| 45 |
+
- [ ] Units converted before aggregations
|
| 46 |
+
- [ ] Coordinate names verified (latitude vs lat, etc.)
|
| 47 |
+
|
| 48 |
+
### Common Pitfalls
|
| 49 |
+
- ⚠️ Loading multi-year global data into memory causes OOM. Keep operations lazy until subsetted.
|
| 50 |
+
- ⚠️ Some Zarr stores have `valid_time` instead of `time` — check with `.coords`.
|
| 51 |
+
""",
|
| 52 |
+
|
| 53 |
+
"spatial_subset": """
|
| 54 |
+
## Spatial Subsetting
|
| 55 |
+
|
| 56 |
+
### When to use
|
| 57 |
+
- Focusing on a specific region, country, or routing bounding box
|
| 58 |
+
- Reducing data size before heavy analysis
|
| 59 |
+
|
| 60 |
+
### Workflow
|
| 61 |
+
1. **Determine bounds** — Find min/max latitude and longitude.
|
| 62 |
+
2. **Check coordinate orientation** — ERA5 latitude is often descending (90 to -90).
|
| 63 |
+
3. **Slice data** — `.sel(latitude=slice(north, south), longitude=slice(west, east))`.
|
| 64 |
+
|
| 65 |
+
### Quality Checklist
|
| 66 |
+
- [ ] Latitude sliced from North to South (max to min) for descending coords
|
| 67 |
+
- [ ] Longitudes match dataset format (convert -180/180 ↔ 0/360 if needed)
|
| 68 |
+
- [ ] Result is not empty — verify with `.shape`
|
| 69 |
+
|
| 70 |
+
### Common Pitfalls
|
| 71 |
+
- ⚠️ Slicing `slice(south, north)` on descending coords → empty result.
|
| 72 |
+
- ⚠️ Crossing the prime meridian in 0-360 coords requires concatenating two slices.
|
| 73 |
+
- ⚠️ Use `.sel(method='nearest')` for point extraction, not exact matching.
|
| 74 |
+
""",
|
| 75 |
+
|
| 76 |
+
"temporal_subset": """
|
| 77 |
+
## Temporal Subsetting & Aggregation
|
| 78 |
+
|
| 79 |
+
### When to use
|
| 80 |
+
- Isolating specific events, months, or seasons
|
| 81 |
+
- Downsampling hourly data to daily/monthly
|
| 82 |
+
|
| 83 |
+
### Workflow
|
| 84 |
+
1. **Time slice** — `.sel(time=slice('2023-01-01', '2023-12-31'))`.
|
| 85 |
+
2. **Filter** — Seasons: `.sel(time=ds.time.dt.season == 'DJF')`.
|
| 86 |
+
3. **Resample** — `.resample(time='1D').mean()` for daily means.
|
| 87 |
+
|
| 88 |
+
### Quality Checklist
|
| 89 |
+
- [ ] Aggregation matches variable: `.mean()` for T/wind, `.sum()` for precip
|
| 90 |
+
- [ ] Leap years handled if using day-of-year grouping
|
| 91 |
+
|
| 92 |
+
### Common Pitfalls
|
| 93 |
+
- ⚠️ DJF wraps across years — verify start/end boundaries.
|
| 94 |
+
- ⚠️ `.resample()` (continuous) ≠ `.groupby()` (climatological). Don't mix them up.
|
| 95 |
+
- ⚠️ Radiation variables (`ssr`, `ssrd`) are accumulated — need differencing, not averaging.
|
| 96 |
+
""",
|
| 97 |
+
|
| 98 |
+
# -------------------------------------------------------------------------
|
| 99 |
+
# STATISTICAL ANALYSIS
|
| 100 |
+
# -------------------------------------------------------------------------
|
| 101 |
+
"anomalies": """
|
| 102 |
+
## Anomaly Analysis
|
| 103 |
+
|
| 104 |
+
### When to use
|
| 105 |
+
- "How unusual was this period?"
|
| 106 |
+
- Comparing current conditions to "normal"
|
| 107 |
+
- Any "above/below average" question
|
| 108 |
+
|
| 109 |
+
### Workflow
|
| 110 |
+
1. **Define baseline** — ≥10 years (30 ideal). E.g. 1991-2020.
|
| 111 |
+
2. **Compute climatology** — `clim = ds.groupby('time.month').mean('time')`.
|
| 112 |
+
3. **Subtract** — `anomaly = ds.groupby('time.month') - clim`.
|
| 113 |
+
4. **Convert units** — Report in °C, mm, m/s (not K, m, Pa).
|
| 114 |
+
5. **Assess magnitude** — Compare to σ of the baseline period.
|
| 115 |
+
|
| 116 |
+
### Quality Checklist
|
| 117 |
+
- [ ] Baseline ≥10 years
|
| 118 |
+
- [ ] Same calendar grouping for clim and analysis
|
| 119 |
+
- [ ] Units converted for readability
|
| 120 |
+
- [ ] Spatial context: is anomaly regional or localized?
|
| 121 |
+
|
| 122 |
+
### Common Pitfalls
|
| 123 |
+
- ⚠️ Short baselines amplify noise.
|
| 124 |
+
- ⚠️ Daily climatologies with <30yr baseline are noisy → use monthly grouping.
|
| 125 |
+
- ⚠️ Be explicit: spatial anomaly vs temporal anomaly.
|
| 126 |
+
|
| 127 |
+
### Interpretation
|
| 128 |
+
- Positive = warmer/wetter/windier than normal.
|
| 129 |
+
- ±1σ = common, ±2σ = unusual (5%), ±3σ = extreme (0.3%).
|
| 130 |
+
- Maps: MUST use `RdBu_r` centered at zero via `TwoSlopeNorm`.
|
| 131 |
+
""",
|
| 132 |
+
|
| 133 |
+
"zscore": """
|
| 134 |
+
## Z-Score Analysis (Standardized Anomalies)
|
| 135 |
+
|
| 136 |
+
### When to use
|
| 137 |
+
- Comparing extremity across different variables
|
| 138 |
+
- Standardizing for regions with different variability
|
| 139 |
+
- Identifying statistically significant departures
|
| 140 |
+
|
| 141 |
+
### Workflow
|
| 142 |
+
1. **Compute baseline mean** — Grouped by month for seasonality.
|
| 143 |
+
2. **Compute baseline std** — Same period, same grouping.
|
| 144 |
+
3. **Standardize** — `z = (value - mean) / std`.
|
| 145 |
+
|
| 146 |
+
### Quality Checklist
|
| 147 |
+
- [ ] Standard deviation is non-zero everywhere
|
| 148 |
+
- [ ] Baseline period matches for mean and std
|
| 149 |
+
|
| 150 |
+
### Common Pitfalls
|
| 151 |
+
- ⚠️ Precipitation is NOT normally distributed — use SPI or percentiles instead of raw Z-scores.
|
| 152 |
+
- ⚠️ Z-scores near coastlines can be extreme due to mixed land/ocean std.
|
| 153 |
+
|
| 154 |
+
### Interpretation
|
| 155 |
+
- Z = 0: average. ±1: normal (68%). ±2: unusual (5%). ±3: extreme (0.3%).
|
| 156 |
+
""",
|
| 157 |
+
|
| 158 |
+
"trend_analysis": """
|
| 159 |
+
## Linear Trend Analysis
|
| 160 |
+
|
| 161 |
+
### When to use
|
| 162 |
+
- "Is it getting warmer/wetter over time?"
|
| 163 |
+
- Detecting long-term climate change signals
|
| 164 |
+
|
| 165 |
+
### Workflow
|
| 166 |
+
1. **Downsample** — Convert to annual/seasonal means first.
|
| 167 |
+
2. **Regress** — `scipy.stats.linregress` or `np.polyfit(degree=1)`.
|
| 168 |
+
3. **Significance** — Extract p-value for the slope.
|
| 169 |
+
4. **Scale** — Multiply annual slope by 10 → "per decade".
|
| 170 |
+
|
| 171 |
+
### Quality Checklist
|
| 172 |
+
- [ ] Period ≥20-30 years for meaningful trends
|
| 173 |
+
- [ ] Seasonal cycle removed before fitting
|
| 174 |
+
- [ ] Significance tested (p < 0.05)
|
| 175 |
+
- [ ] Report trend as units/decade
|
| 176 |
+
|
| 177 |
+
### Common Pitfalls
|
| 178 |
+
- ⚠️ Trend on daily data without removing seasonality → dominated by summer/winter swings.
|
| 179 |
+
- ⚠️ Short series have uncertain trends — report confidence intervals.
|
| 180 |
+
- ⚠️ Autocorrelation can inflate significance — consider using Mann-Kendall test.
|
| 181 |
+
|
| 182 |
+
### Interpretation
|
| 183 |
+
- Report as °C/decade. Use stippling on maps for significant areas.
|
| 184 |
+
""",
|
| 185 |
+
|
| 186 |
+
"eof_analysis": """
|
| 187 |
+
## EOF/PCA Analysis
|
| 188 |
+
|
| 189 |
+
### When to use
|
| 190 |
+
- Finding dominant spatial patterns (ENSO, NAO, PDO)
|
| 191 |
+
- Dimensionality reduction of spatiotemporal data
|
| 192 |
+
|
| 193 |
+
### Workflow
|
| 194 |
+
1. **Deseasonalize** — Compute anomalies to remove the seasonal cycle.
|
| 195 |
+
2. **Latitude weighting** — Multiply by `np.sqrt(np.cos(np.deg2rad(lat)))`.
|
| 196 |
+
3. **Decompose** — PCA on flattened space dimensions.
|
| 197 |
+
4. **Reconstruct** — Map PCs back to spatial grid (EOFs).
|
| 198 |
+
|
| 199 |
+
### Quality Checklist
|
| 200 |
+
- [ ] Seasonal cycle removed
|
| 201 |
+
- [ ] Latitude weighting applied
|
| 202 |
+
- [ ] Variance explained (%) calculated per mode
|
| 203 |
+
- [ ] Physical interpretation attempted for leading modes
|
| 204 |
+
|
| 205 |
+
### Common Pitfalls
|
| 206 |
+
- ⚠️ Unweighted EOFs inflate polar regions artificially.
|
| 207 |
+
- ⚠️ EOFs are mathematical constructs — not guaranteed to correspond to physical modes.
|
| 208 |
+
|
| 209 |
+
### Interpretation
|
| 210 |
+
- EOF1: dominant spatial pattern. PC1: its temporal evolution.
|
| 211 |
+
- If EOF1 explains >20% variance, it's highly dominant.
|
| 212 |
+
""",
|
| 213 |
+
|
| 214 |
+
"correlation_analysis": """
|
| 215 |
+
## Correlation Analysis
|
| 216 |
+
|
| 217 |
+
### When to use
|
| 218 |
+
- Spatial/temporal correlation mapping
|
| 219 |
+
- Lead-lag analysis (e.g., SST vs downstream precipitation)
|
| 220 |
+
- Teleconnection exploration
|
| 221 |
+
|
| 222 |
+
### Workflow
|
| 223 |
+
1. **Deseasonalize both variables** — Remove seasonal cycle from both.
|
| 224 |
+
2. **Align time coordinates** — Ensure identical time axes.
|
| 225 |
+
3. **Correlate** — `xr.corr(var1, var2, dim='time')`.
|
| 226 |
+
4. **Lead-lag** — Use `.shift(time=N)` month offsets to test delayed responses.
|
| 227 |
+
5. **Significance** — Compute p-values, mask insignificant areas.
|
| 228 |
+
|
| 229 |
+
### Quality Checklist
|
| 230 |
+
- [ ] Both variables deseasonalized
|
| 231 |
+
- [ ] p-values computed (p < 0.05 for significance)
|
| 232 |
+
- [ ] Sample size adequate (≥30 time points)
|
| 233 |
+
|
| 234 |
+
### Common Pitfalls
|
| 235 |
+
- ⚠️ Correlating raw data captures the seasonal cycle — everything correlates with summer.
|
| 236 |
+
- ⚠️ Spatial autocorrelation inflates field significance — apply Bonferroni or FDR correction.
|
| 237 |
+
|
| 238 |
+
### Interpretation
|
| 239 |
+
- R² gives variance explained. Lead-lag peak indicates response time.
|
| 240 |
+
- Plot spatial R maps with `RdBu_r`, stipple significant areas.
|
| 241 |
+
""",
|
| 242 |
+
|
| 243 |
+
"composite_analysis": """
|
| 244 |
+
## Composite Analysis
|
| 245 |
+
|
| 246 |
+
### When to use
|
| 247 |
+
- Average conditions during El Niño vs La Niña years
|
| 248 |
+
- Spatial fingerprint of specific extreme events
|
| 249 |
+
- "What does the atmosphere look like when X happens?"
|
| 250 |
+
|
| 251 |
+
### Workflow
|
| 252 |
+
1. **Define events** — Boolean mask of times exceeding a threshold (e.g., Niño3.4 > 0.5°C).
|
| 253 |
+
2. **Subset data** — `.where(mask, drop=True)`.
|
| 254 |
+
3. **Average** — Time mean of the subset = composite.
|
| 255 |
+
4. **Compare** — Subtract climatological mean → composite anomaly.
|
| 256 |
+
|
| 257 |
+
### Quality Checklist
|
| 258 |
+
- [ ] Sample size ≥10 events for robustness
|
| 259 |
+
- [ ] Baseline climatology matches the season of the events
|
| 260 |
+
- [ ] Significance tested via bootstrap or t-test
|
| 261 |
+
|
| 262 |
+
### Common Pitfalls
|
| 263 |
+
- ⚠️ Compositing n=2 events → noise, not a physical signal.
|
| 264 |
+
- ⚠️ Mixing seasons in composite (El Niño in DJF vs JJA) obscures the signal.
|
| 265 |
+
|
| 266 |
+
### Interpretation
|
| 267 |
+
- Shows the typical anomaly expected when event occurs.
|
| 268 |
+
- Plot with `RdBu_r` diverging colormap. Stipple significant areas.
|
| 269 |
+
""",
|
| 270 |
+
|
| 271 |
+
"diurnal_cycle": """
|
| 272 |
+
## Diurnal Cycle Analysis
|
| 273 |
+
|
| 274 |
+
### When to use
|
| 275 |
+
- Hourly variability within days (afternoon convection, nighttime cooling)
|
| 276 |
+
- Solar radiation patterns
|
| 277 |
+
|
| 278 |
+
### Workflow
|
| 279 |
+
1. **Group by hour** — `ds.groupby('time.hour').mean('time')`.
|
| 280 |
+
2. **Convert to local time** — ERA5 is UTC. `Local = UTC + Longitude/15`.
|
| 281 |
+
3. **Calculate amplitude** — `diurnal_range = max('hour') - min('hour')`.
|
| 282 |
+
|
| 283 |
+
### Quality Checklist
|
| 284 |
+
- [ ] Input data is hourly (not daily/monthly)
|
| 285 |
+
- [ ] UTC → local time conversion applied before labeling "afternoon"/"morning"
|
| 286 |
+
|
| 287 |
+
### Common Pitfalls
|
| 288 |
+
- ⚠️ Averaging global data by UTC hour mixes day and night across longitudes.
|
| 289 |
+
- ⚠️ Cloud cover (`tcc`) and radiation (`ssrd`) have strong diurnal signals — always check.
|
| 290 |
+
|
| 291 |
+
### Interpretation
|
| 292 |
+
- `blh` and `t2` peak mid-afternoon. Convective precip (`cp`) peaks late afternoon over land, early morning over oceans.
|
| 293 |
+
""",
|
| 294 |
+
|
| 295 |
+
"seasonal_decomposition": """
|
| 296 |
+
## Seasonal Decomposition
|
| 297 |
+
|
| 298 |
+
### When to use
|
| 299 |
+
- Separating the seasonal cycle from interannual variability
|
| 300 |
+
- Visualizing how a specific year deviates from the normal curve
|
| 301 |
+
|
| 302 |
+
### Workflow
|
| 303 |
+
1. **Compute climatology** — `.groupby('time.month').mean('time')`.
|
| 304 |
+
2. **Extract anomalies** — Subtract climatology from raw data.
|
| 305 |
+
3. **Smooth trend** — Apply 12-month rolling mean to extract multi-year trends.
|
| 306 |
+
|
| 307 |
+
### Quality Checklist
|
| 308 |
+
- [ ] Baseline robust (≥10 years)
|
| 309 |
+
- [ ] Residual = raw - seasonal - trend (should be ~white noise)
|
| 310 |
+
|
| 311 |
+
### Common Pitfalls
|
| 312 |
+
- ⚠️ Day-of-year climatologies over short baselines are noisy — smooth with 15-day window.
|
| 313 |
+
|
| 314 |
+
### Interpretation
|
| 315 |
+
- Separates variance into: seasonal (predictable), trend (long-term), residual (weather noise).
|
| 316 |
+
""",
|
| 317 |
+
|
| 318 |
+
"spectral_analysis": """
|
| 319 |
+
## Spectral Analysis
|
| 320 |
+
|
| 321 |
+
### When to use
|
| 322 |
+
- Periodicity detection (ENSO 3-7yr, MJO 30-60d, annual/semi-annual)
|
| 323 |
+
- Confirming suspected oscillatory behavior
|
| 324 |
+
|
| 325 |
+
### Workflow
|
| 326 |
+
1. **Prepare 1D series** — Spatial average or single point.
|
| 327 |
+
2. **Detrend** — Remove linear trend AND seasonal cycle.
|
| 328 |
+
3. **Compute spectrum** — `scipy.signal.welch` or `periodogram`.
|
| 329 |
+
4. **Plot as Period** — X-axis = 1/frequency (years or days), not raw frequency.
|
| 330 |
+
|
| 331 |
+
### Quality Checklist
|
| 332 |
+
- [ ] No NaNs in time series (interpolate or drop)
|
| 333 |
+
- [ ] Time coordinate evenly spaced
|
| 334 |
+
- [ ] Seasonal cycle removed
|
| 335 |
+
|
| 336 |
+
### Common Pitfalls
|
| 337 |
+
- ⚠️ Seasonal cycle dominates spectrum if not removed — drowns everything else.
|
| 338 |
+
- ⚠️ Short records can't resolve low-frequency oscillations (need ≥3× the period).
|
| 339 |
+
|
| 340 |
+
### Interpretation
|
| 341 |
+
- Peaks = dominant cycles. ENSO: 3-7yr. QBO: ~28mo. MJO: 30-60d. Annual: 12mo.
|
| 342 |
+
""",
|
| 343 |
+
|
| 344 |
+
"spatial_statistics": """
|
| 345 |
+
## Spatial Statistics & Area Averaging
|
| 346 |
+
|
| 347 |
+
### When to use
|
| 348 |
+
- Computing a single time series for a geographic region
|
| 349 |
+
- Area-weighted means for reporting
|
| 350 |
+
- Field significance testing
|
| 351 |
+
|
| 352 |
+
### Workflow
|
| 353 |
+
1. **Latitude weights** — `weights = np.cos(np.deg2rad(ds.latitude))`.
|
| 354 |
+
2. **Apply** — `ds.weighted(weights).mean(dim=['latitude', 'longitude'])`.
|
| 355 |
+
3. **Land/sea mask** — Apply if needed (e.g., ocean-only SST average).
|
| 356 |
+
|
| 357 |
+
### Quality Checklist
|
| 358 |
+
- [ ] Latitude weighting applied BEFORE spatial averaging
|
| 359 |
+
- [ ] Land-sea mask applied where relevant
|
| 360 |
+
- [ ] Units preserved correctly
|
| 361 |
+
|
| 362 |
+
### Common Pitfalls
|
| 363 |
+
- ⚠️ Unweighted averages bias toward poles (smaller grid cells over-counted).
|
| 364 |
+
- ⚠️ Global mean SST must exclude land points.
|
| 365 |
+
|
| 366 |
+
### Interpretation
|
| 367 |
+
- Produces physically accurate area-averaged time series.
|
| 368 |
+
""",
|
| 369 |
+
|
| 370 |
+
"multi_variable": """
|
| 371 |
+
## Multi-Variable Derived Quantities
|
| 372 |
+
|
| 373 |
+
### When to use
|
| 374 |
+
- Combining ERA5 variables for derived metrics
|
| 375 |
+
|
| 376 |
+
### Common Derivations
|
| 377 |
+
1. **Wind speed** — `wspd = np.sqrt(u10**2 + v10**2)` (or u100/v100 for hub-height).
|
| 378 |
+
2. **Wind direction** — `wdir = (270 - np.degrees(np.arctan2(v10, u10))) % 360`.
|
| 379 |
+
3. **Relative humidity** — From `t2` and `d2` using Magnus formula.
|
| 380 |
+
4. **Heat index** — Combine `t2` and `d2` (Steadman formula).
|
| 381 |
+
5. **Vapour transport** — `IVT ≈ tcwv * wspd` (surface proxy).
|
| 382 |
+
6. **Total precip check** — `tp ≈ cp + lsp`.
|
| 383 |
+
|
| 384 |
+
### Quality Checklist
|
| 385 |
+
- [ ] Variables share identical grids (time, lat, lon)
|
| 386 |
+
- [ ] Units matched before combining (both in °C, both in m/s, etc.)
|
| 387 |
+
|
| 388 |
+
### Common Pitfalls
|
| 389 |
+
- ⚠️ `mean(speed) ≠ speed_of_means` — always compute speed FIRST, then average.
|
| 390 |
+
- ⚠️ Wind direction requires proper 4-quadrant atan2, not naive arctan.
|
| 391 |
+
|
| 392 |
+
### Interpretation
|
| 393 |
+
- Derived metrics often better represent human/environmental impact than raw fields.
|
| 394 |
+
""",
|
| 395 |
+
|
| 396 |
+
"climatology_normals": """
|
| 397 |
+
## Climatology Normals (WMO Standard)
|
| 398 |
+
|
| 399 |
+
### When to use
|
| 400 |
+
- Computing 30-year normals
|
| 401 |
+
- Calculating "departure from normal"
|
| 402 |
+
|
| 403 |
+
### Workflow
|
| 404 |
+
1. **Select base period** — Standard WMO epoch: 1991-2020 (or 1981-2010).
|
| 405 |
+
2. **Compute monthly averages** — `normals = baseline.groupby('time.month').mean('time')`.
|
| 406 |
+
3. **Departure** — `departure = current.groupby('time.month') - normals`.
|
| 407 |
+
|
| 408 |
+
### Quality Checklist
|
| 409 |
+
- [ ] Exactly 30 years used
|
| 410 |
+
- [ ] Same months compared (don't mix Feb normals with March data)
|
| 411 |
+
|
| 412 |
+
### Common Pitfalls
|
| 413 |
+
- ⚠️ Moving baselines make comparisons with WMO climate reports inconsistent.
|
| 414 |
+
|
| 415 |
+
### Interpretation
|
| 416 |
+
- "Normal" = statistical baseline. Departures express how much current conditions deviate.
|
| 417 |
+
""",
|
| 418 |
+
|
| 419 |
+
# -------------------------------------------------------------------------
|
| 420 |
+
# CLIMATE INDICES & EXTREMES
|
| 421 |
+
# -------------------------------------------------------------------------
|
| 422 |
+
"climate_indices": """
|
| 423 |
+
## Climate Indices
|
| 424 |
+
|
| 425 |
+
### When to use
|
| 426 |
+
- Assessing ENSO, NAO, PDO, AMO teleconnections
|
| 427 |
+
- Correlating local weather with large-scale modes
|
| 428 |
+
|
| 429 |
+
### Key Indices
|
| 430 |
+
- **ENSO (Niño 3.4)**: `sst` anomaly, 5°S-5°N, 170°W-120°W. El Niño > +0.5°C, La Niña < -0.5°C.
|
| 431 |
+
- **NAO**: `mslp` difference, Azores High minus Icelandic Low. Positive → mild European winters.
|
| 432 |
+
- **PDO**: Leading EOF of North Pacific `sst` (north of 20°N). 20-30yr phases.
|
| 433 |
+
- **AMO**: Detrended North Atlantic `sst` average. ~60-70yr cycle.
|
| 434 |
+
|
| 435 |
+
### Workflow
|
| 436 |
+
1. **Extract region** — Use standard geographic bounds.
|
| 437 |
+
2. **Compute anomaly** — Area-averaged, against 30yr baseline.
|
| 438 |
+
3. **Smooth** — 3-to-5 month rolling mean.
|
| 439 |
+
|
| 440 |
+
### Quality Checklist
|
| 441 |
+
- [ ] Standard geographic bounds strictly followed
|
| 442 |
+
- [ ] Rolling mean applied to filter weather noise
|
| 443 |
+
- [ ] Latitude-weighted area average
|
| 444 |
+
|
| 445 |
+
### Common Pitfalls
|
| 446 |
+
- ⚠️ Without rolling mean, the index is too noisy for classification.
|
| 447 |
+
- ⚠️ Using incorrect region bounds produces a different (invalid) index.
|
| 448 |
+
""",
|
| 449 |
+
|
| 450 |
+
"extremes": """
|
| 451 |
+
## Extreme Event Analysis
|
| 452 |
+
|
| 453 |
+
### When to use
|
| 454 |
+
- Heat/cold extremes, heavy precipitation, tail-risk assessment
|
| 455 |
+
- Threshold exceedance frequency
|
| 456 |
+
|
| 457 |
+
### Workflow
|
| 458 |
+
1. **Define threshold** — Absolute (e.g., T > 35°C) or percentile-based (> 95th pctl of baseline).
|
| 459 |
+
2. **Create mask** — Boolean where condition is met.
|
| 460 |
+
3. **Count** — Sum over time for extreme days per year/month.
|
| 461 |
+
4. **Trend** — Check if frequency is increasing over time.
|
| 462 |
+
|
| 463 |
+
### Quality Checklist
|
| 464 |
+
- [ ] Percentiles from robust baseline (≥30 years)
|
| 465 |
+
- [ ] Use daily data, not monthly averages
|
| 466 |
+
- [ ] Units converted before applying thresholds
|
| 467 |
+
|
| 468 |
+
### Common Pitfalls
|
| 469 |
+
- ⚠️ 99th percentile on monthly averages misses true daily extremes entirely.
|
| 470 |
+
- ⚠️ Absolute thresholds (e.g., 35°C) are region-dependent — 35°C is normal in Sahara, extreme in London.
|
| 471 |
+
|
| 472 |
+
### Interpretation
|
| 473 |
+
- Increasing frequency of extremes = non-linear climate change impact.
|
| 474 |
+
- Report as "N days/year exceeding threshold" or "return period shortened from X to Y years".
|
| 475 |
+
""",
|
| 476 |
+
|
| 477 |
+
"drought_analysis": """
|
| 478 |
+
## Drought Analysis
|
| 479 |
+
|
| 480 |
+
### When to use
|
| 481 |
+
- Prolonged precipitation deficits
|
| 482 |
+
- Agricultural/hydrological impact assessment
|
| 483 |
+
- SPI (Standardized Precipitation Index) proxy
|
| 484 |
+
|
| 485 |
+
### Workflow
|
| 486 |
+
1. **Extract precip** — Use `tp` in mm (×1000 from meters).
|
| 487 |
+
2. **Accumulate** — Rolling sums: `tp.rolling(time=3).sum()` for 3-month SPI.
|
| 488 |
+
3. **Standardize** — `(accumulated - mean) / std` → SPI proxy.
|
| 489 |
+
4. **Cross-check** — Verify with `swvl1` (soil moisture) for ground-truth.
|
| 490 |
+
|
| 491 |
+
### Quality Checklist
|
| 492 |
+
- [ ] Monthly data used (not hourly)
|
| 493 |
+
- [ ] Baseline ≥30 years for stable statistics
|
| 494 |
+
- [ ] Multiple accumulation periods tested (1, 3, 6, 12 months)
|
| 495 |
+
|
| 496 |
+
### Common Pitfalls
|
| 497 |
+
- ⚠️ Absolute precipitation deficits are meaningless in deserts — always standardize.
|
| 498 |
+
- ⚠️ Gamma distribution fit (proper SPI) is better than raw Z-score for precip.
|
| 499 |
+
|
| 500 |
+
### Interpretation
|
| 501 |
+
- SPI < -1.0: Moderate drought. < -1.5: Severe. < -2.0: Extreme.
|
| 502 |
+
""",
|
| 503 |
+
|
| 504 |
+
"heatwave_detection": """
|
| 505 |
+
## Heatwave Detection
|
| 506 |
+
|
| 507 |
+
### When to use
|
| 508 |
+
- Identifying heatwave events using standard definitions
|
| 509 |
+
- Assessing heat-related risk periods
|
| 510 |
+
|
| 511 |
+
### Workflow
|
| 512 |
+
1. **Daily data** — Must be daily resolution (resample hourly if needed).
|
| 513 |
+
2. **Threshold** — 90th percentile of `t2` per calendar day from baseline.
|
| 514 |
+
3. **Exceedance mask** — `is_hot = t2_daily > threshold_90`.
|
| 515 |
+
4. **Streak detection** — Find ≥3 consecutive hot days using rolling sum ≥ 3.
|
| 516 |
+
|
| 517 |
+
### Quality Checklist
|
| 518 |
+
- [ ] Daily data (not monthly!)
|
| 519 |
+
- [ ] `t2` converted to °C
|
| 520 |
+
- [ ] Threshold is per-calendar-day (not a single annual value)
|
| 521 |
+
- [ ] Duration criterion applied (≥3 days)
|
| 522 |
+
|
| 523 |
+
### Common Pitfalls
|
| 524 |
+
- ⚠️ Monthly data — physically impossible to detect heatwaves.
|
| 525 |
+
- ⚠️ A single hot day is not a heatwave — duration matters.
|
| 526 |
+
- ⚠️ Nighttime temperatures (`t2` at 00/06 UTC) also matter for health impact.
|
| 527 |
+
|
| 528 |
+
### Interpretation
|
| 529 |
+
- Heatwaves require BOTH intensity (high T) AND duration (consecutive days).
|
| 530 |
+
- Report: number of events per year, mean duration, max intensity.
|
| 531 |
+
""",
|
| 532 |
+
|
| 533 |
+
"atmospheric_rivers": """
|
| 534 |
+
## Atmospheric Rivers Detection
|
| 535 |
+
|
| 536 |
+
### When to use
|
| 537 |
+
- Detecting AR events from integrated vapour transport proxy
|
| 538 |
+
- Extreme precipitation risk at landfall
|
| 539 |
+
|
| 540 |
+
### Workflow
|
| 541 |
+
1. **Extract** — `tcwv` + `u10`, `v10`.
|
| 542 |
+
2. **Compute IVT proxy** — `ivt = tcwv * np.sqrt(u10**2 + v10**2)`.
|
| 543 |
+
3. **Threshold** — IVT proxy > 250 kg/m/s (approximate).
|
| 544 |
+
4. **Shape check** — Feature should be elongated (>2000km long, <1000km wide).
|
| 545 |
+
|
| 546 |
+
### Quality Checklist
|
| 547 |
+
- [ ] Acknowledge this is surface-wind proxy (true IVT needs pressure-level data)
|
| 548 |
+
- [ ] Cross-validate with heavy `tp` at landfall
|
| 549 |
+
- [ ] Check for persistent (≥24h) plume features
|
| 550 |
+
|
| 551 |
+
### Common Pitfalls
|
| 552 |
+
- ⚠️ Tropical moisture pools are NOT ARs — wind-speed multiplier is essential to distinguish.
|
| 553 |
+
- ⚠️ This surface proxy underestimates true IVT — use conservative thresholds.
|
| 554 |
+
|
| 555 |
+
### Interpretation
|
| 556 |
+
- High `tcwv` + strong directed wind at coast = extreme flood risk.
|
| 557 |
+
- Map with `YlGnBu` for moisture intensity.
|
| 558 |
+
""",
|
| 559 |
+
|
| 560 |
+
"blocking_events": """
|
| 561 |
+
## Atmospheric Blocking Detection
|
| 562 |
+
|
| 563 |
+
### When to use
|
| 564 |
+
- Identifying persistent high-pressure blocks from MSLP
|
| 565 |
+
- Explaining prolonged heatwaves, droughts, or cold spells
|
| 566 |
+
|
| 567 |
+
### Workflow
|
| 568 |
+
1. **Extract** — `mslp` in hPa (÷100 from Pa).
|
| 569 |
+
2. **Compute anomalies** — Daily anomalies from climatology.
|
| 570 |
+
3. **Detect** — Find positive anomalies > 1.5σ persisting ≥5 days.
|
| 571 |
+
4. **Location** — Focus on mid-to-high latitudes (40-70°N typically).
|
| 572 |
+
|
| 573 |
+
### Quality Checklist
|
| 574 |
+
- [ ] 3-5 day rolling mean applied to filter transient ridges
|
| 575 |
+
- [ ] Persistence criterion enforced (≥5 days)
|
| 576 |
+
- [ ] Mid-latitude focus
|
| 577 |
+
|
| 578 |
+
### Common Pitfalls
|
| 579 |
+
- ⚠️ Fast-moving ridges are NOT blocks — persistence is key.
|
| 580 |
+
- ⚠️ Blocks in the Southern Hemisphere are rarer and weaker.
|
| 581 |
+
|
| 582 |
+
### Interpretation
|
| 583 |
+
- Blocks force storms to detour, causing prolonged rain on flanks and drought/heat underneath.
|
| 584 |
+
""",
|
| 585 |
+
|
| 586 |
+
"energy_budget": """
|
| 587 |
+
## Surface Energy Budget
|
| 588 |
+
|
| 589 |
+
### When to use
|
| 590 |
+
- Analyzing radiation balance and surface heating
|
| 591 |
+
- Solar energy potential assessment
|
| 592 |
+
|
| 593 |
+
### Workflow
|
| 594 |
+
1. **Extract radiation** — `ssrd` (incoming solar), `ssr` (net solar after reflection).
|
| 595 |
+
2. **Convert units** — J/m² to W/m² by dividing by accumulation period (3600s for hourly).
|
| 596 |
+
3. **Compute albedo proxy** — `albedo ≈ 1 - (ssr / ssrd)` where ssrd > 0.
|
| 597 |
+
4. **Seasonal patterns** — Group by month to see radiation cycle.
|
| 598 |
+
|
| 599 |
+
### Quality Checklist
|
| 600 |
+
- [ ] Accumulation period properly accounted for (hourly vs daily sums)
|
| 601 |
+
- [ ] Division by zero protected (nighttime ssrd = 0)
|
| 602 |
+
- [ ] Units clearly stated: W/m² or MJ/m²/day
|
| 603 |
+
|
| 604 |
+
### Common Pitfalls
|
| 605 |
+
- ⚠️ ERA5 radiation is ACCUMULATED over the forecast step — must difference consecutive steps for instantaneous values.
|
| 606 |
+
- ⚠️ `ssr` already accounts for clouds and albedo — don't double-correct.
|
| 607 |
+
|
| 608 |
+
### Interpretation
|
| 609 |
+
- Higher `ssrd` → high solar potential. Low `ssr/ssrd` ratio → high cloudiness or reflective surface (snow/ice).
|
| 610 |
+
""",
|
| 611 |
+
|
| 612 |
+
"wind_energy": """
|
| 613 |
+
## Wind Energy Assessment
|
| 614 |
+
|
| 615 |
+
### When to use
|
| 616 |
+
- Wind power density analysis
|
| 617 |
+
- Turbine hub-height wind resource mapping
|
| 618 |
+
|
| 619 |
+
### Workflow
|
| 620 |
+
1. **Use hub-height winds** — `u100`, `v100` (100m, not 10m surface winds).
|
| 621 |
+
2. **Compute speed** — `wspd100 = np.sqrt(u100**2 + v100**2)`.
|
| 622 |
+
3. **Power density** — `P = 0.5 * rho * wspd100**3` where rho ≈ 1.225 kg/m³.
|
| 623 |
+
4. **Capacity factor** — Fraction of time wind exceeds cut-in speed (~3 m/s) and stays below cut-out (~25 m/s).
|
| 624 |
+
5. **Weibull fit** — Fit shape (k) and scale (A) parameters to the wind speed distribution.
|
| 625 |
+
|
| 626 |
+
### Quality Checklist
|
| 627 |
+
- [ ] Using 100m winds, NOT 10m (turbines don't operate at surface)
|
| 628 |
+
- [ ] Power density in W/m²
|
| 629 |
+
- [ ] Seasonal variation checked (winter vs summer)
|
| 630 |
+
|
| 631 |
+
### Common Pitfalls
|
| 632 |
+
- ⚠️ Using 10m winds severely underestimates wind energy potential.
|
| 633 |
+
- ⚠️ Mean wind speed misleads — power depends on speed CUBED, so variability matters enormously.
|
| 634 |
+
|
| 635 |
+
### Interpretation
|
| 636 |
+
- Power density >400 W/m² = excellent wind resource.
|
| 637 |
+
- Report Weibull k parameter: k < 2 = gusty/variable, k > 3 = steady flow.
|
| 638 |
+
""",
|
| 639 |
+
|
| 640 |
+
"moisture_budget": """
|
| 641 |
+
## Moisture Budget Analysis
|
| 642 |
+
|
| 643 |
+
### When to use
|
| 644 |
+
- Understanding precipitation sources
|
| 645 |
+
- Tracking moisture plumes and convergence zones
|
| 646 |
+
|
| 647 |
+
### Workflow
|
| 648 |
+
1. **Extract** — `tcwv` (precipitable water), `tcw` (total column water incl. liquid/ice).
|
| 649 |
+
2. **Temporal evolution** — Track `tcwv` changes to infer moisture convergence.
|
| 650 |
+
3. **Relate to precip** — Compare `tcwv` peaks with `tp` to see conversion efficiency.
|
| 651 |
+
4. **Spatial patterns** — Map `tcwv` to identify moisture corridors.
|
| 652 |
+
|
| 653 |
+
### Quality Checklist
|
| 654 |
+
- [ ] Distinguish `tcwv` (vapour only) from `tcw` (vapour + liquid + ice)
|
| 655 |
+
- [ ] Units: kg/m² (equivalent to mm of water)
|
| 656 |
+
|
| 657 |
+
### Common Pitfalls
|
| 658 |
+
- ⚠️ High `tcwv` doesn't guarantee rain — need a lifting mechanism.
|
| 659 |
+
- ⚠️ `tcw - tcwv` gives cloud water + ice content (proxy for cloud thickness).
|
| 660 |
+
|
| 661 |
+
### Interpretation
|
| 662 |
+
- `tcwv` > 50 kg/m² in tropics = moisture-laden atmosphere primed for heavy precip.
|
| 663 |
+
""",
|
| 664 |
+
|
| 665 |
+
"convective_potential": """
|
| 666 |
+
## Convective Potential (Thunderstorm Risk)
|
| 667 |
+
|
| 668 |
+
### When to use
|
| 669 |
+
- Thunderstorm forecasting and climatology
|
| 670 |
+
- Severe weather risk assessment
|
| 671 |
+
|
| 672 |
+
### Workflow
|
| 673 |
+
1. **Extract CAPE** — Already available as `cape` variable (J/kg).
|
| 674 |
+
2. **Classify risk** — Low (<300), Moderate (300-1000), High (1000-2500), Extreme (>2500 J/kg).
|
| 675 |
+
3. **Combine with moisture** — High CAPE + high `tcwv` → heavy convective storms.
|
| 676 |
+
4. **Check trigger** — Fronts, orography, or strong daytime heating (`t2` diurnal cycle).
|
| 677 |
+
|
| 678 |
+
### Quality Checklist
|
| 679 |
+
- [ ] CAPE alone is insufficient — need a trigger mechanism
|
| 680 |
+
- [ ] Check `blh` (boundary layer height) — deep BLH aids convective initiation
|
| 681 |
+
|
| 682 |
+
### Common Pitfalls
|
| 683 |
+
- ⚠️ CAPE = potential energy, not a guarantee. High CAPE + strong capping inversion = no storms.
|
| 684 |
+
- ⚠️ CAPE is most meaningful in afternoon hours — avoid pre-dawn values.
|
| 685 |
+
|
| 686 |
+
### Interpretation
|
| 687 |
+
- CAPE > 1000 J/kg with deep BLH (>2km) and high `tcwv` = significant thunderstorm risk.
|
| 688 |
+
""",
|
| 689 |
+
|
| 690 |
+
"snow_cover": """
|
| 691 |
+
## Snow Cover & Melt Analysis
|
| 692 |
+
|
| 693 |
+
### When to use
|
| 694 |
+
- Tracking snow accumulation and melt timing
|
| 695 |
+
- Climate change impacts on snowpack
|
| 696 |
+
|
| 697 |
+
### Workflow
|
| 698 |
+
1. **Extract** — `sd` (Snow Depth in m water equivalent).
|
| 699 |
+
2. **Seasonal cycle** — Track start/end of snow season per grid point.
|
| 700 |
+
3. **Melt timing** — Find the date when `sd` drops below threshold.
|
| 701 |
+
4. **Trend** — Check if snow season is shortening over decades.
|
| 702 |
+
5. **Compare with `stl1`/`t2`** — Warming soil accelerates melt.
|
| 703 |
+
|
| 704 |
+
### Quality Checklist
|
| 705 |
+
- [ ] Units: meters of water equivalent
|
| 706 |
+
- [ ] Focus on mid/high latitudes and mountain regions
|
| 707 |
+
- [ ] Inter-annual variability large — use multi-year analysis
|
| 708 |
+
|
| 709 |
+
### Common Pitfalls
|
| 710 |
+
- ⚠️ ERA5 snow depth is modeled, not observed — cross-reference with station data.
|
| 711 |
+
- ⚠️ Rain-on-snow events can cause rapid melt not captured well in reanalysis.
|
| 712 |
+
|
| 713 |
+
### Interpretation
|
| 714 |
+
- Earlier melt = less summer water supply. Map with `Blues`, reversed for snowless areas.
|
| 715 |
+
""",
|
| 716 |
+
|
| 717 |
+
# -------------------------------------------------------------------------
|
| 718 |
+
# VISUALIZATION
|
| 719 |
+
# -------------------------------------------------------------------------
|
| 720 |
+
"visualization_spatial": """
|
| 721 |
+
## Spatial Map Visualization
|
| 722 |
+
|
| 723 |
+
### When to use
|
| 724 |
+
- Mapping absolute climate fields (Temp, Wind, Precip, Pressure)
|
| 725 |
+
|
| 726 |
+
### Workflow
|
| 727 |
+
1. **Figure** — `fig, ax = plt.subplots(figsize=(12, 8))`.
|
| 728 |
+
2. **Meshgrid** — `lons, lats = np.meshgrid(data.longitude, data.latitude)`.
|
| 729 |
+
3. **Plot** — `ax.pcolormesh(lons, lats, data, cmap=..., shading='auto')`.
|
| 730 |
+
4. **Colorbar** — ALWAYS: `plt.colorbar(mesh, ax=ax, label='Units', shrink=0.8)`.
|
| 731 |
+
5. **Cartopy** — Optional: add coastlines, land fill. Graceful fallback if not installed.
|
| 732 |
+
|
| 733 |
+
### Quality Checklist
|
| 734 |
+
- [ ] Figure 12×8 for maps
|
| 735 |
+
- [ ] Colormap matches variable:
|
| 736 |
+
- Temp: `RdYlBu_r` | Wind: `YlOrRd` | Precip: `YlGnBu`
|
| 737 |
+
- Pressure: `viridis` | Cloud: `Greys` | Anomalies: `RdBu_r`
|
| 738 |
+
- [ ] NEVER use `jet`
|
| 739 |
+
- [ ] Colorbar has label with units
|
| 740 |
+
|
| 741 |
+
### Common Pitfalls
|
| 742 |
+
- ⚠️ Diverging cmap on absolute data is misleading — diverging only for anomalies.
|
| 743 |
+
- ⚠️ Missing `shading='auto'` triggers deprecation warning.
|
| 744 |
+
""",
|
| 745 |
+
|
| 746 |
+
"visualization_timeseries": """
|
| 747 |
+
## Time Series Visualization
|
| 748 |
+
|
| 749 |
+
### When to use
|
| 750 |
+
- Temporal evolution of a variable at a point or region
|
| 751 |
+
|
| 752 |
+
### Workflow
|
| 753 |
+
1. **Area average** — `ts = data.mean(dim=['latitude', 'longitude'])` (with lat weighting!).
|
| 754 |
+
2. **Figure** — `fig, ax = plt.subplots(figsize=(10, 6))`.
|
| 755 |
+
3. **Raw line** — `ax.plot(ts.time, ts, linewidth=1.5)`.
|
| 756 |
+
4. **Smoothing** — Add rolling mean overlay with contrasting color.
|
| 757 |
+
5. **Date formatting** — `fig.autofmt_xdate(rotation=30)`.
|
| 758 |
+
|
| 759 |
+
### Quality Checklist
|
| 760 |
+
- [ ] Figure 10×6
|
| 761 |
+
- [ ] Y-axis has explicit units
|
| 762 |
+
- [ ] Legend included if multiple lines
|
| 763 |
+
- [ ] Trend line if requested: dashed with slope annotation
|
| 764 |
+
|
| 765 |
+
### Enhancements
|
| 766 |
+
- **Uncertainty band**: `ax.fill_between(time, mean-std, mean+std, alpha=0.2)`
|
| 767 |
+
- **Event markers**: `ax.axvline(date, color='red', ls='--')`
|
| 768 |
+
- **Twin axis**: `ax2 = ax.twinx()` for second variable
|
| 769 |
+
|
| 770 |
+
### Common Pitfalls
|
| 771 |
+
- ⚠️ Hourly data over 10+ years → unreadable block of ink. Resample to daily first.
|
| 772 |
+
""",
|
| 773 |
+
|
| 774 |
+
"visualization_anomaly_map": """
|
| 775 |
+
## Anomaly Map Visualization
|
| 776 |
+
|
| 777 |
+
### When to use
|
| 778 |
+
- Diverging data: departures, trends, z-scores
|
| 779 |
+
- Any map that has positive AND negative values
|
| 780 |
+
|
| 781 |
+
### Workflow
|
| 782 |
+
1. **Center at zero** — `from matplotlib.colors import TwoSlopeNorm`.
|
| 783 |
+
2. **Norm** — `norm = TwoSlopeNorm(vmin=data.min(), vcenter=0, vmax=data.max())`.
|
| 784 |
+
3. **Plot** — `pcolormesh(..., cmap='RdBu_r', norm=norm)`.
|
| 785 |
+
4. **Stippling** — Overlay significance: `contourf(..., levels=[0, 0.05], hatches=['...'], colors='none')`.
|
| 786 |
+
|
| 787 |
+
### Quality Checklist
|
| 788 |
+
- [ ] Zero is EXACTLY white/neutral in the colorbar
|
| 789 |
+
- [ ] Warm/dry = Red; Cool/wet = Blue
|
| 790 |
+
- [ ] Precip anomalies: consider `BrBG` instead of `RdBu_r`
|
| 791 |
+
|
| 792 |
+
### Common Pitfalls
|
| 793 |
+
- ⚠️ Without `TwoSlopeNorm`, skewed data makes 0 appear colored → reader is misled.
|
| 794 |
+
- ⚠️ Symmetric vmin/vmax (`vmax = max(abs(data))`) can also work but wastes color range.
|
| 795 |
+
""",
|
| 796 |
+
|
| 797 |
+
"visualization_wind": """
|
| 798 |
+
## Wind & Vector Visualization
|
| 799 |
+
|
| 800 |
+
### When to use
|
| 801 |
+
- Circulation patterns, wind fields, quiver/streamline plots
|
| 802 |
+
|
| 803 |
+
### Workflow
|
| 804 |
+
1. **Speed background** — `wspd` with `pcolormesh` + `YlOrRd`.
|
| 805 |
+
2. **Subsample vectors** — `skip = (slice(None, None, 5), slice(None, None, 5))` to avoid solid black.
|
| 806 |
+
3. **Quiver** — `ax.quiver(lons[skip], lats[skip], u[skip], v[skip], color='black')`.
|
| 807 |
+
4. **Alternative** — `ax.streamplot()` for flow visualization (less cluttered).
|
| 808 |
+
|
| 809 |
+
### Quality Checklist
|
| 810 |
+
- [ ] Background heatmap shows magnitude
|
| 811 |
+
- [ ] Vectors sparse enough to be readable
|
| 812 |
+
- [ ] Wind barbs: `ax.barbs()` for meteorological display
|
| 813 |
+
|
| 814 |
+
### Common Pitfalls
|
| 815 |
+
- ⚠️ Full-resolution quiver = completely black, unreadable mess.
|
| 816 |
+
- ⚠️ Check arrow scaling — default autoscale can make light winds invisible.
|
| 817 |
+
|
| 818 |
+
### Interpretation
|
| 819 |
+
- Arrows = direction, background color = magnitude. Cyclonic rotation = storm.
|
| 820 |
+
""",
|
| 821 |
+
|
| 822 |
+
"visualization_comparison": """
|
| 823 |
+
## Multi-Panel Comparison
|
| 824 |
+
|
| 825 |
+
### When to use
|
| 826 |
+
- Before/after, two periods, difference maps
|
| 827 |
+
- Multi-variable side-by-side
|
| 828 |
+
|
| 829 |
+
### Workflow
|
| 830 |
+
1. **Grid** — `fig, axes = plt.subplots(1, 3, figsize=(18, 6))`.
|
| 831 |
+
2. **Panels 1 & 2** — Absolute values with SHARED `vmin`/`vmax`.
|
| 832 |
+
3. **Panel 3** — Difference (A-B) with diverging cmap centered at zero.
|
| 833 |
+
|
| 834 |
+
### Quality Checklist
|
| 835 |
+
- [ ] Panels 1 & 2 share EXACT same vmin/vmax (otherwise visual comparison is invalid)
|
| 836 |
+
- [ ] Panel 3 has its own divergent colorbar centered at zero
|
| 837 |
+
- [ ] Titles clearly label what each panel shows
|
| 838 |
+
|
| 839 |
+
### Common Pitfalls
|
| 840 |
+
- ⚠️ Auto-scaled panels = impossible to compare visually. Always lock limits.
|
| 841 |
+
""",
|
| 842 |
+
|
| 843 |
+
"visualization_profile": """
|
| 844 |
+
## Hovmöller Diagrams
|
| 845 |
+
|
| 846 |
+
### When to use
|
| 847 |
+
- Lat-time or lon-time cross-sections
|
| 848 |
+
- Tracking wave propagation, ITCZ migration, monsoon onset
|
| 849 |
+
|
| 850 |
+
### Workflow
|
| 851 |
+
1. **Average out one dimension** — e.g., average across latitudes to get (lon, time).
|
| 852 |
+
2. **Transpose** — X=Time, Y=Lon/Lat.
|
| 853 |
+
3. **Plot** — `contourf` or `pcolormesh`, figure 12×6.
|
| 854 |
+
|
| 855 |
+
### Quality Checklist
|
| 856 |
+
- [ ] X-axis uses date formatting
|
| 857 |
+
- [ ] Y-axis labels state the averaged geographic slice
|
| 858 |
+
- [ ] Colormap matches variable type
|
| 859 |
+
|
| 860 |
+
### Common Pitfalls
|
| 861 |
+
- ⚠️ Swapping axes makes the diagram unintuitive. Time → X-axis convention.
|
| 862 |
+
|
| 863 |
+
### Interpretation
|
| 864 |
+
- Diagonal banding = propagating waves/systems. Vertical banding = stationary patterns.
|
| 865 |
+
""",
|
| 866 |
+
|
| 867 |
+
"visualization_distribution": """
|
| 868 |
+
## Distribution Visualization
|
| 869 |
+
|
| 870 |
+
### When to use
|
| 871 |
+
- Histograms, PDFs, box plots
|
| 872 |
+
- Comparing two time periods or regions
|
| 873 |
+
|
| 874 |
+
### Workflow
|
| 875 |
+
1. **Flatten** — `.values.flatten()`, drop NaNs.
|
| 876 |
+
2. **Shared bins** — `np.linspace(min, max, 50)`.
|
| 877 |
+
3. **Plot** — `ax.hist(data, bins=bins, alpha=0.5, density=True, label='Period')`.
|
| 878 |
+
4. **Median/mean markers** — Vertical lines with annotation.
|
| 879 |
+
|
| 880 |
+
### Quality Checklist
|
| 881 |
+
- [ ] `density=True` for comparing different-sized samples
|
| 882 |
+
- [ ] `alpha=0.5` for overlapping distributions
|
| 883 |
+
- [ ] Legend when comparing multiple distributions
|
| 884 |
+
|
| 885 |
+
### Common Pitfalls
|
| 886 |
+
- ⚠️ Raw counts (not density) skew comparison between periods with different sample sizes.
|
| 887 |
+
- ⚠️ Too few bins = lost detail. Too many = noisy. 30-50 bins is usually good.
|
| 888 |
+
|
| 889 |
+
### Interpretation
|
| 890 |
+
- Rightward shift = warming. Flatter + wider = more variability = more extremes.
|
| 891 |
+
""",
|
| 892 |
+
|
| 893 |
+
"visualization_animation": """
|
| 894 |
+
## Animated/Sequential Maps
|
| 895 |
+
|
| 896 |
+
### When to use
|
| 897 |
+
- Monthly/seasonal evolution of a field
|
| 898 |
+
- Event lifecycle (genesis → peak → decay)
|
| 899 |
+
|
| 900 |
+
### Workflow
|
| 901 |
+
1. **Global limits** — Find absolute vmin/vmax across ALL timesteps.
|
| 902 |
+
2. **Multi-panel grid** — `fig, axes = plt.subplots(2, 3, figsize=(18, 12))` for 6 timesteps.
|
| 903 |
+
3. **Lock colorbars** — Same vmin/vmax on every panel.
|
| 904 |
+
4. **Shared colorbar** — Remove per-panel colorbars, add one at the bottom.
|
| 905 |
+
|
| 906 |
+
### Quality Checklist
|
| 907 |
+
- [ ] Colorbar limits LOCKED across all panels (no jumping colors)
|
| 908 |
+
- [ ] Timestamps clearly labeled on each panel
|
| 909 |
+
- [ ] Static grid preferred over video (headless environment)
|
| 910 |
+
|
| 911 |
+
### Common Pitfalls
|
| 912 |
+
- ⚠️ Auto-scaled panels flash/jump between frames — always lock limits.
|
| 913 |
+
- ⚠️ MP4/GIF generation may fail in headless — use PNG grids instead.
|
| 914 |
+
""",
|
| 915 |
+
|
| 916 |
+
"visualization_dashboard": """
|
| 917 |
+
## Summary Dashboard
|
| 918 |
+
|
| 919 |
+
### When to use
|
| 920 |
+
- Comprehensive overview: map + time series + statistics in one figure
|
| 921 |
+
- Publication-ready event summaries
|
| 922 |
+
|
| 923 |
+
### Workflow
|
| 924 |
+
1. **Layout** — `fig = plt.figure(figsize=(16, 10))` + `matplotlib.gridspec`.
|
| 925 |
+
2. **Top row** — Large spatial map (anomaly or mean field).
|
| 926 |
+
3. **Bottom left** — Time series of regional mean.
|
| 927 |
+
4. **Bottom right** — Distribution histogram or box plot.
|
| 928 |
+
|
| 929 |
+
### Quality Checklist
|
| 930 |
+
- [ ] `plt.tight_layout()` or `constrained_layout=True` to prevent overlap
|
| 931 |
+
- [ ] Consistent color theme across all panels
|
| 932 |
+
- [ ] Clear panel labels (a, b, c)
|
| 933 |
+
|
| 934 |
+
### Common Pitfalls
|
| 935 |
+
- ⚠️ Cramming too much into small figure → illegible text. Scale figure size up.
|
| 936 |
+
- ⚠️ Different aspect ratios between map and time series need explicit gridspec ratios.
|
| 937 |
+
""",
|
| 938 |
+
|
| 939 |
+
"visualization_contour": """
|
| 940 |
+
## Contour & Isobar Plots
|
| 941 |
+
|
| 942 |
+
### When to use
|
| 943 |
+
- Pressure maps with isobars
|
| 944 |
+
- Temperature isotherms
|
| 945 |
+
- Any smoothly varying field where specific levels matter
|
| 946 |
+
|
| 947 |
+
### Workflow
|
| 948 |
+
1. **Define levels** — `levels = np.arange(990, 1040, 4)` for MSLP isobars.
|
| 949 |
+
2. **Filled contour** — `ax.contourf(lons, lats, data, levels=levels, cmap=...)`.
|
| 950 |
+
3. **Contour lines** — `cs = ax.contour(lons, lats, data, levels=levels, colors='black', linewidths=0.5)`.
|
| 951 |
+
4. **Labels** — `ax.clabel(cs, inline=True, fontsize=8)`.
|
| 952 |
+
|
| 953 |
+
### Quality Checklist
|
| 954 |
+
- [ ] Level spacing is physically meaningful (e.g., 4 hPa for MSLP)
|
| 955 |
+
- [ ] Contour labels don't overlap
|
| 956 |
+
- [ ] Filled + line contours combined for best readability
|
| 957 |
+
|
| 958 |
+
### Common Pitfalls
|
| 959 |
+
- ⚠️ Too many levels → cluttered, unreadable. 10-15 levels max.
|
| 960 |
+
- ⚠️ Non-uniform level spacing requires manual colorbar ticks.
|
| 961 |
+
|
| 962 |
+
### Interpretation
|
| 963 |
+
- Tightly packed isobars = strong pressure gradient = high winds.
|
| 964 |
+
""",
|
| 965 |
+
|
| 966 |
+
"visualization_correlation_map": """
|
| 967 |
+
## Spatial Correlation Maps
|
| 968 |
+
|
| 969 |
+
### When to use
|
| 970 |
+
- Showing where a variable correlates with an index (e.g., ENSO vs global precip)
|
| 971 |
+
- Teleconnection mapping
|
| 972 |
+
|
| 973 |
+
### Workflow
|
| 974 |
+
1. **Compute index** — 1D time series (e.g., Niño3.4 SST anomaly).
|
| 975 |
+
2. **Correlate** — `xr.corr(index, spatial_field, dim='time')` → 2D R-map.
|
| 976 |
+
3. **Significance** — Compute p-values from sample size and R.
|
| 977 |
+
4. **Plot** — Map R values with `RdBu_r` centered at zero. Stipple p < 0.05.
|
| 978 |
+
|
| 979 |
+
### Quality Checklist
|
| 980 |
+
- [ ] Both index and field deseasonalized
|
| 981 |
+
- [ ] R-map centered at zero (TwoSlopeNorm or symmetric limits)
|
| 982 |
+
- [ ] Significant areas stippled or hatched
|
| 983 |
+
- [ ] Sample size ≥30 stated
|
| 984 |
+
|
| 985 |
+
### Common Pitfalls
|
| 986 |
+
- ⚠️ Raw data correlations dominated by shared seasonal cycle.
|
| 987 |
+
- ⚠️ Field significance: many grid points → some will be significant by chance. Apply FDR correction.
|
| 988 |
+
|
| 989 |
+
### Interpretation
|
| 990 |
+
- R > 0: in-phase with index. R < 0: out-of-phase. |R| > 0.5 = strong relationship.
|
| 991 |
+
""",
|
| 992 |
+
|
| 993 |
+
# -------------------------------------------------------------------------
|
| 994 |
+
# MARITIME ANALYSIS
|
| 995 |
+
# -------------------------------------------------------------------------
|
| 996 |
+
"maritime_route": """
|
| 997 |
+
## Maritime Route Risk Analysis
|
| 998 |
+
|
| 999 |
+
### When to use
|
| 1000 |
+
- Analyzing weather risks along calculated shipping lanes
|
| 1001 |
+
- Voyage planning and hazard assessment
|
| 1002 |
+
|
| 1003 |
+
### Workflow
|
| 1004 |
+
1. **Route** — Call `calculate_maritime_route` → waypoints + bounding box.
|
| 1005 |
+
2. **Data** — Download `u10`, `v10` for route bbox, target month, last 3 years.
|
| 1006 |
+
3. **Wind speed** — `wspd = np.sqrt(u10**2 + v10**2)`.
|
| 1007 |
+
4. **Extract** — Loop waypoints: `.sel(lat=lat, lon=lon, method='nearest')`.
|
| 1008 |
+
5. **Risk classify** — Safe (<10), Caution (10-17), Danger (17-24), Extreme (>24 m/s).
|
| 1009 |
+
6. **Statistics** — P95 wind speed at each waypoint, % time in each risk category.
|
| 1010 |
+
|
| 1011 |
+
### Quality Checklist
|
| 1012 |
+
- [ ] Bounding box from route tool used DIRECTLY (don't convert coords)
|
| 1013 |
+
- [ ] 3-year period for climatological context, not just one date
|
| 1014 |
+
- [ ] Risk categories applied at waypoint level
|
| 1015 |
+
|
| 1016 |
+
### Common Pitfalls
|
| 1017 |
+
- ⚠️ Global hourly downloads → timeout. Subset tightly to route bbox.
|
| 1018 |
+
- ⚠️ Don't use bounding box mean — extract AT waypoints for route-specific risk.
|
| 1019 |
+
""",
|
| 1020 |
+
|
| 1021 |
+
"maritime_visualization": """
|
| 1022 |
+
## Maritime Route Risk Visualization
|
| 1023 |
+
|
| 1024 |
+
### When to use
|
| 1025 |
+
- Plotting route risk maps with waypoint-level risk coloring
|
| 1026 |
+
|
| 1027 |
+
### Workflow
|
| 1028 |
+
1. **Background** — Map mean `wspd` with `pcolormesh` + `YlOrRd`.
|
| 1029 |
+
2. **Route line** — Dashed line connecting waypoints.
|
| 1030 |
+
3. **Waypoint scatter** — Color by risk: Green (<10), Amber (10-17), Coral (17-24), Red (>24 m/s).
|
| 1031 |
+
4. **Labels** — "ORIGIN" and "DEST" annotations.
|
| 1032 |
+
5. **Legend** — Custom 4-category legend (mandatory).
|
| 1033 |
+
|
| 1034 |
+
### Quality Checklist
|
| 1035 |
+
- [ ] 4-category risk legend ALWAYS included
|
| 1036 |
+
- [ ] Origin/Destination labeled
|
| 1037 |
+
- [ ] Colormap: `YlOrRd` for wind speed
|
| 1038 |
+
- [ ] Saved to PLOTS_DIR
|
| 1039 |
+
|
| 1040 |
+
### Common Pitfalls
|
| 1041 |
+
- ⚠️ No legend → colored dots are meaningless to the user.
|
| 1042 |
+
- ⚠️ Route line + waypoints must be on top (high zorder) to not be hidden by background.
|
| 1043 |
+
""",
|
| 1044 |
+
}
|
| 1045 |
+
|
| 1046 |
+
|
| 1047 |
+
# =============================================================================
|
| 1048 |
+
# ARGUMENT SCHEMA
|
| 1049 |
+
# =============================================================================
|
| 1050 |
+
|
| 1051 |
+
class AnalysisGuideArgs(BaseModel):
    """Arguments for analysis guide retrieval.

    The ``topic`` Literal enumerates every valid guide key; it should stay
    in sync with the keys of the module-level ANALYSIS_GUIDES dict — TODO
    confirm the two lists match whenever a guide is added or removed.
    """

    # Closed set of guide topics, grouped by analysis category.
    topic: Literal[
        # Data operations
        "load_data",
        "spatial_subset",
        "temporal_subset",
        # Statistical analysis
        "anomalies",
        "zscore",
        "trend_analysis",
        "eof_analysis",
        # Advanced analysis
        "correlation_analysis",
        "composite_analysis",
        "diurnal_cycle",
        "seasonal_decomposition",
        "spectral_analysis",
        "spatial_statistics",
        "multi_variable",
        "climatology_normals",
        # Climate indices & extremes
        "climate_indices",
        "extremes",
        "drought_analysis",
        "heatwave_detection",
        "atmospheric_rivers",
        "blocking_events",
        # Domain-specific
        "energy_budget",
        "wind_energy",
        "moisture_budget",
        "convective_potential",
        "snow_cover",
        # Visualization
        "visualization_spatial",
        "visualization_timeseries",
        "visualization_anomaly_map",
        "visualization_wind",
        "visualization_comparison",
        "visualization_profile",
        "visualization_distribution",
        "visualization_animation",
        "visualization_dashboard",
        "visualization_contour",
        "visualization_correlation_map",
        # Maritime
        "maritime_route",
        "maritime_visualization",
    ] = Field(
        description="Analysis topic to get guidance for"
    )
|
| 1104 |
+
|
| 1105 |
+
|
| 1106 |
+
# =============================================================================
|
| 1107 |
+
# TOOL FUNCTION
|
| 1108 |
+
# =============================================================================
|
| 1109 |
+
|
| 1110 |
+
def get_analysis_guide(topic: str) -> str:
    """Return methodological guidance text for one climate-analysis topic.

    Looks the topic up in the module-level ANALYSIS_GUIDES registry and
    wraps the matching guide in a titled markdown template. Unknown (or
    empty) topics produce an error string listing every available topic
    instead of raising, so the agent can self-correct.
    """
    guide = ANALYSIS_GUIDES.get(topic)
    if guide:
        return f"""
# Analysis Guide: {topic.replace('_', ' ').title()}

{guide}

---
Use python_repl to implement this analysis with your downloaded ERA5 data.
"""
    # Unknown topic — tell the caller what is valid rather than failing hard.
    available = ", ".join(sorted(ANALYSIS_GUIDES.keys()))
    return f"Unknown topic: {topic}. Available: {available}"
|
| 1130 |
+
|
| 1131 |
+
|
| 1132 |
+
# =============================================================================
|
| 1133 |
+
# TOOL DEFINITIONS
|
| 1134 |
+
# =============================================================================
|
| 1135 |
+
|
| 1136 |
+
# LangChain tool wrapper exposing get_analysis_guide to the agent.
# The args schema (AnalysisGuideArgs) restricts `topic` to the known guide keys.
analysis_guide_tool = StructuredTool.from_function(
    func=get_analysis_guide,
    name="get_analysis_guide",
    description="""
Get methodological guidance for climate data analysis.

Returns workflow steps, quality checklists, and pitfall warnings for:
- Data: load_data, spatial_subset, temporal_subset
- Statistics: anomalies, zscore, trend_analysis, eof_analysis
- Advanced: correlation_analysis, composite_analysis, diurnal_cycle,
seasonal_decomposition, spectral_analysis, spatial_statistics,
multi_variable, climatology_normals
- Climate: climate_indices, extremes, drought_analysis, heatwave_detection,
atmospheric_rivers, blocking_events
- Domain: energy_budget, wind_energy, moisture_budget, convective_potential, snow_cover
- Visualization: visualization_spatial, visualization_timeseries,
visualization_anomaly_map, visualization_wind, visualization_comparison,
visualization_profile, visualization_distribution, visualization_animation,
visualization_dashboard, visualization_contour, visualization_correlation_map
- Maritime: maritime_route, maritime_visualization

Use this BEFORE writing analysis code in python_repl.
""",
    args_schema=AnalysisGuideArgs,
)
|
| 1161 |
+
|
| 1162 |
+
|
| 1163 |
+
# Visualization guide - alias for backward compatibility
|
| 1164 |
+
# Alias tool for backward compatibility: same function and schema as
# analysis_guide_tool, published under the legacy name
# "get_visualization_guide" with a visualization-focused description.
visualization_guide_tool = StructuredTool.from_function(
    func=get_analysis_guide,
    name="get_visualization_guide",
    description="""
Get publication-grade visualization instructions for ERA5 climate data.

CALL THIS BEFORE creating any plot to get:
- Correct colormap choices
- Standard value ranges
- Required map elements
- Best practices

Available visualization topics:
- visualization_spatial: Maps with proper projections
- visualization_timeseries: Time series plots
- visualization_anomaly_map: Diverging anomaly maps
- visualization_wind: Quiver/streamline plots
- visualization_comparison: Multi-panel comparisons
- visualization_profile: Hovmöller diagrams
- visualization_distribution: Histograms/PDFs
- visualization_animation: Sequential map grids
- visualization_dashboard: Multi-panel summaries
- visualization_contour: Isobar/isotherm plots
- visualization_correlation_map: Spatial correlation maps
- maritime_visualization: Route risk maps
""",
    args_schema=AnalysisGuideArgs,
)
|
src/eurus/tools/era5.py
ADDED
|
@@ -0,0 +1,204 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ERA5 Data Retrieval Tool (Wrapper)
|
| 3 |
+
==================================
|
| 4 |
+
LangChain tool definition. Imports core logic from ..retrieval
|
| 5 |
+
|
| 6 |
+
This is a THIN WRAPPER - all retrieval logic lives in eurus/retrieval.py
|
| 7 |
+
|
| 8 |
+
QUERY_TYPE IS AUTO-DETECTED based on time/area rules:
|
| 9 |
+
- TEMPORAL: time > 1 day AND area < 30°×30°
|
| 10 |
+
- SPATIAL: time ≤ 1 day OR area ≥ 30°×30°
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
import logging
|
| 14 |
+
from typing import Optional
|
| 15 |
+
from datetime import datetime
|
| 16 |
+
|
| 17 |
+
from pydantic import BaseModel, Field, field_validator
|
| 18 |
+
from langchain_core.tools import StructuredTool
|
| 19 |
+
|
| 20 |
+
# IMPORT CORE LOGIC FROM RETRIEVAL MODULE - SINGLE SOURCE OF TRUTH
|
| 21 |
+
from ..retrieval import retrieve_era5_data as _retrieve_era5_data
|
| 22 |
+
from ..config import get_short_name
|
| 23 |
+
|
| 24 |
+
logger = logging.getLogger(__name__)
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
# ============================================================================
|
| 28 |
+
# ARGUMENT SCHEMA (NO query_type - it's auto-detected!)
|
| 29 |
+
# ============================================================================
|
| 30 |
+
|
| 31 |
+
class ERA5RetrievalArgs(BaseModel):
    """Arguments for ERA5 data retrieval. query_type is AUTO-DETECTED."""

    # ERA5 short name; validated leniently below (warn-only) so that new
    # or aliased variables are still attempted rather than rejected.
    variable_id: str = Field(
        description=(
            "ERA5 variable short name. Available variables (22 total):\n"
            "Ocean: sst (Sea Surface Temperature)\n"
            "Temperature: t2 (2m Air Temp), d2 (2m Dewpoint), skt (Skin Temp)\n"
            "Wind 10m: u10 (Eastward), v10 (Northward)\n"
            "Wind 100m: u100 (Eastward), v100 (Northward)\n"
            "Pressure: sp (Surface), mslp (Mean Sea Level)\n"
            "Boundary Layer: blh (BL Height), cape (CAPE)\n"
            "Cloud/Precip: tcc (Cloud Cover), cp (Convective), lsp (Large-scale), tp (Total Precip)\n"
            "Radiation: ssr (Net Solar), ssrd (Solar Downwards)\n"
            "Moisture: tcw (Total Column Water), tcwv (Water Vapour)\n"
            "Land: sd (Snow Depth), stl1 (Soil Temp L1), swvl1 (Soil Water L1)"
        )
    )

    # Dates are strings so LLM-produced arguments validate cheaply; format
    # is enforced by validate_date_format below.
    start_date: str = Field(
        description="Start date in YYYY-MM-DD format (e.g., '2021-02-01')"
    )

    end_date: str = Field(
        description="End date in YYYY-MM-DD format (e.g., '2023-02-28')"
    )

    # Bounding box: pydantic ge/le constraints reject out-of-range values
    # before any network retrieval is attempted.
    min_latitude: float = Field(
        ge=-90.0, le=90.0,
        description="Southern latitude bound (-90 to 90)"
    )

    max_latitude: float = Field(
        ge=-90.0, le=90.0,
        description="Northern latitude bound (-90 to 90)"
    )

    # Longitude accepts both -180..180 and 0..360 conventions (upper bound 360).
    min_longitude: float = Field(
        ge=-180.0, le=360.0,
        description="Western longitude bound. Use -180 to 180 for Europe/Atlantic."
    )

    max_longitude: float = Field(
        ge=-180.0, le=360.0,
        description="Eastern longitude bound. Use -180 to 180 for Europe/Atlantic."
    )

    region: Optional[str] = Field(
        default=None,
        description=(
            "Optional predefined region (overrides lat/lon if specified):\n"
            "north_atlantic, mediterranean, nino34, global"
        )
    )

    @field_validator('start_date', 'end_date')
    @classmethod
    def validate_date_format(cls, v: str) -> str:
        """Reject dates that do not parse as strict YYYY-MM-DD."""
        try:
            datetime.strptime(v, '%Y-%m-%d')
        except ValueError:
            raise ValueError(f"Date must be in YYYY-MM-DD format, got: {v}")
        return v

    @field_validator('variable_id')
    @classmethod
    def validate_variable(cls, v: str) -> str:
        """Warn (but do not fail) when the variable is not in the known catalog.

        Deliberately lenient: retrieval is still attempted so the data
        backend is the final authority on variable availability.
        """
        # Local import avoids a circular import at module load time —
        # presumably; confirm against eurus.config's import graph.
        from ..config import get_all_short_names
        short_name = get_short_name(v)
        valid_vars = get_all_short_names()  # DRY: use config as single source of truth
        if short_name not in valid_vars:
            logger.warning(f"Variable '{v}' may not be available. Will attempt anyway.")
        return v
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
# ============================================================================
|
| 107 |
+
# AUTO-DETECT QUERY TYPE
|
| 108 |
+
# ============================================================================
|
| 109 |
+
|
| 110 |
+
def _auto_detect_query_type(
    start_date: str,
    end_date: str,
    min_lat: float,
    max_lat: float,
    min_lon: float,
    max_lon: float
) -> str:
    """Auto-detect the optimal query_type from the request's time/area extent.

    RULES (mirrors the module docstring):
    - TEMPORAL: time > 1 day AND area < 30°×30° (900 sq degrees)
    - SPATIAL:  time ≤ 1 day OR area ≥ 30°×30°

    Args:
        start_date: inclusive start, YYYY-MM-DD (assumed pre-validated).
        end_date: inclusive end, YYYY-MM-DD.
        min_lat/max_lat/min_lon/max_lon: bounding box in degrees.

    Returns:
        Either "temporal" or "spatial".
    """
    fmt = '%Y-%m-%d'
    start = datetime.strptime(start_date, fmt)
    end = datetime.strptime(end_date, fmt)
    # Inclusive day count: start == end means a 1-day request.
    time_days = (end - start).days + 1

    # Bounding-box area in square degrees; abs() tolerates swapped bounds.
    lat_span = abs(max_lat - min_lat)
    lon_span = abs(max_lon - min_lon)
    area = lat_span * lon_span

    # Long-but-small requests favour time-chunked access; everything else
    # (single day, or a large footprint) favours space-chunked access.
    if time_days > 1 and area < 900:
        query_type = "temporal"
    else:
        query_type = "spatial"

    # FIX: lazy %-style logging args — the message is only formatted when
    # INFO is enabled (avoids f-string work on every call).
    logger.info("Auto-detected query_type: %s (time=%dd, area=%.0fsq°)",
                query_type, time_days, area)

    return query_type
|
| 145 |
+
|
| 146 |
+
|
| 147 |
+
# ============================================================================
|
| 148 |
+
# WRAPPER FUNCTION (auto-adds query_type)
|
| 149 |
+
# ============================================================================
|
| 150 |
+
|
| 151 |
+
def retrieve_era5_data(
    variable_id: str,
    start_date: str,
    end_date: str,
    min_latitude: float,
    max_latitude: float,
    min_longitude: float,
    max_longitude: float,
    region: Optional[str] = None
) -> str:
    """Auto-detect query_type, then delegate to the real retrieval function.

    Thin wrapper: the agent never supplies query_type — it is derived from
    the request's time span and bounding-box area (_auto_detect_query_type)
    and forwarded, together with all original arguments, to
    eurus.retrieval.retrieve_era5_data.
    """
    detected_type = _auto_detect_query_type(
        start_date, end_date,
        min_latitude, max_latitude,
        min_longitude, max_longitude
    )

    # Forward everything untouched; only query_type is added.
    return _retrieve_era5_data(
        query_type=detected_type,
        variable_id=variable_id,
        start_date=start_date,
        end_date=end_date,
        min_latitude=min_latitude,
        max_latitude=max_latitude,
        min_longitude=min_longitude,
        max_longitude=max_longitude,
        region=region
    )
|
| 183 |
+
|
| 184 |
+
|
| 185 |
+
# ============================================================================
|
| 186 |
+
# LANGCHAIN TOOL CREATION
|
| 187 |
+
# ============================================================================
|
| 188 |
+
|
| 189 |
+
# LangChain tool exposing the auto-detecting wrapper above. Validation of
# dates/bounds happens in ERA5RetrievalArgs before the function runs.
era5_tool = StructuredTool.from_function(
    func=retrieve_era5_data,
    name="retrieve_era5_data",
    description=(
        "Retrieves ERA5 climate reanalysis data from Earthmover's cloud archive.\n\n"
        "⚠️ query_type is AUTO-DETECTED - you don't need to specify it!\n\n"
        "Just provide:\n"
        "- variable_id: one of 22 ERA5 variables (sst, t2, d2, skt, u10, v10, u100, v100, "
        "sp, mslp, blh, cape, tcc, cp, lsp, tp, ssr, ssrd, tcw, tcwv, sd, stl1, swvl1)\n"
        "- start_date, end_date: YYYY-MM-DD format\n"
        "- lat/lon bounds: Use values from maritime route bounding box!\n\n"
        "DATA: 1975-2024.\n"
        "Returns file path. Load with: xr.open_zarr('PATH')"
    ),
    args_schema=ERA5RetrievalArgs
)
|
src/eurus/tools/repl.py
ADDED
|
@@ -0,0 +1,564 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Superb Python REPL Tool
|
| 3 |
+
=======================
|
| 4 |
+
A persistent Python execution environment for the agent.
|
| 5 |
+
Uses a SUBPROCESS for true process isolation — can be cleanly killed on timeout.
|
| 6 |
+
|
| 7 |
+
PLOT CAPTURE: When running in web mode, plots are captured via callback.
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
import sys
|
| 11 |
+
import io
|
| 12 |
+
import json
|
| 13 |
+
import logging
|
| 14 |
+
import gc
|
| 15 |
+
import os
|
| 16 |
+
import re
|
| 17 |
+
import base64
|
| 18 |
+
import tempfile
|
| 19 |
+
import subprocess
|
| 20 |
+
import threading
|
| 21 |
+
import traceback
|
| 22 |
+
import matplotlib
|
| 23 |
+
# Force non-interactive backend to prevent crashes on headless servers
|
| 24 |
+
matplotlib.use('Agg')
|
| 25 |
+
import matplotlib.pyplot as plt
|
| 26 |
+
import matplotlib.colors as mcolors # Pre-import for custom colormaps
|
| 27 |
+
|
| 28 |
+
logger = logging.getLogger(__name__)
|
| 29 |
+
import matplotlib.cm as cm # Pre-import for colormap access
|
| 30 |
+
|
| 31 |
+
# =============================================================================
|
| 32 |
+
# PUBLICATION-GRADE LIGHT THEME (white background for academic papers)
|
| 33 |
+
# =============================================================================
|
| 34 |
+
_EURUS_STYLE = {
|
| 35 |
+
# ── Figure ──
|
| 36 |
+
"figure.figsize": (10, 6),
|
| 37 |
+
"figure.dpi": 150,
|
| 38 |
+
"figure.facecolor": "white",
|
| 39 |
+
"figure.edgecolor": "white",
|
| 40 |
+
"savefig.facecolor": "white",
|
| 41 |
+
"savefig.edgecolor": "white",
|
| 42 |
+
"savefig.dpi": 300, # 300 DPI for print-quality
|
| 43 |
+
"savefig.bbox": "tight",
|
| 44 |
+
"savefig.pad_inches": 0.15,
|
| 45 |
+
# ── Axes ──
|
| 46 |
+
"axes.facecolor": "white",
|
| 47 |
+
"axes.edgecolor": "#333333",
|
| 48 |
+
"axes.labelcolor": "#1a1a1a",
|
| 49 |
+
"axes.titlecolor": "#000000",
|
| 50 |
+
"axes.labelsize": 12,
|
| 51 |
+
"axes.titlesize": 14,
|
| 52 |
+
"axes.titleweight": "bold",
|
| 53 |
+
"axes.titlepad": 12,
|
| 54 |
+
"axes.grid": True,
|
| 55 |
+
"axes.spines.top": False,
|
| 56 |
+
"axes.spines.right": False,
|
| 57 |
+
"axes.linewidth": 0.8,
|
| 58 |
+
# ── Grid ──
|
| 59 |
+
"grid.color": "#d0d0d0",
|
| 60 |
+
"grid.alpha": 0.5,
|
| 61 |
+
"grid.linewidth": 0.5,
|
| 62 |
+
"grid.linestyle": "--",
|
| 63 |
+
# ── Ticks ──
|
| 64 |
+
"xtick.color": "#333333",
|
| 65 |
+
"ytick.color": "#333333",
|
| 66 |
+
"xtick.labelsize": 10,
|
| 67 |
+
"ytick.labelsize": 10,
|
| 68 |
+
"xtick.direction": "out",
|
| 69 |
+
"ytick.direction": "out",
|
| 70 |
+
# ── Text ──
|
| 71 |
+
"text.color": "#1a1a1a",
|
| 72 |
+
"font.family": "sans-serif",
|
| 73 |
+
"font.sans-serif": ["DejaVu Sans", "Arial", "Helvetica"],
|
| 74 |
+
"font.size": 11,
|
| 75 |
+
# ── Lines ──
|
| 76 |
+
"lines.linewidth": 1.8,
|
| 77 |
+
"lines.antialiased": True,
|
| 78 |
+
"lines.markersize": 5,
|
| 79 |
+
# ── Legend ──
|
| 80 |
+
"legend.facecolor": "white",
|
| 81 |
+
"legend.edgecolor": "#cccccc",
|
| 82 |
+
"legend.fontsize": 10,
|
| 83 |
+
"legend.framealpha": 0.95,
|
| 84 |
+
"legend.shadow": False,
|
| 85 |
+
# ── Colorbar ──
|
| 86 |
+
"image.cmap": "viridis",
|
| 87 |
+
# ── Patches ──
|
| 88 |
+
"patch.edgecolor": "#333333",
|
| 89 |
+
}
|
| 90 |
+
matplotlib.rcParams.update(_EURUS_STYLE)
|
| 91 |
+
|
| 92 |
+
# Curated color cycle for white backgrounds (high-contrast, publication-safe)
|
| 93 |
+
_EURUS_COLORS = [
|
| 94 |
+
"#1f77b4", # steel blue
|
| 95 |
+
"#d62728", # brick red
|
| 96 |
+
"#2ca02c", # forest green
|
| 97 |
+
"#ff7f0e", # orange
|
| 98 |
+
"#9467bd", # muted purple
|
| 99 |
+
"#17becf", # cyan
|
| 100 |
+
"#e377c2", # pink
|
| 101 |
+
"#8c564b", # brown
|
| 102 |
+
]
|
| 103 |
+
matplotlib.rcParams["axes.prop_cycle"] = matplotlib.cycler(color=_EURUS_COLORS)
|
| 104 |
+
|
| 105 |
+
from typing import Dict, Optional, Type, Callable
|
| 106 |
+
from pathlib import Path
|
| 107 |
+
from pydantic import BaseModel, Field
|
| 108 |
+
from langchain_core.tools import BaseTool
|
| 109 |
+
|
| 110 |
+
# Import PLOTS_DIR for correct plot saving location
|
| 111 |
+
from eurus.config import PLOTS_DIR
|
| 112 |
+
|
| 113 |
+
# Pre-import common scientific libraries for convenience (parent-side only)
|
| 114 |
+
import pandas as pd
|
| 115 |
+
import numpy as np
|
| 116 |
+
import xarray as xr
|
| 117 |
+
from datetime import datetime, timedelta
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
# =============================================================================
|
| 122 |
+
# PERSISTENT SUBPROCESS REPL
|
| 123 |
+
# =============================================================================
|
| 124 |
+
|
| 125 |
+
# The Python script that runs inside the subprocess.
|
| 126 |
+
# It receives JSON commands on stdin and sends JSON responses on stdout.
|
| 127 |
+
_SUBPROCESS_SCRIPT = r'''
|
| 128 |
+
import sys
|
| 129 |
+
import os
|
| 130 |
+
import json
|
| 131 |
+
import gc
|
| 132 |
+
from io import StringIO
|
| 133 |
+
|
| 134 |
+
# Apply Eurus matplotlib style INSIDE the subprocess
|
| 135 |
+
import matplotlib
|
| 136 |
+
matplotlib.use("Agg")
|
| 137 |
+
import matplotlib.pyplot as plt
|
| 138 |
+
import matplotlib.colors as mcolors
|
| 139 |
+
import matplotlib.cm as cm
|
| 140 |
+
|
| 141 |
+
_style = json.loads(os.environ.get("EURUS_MPL_STYLE", "{}"))
|
| 142 |
+
if _style:
|
| 143 |
+
matplotlib.rcParams.update(_style)
|
| 144 |
+
_colors = json.loads(os.environ.get("EURUS_MPL_COLORS", "[]"))
|
| 145 |
+
if _colors:
|
| 146 |
+
matplotlib.rcParams["axes.prop_cycle"] = matplotlib.cycler(color=_colors)
|
| 147 |
+
|
| 148 |
+
# Pre-import scientific stack
|
| 149 |
+
import pandas as pd
|
| 150 |
+
import numpy as np
|
| 151 |
+
import xarray as xr
|
| 152 |
+
from datetime import datetime, timedelta
|
| 153 |
+
|
| 154 |
+
# Set up execution globals with pre-loaded libraries
|
| 155 |
+
exec_globals = {
|
| 156 |
+
"__builtins__": __builtins__,
|
| 157 |
+
"pd": pd,
|
| 158 |
+
"np": np,
|
| 159 |
+
"xr": xr,
|
| 160 |
+
"plt": plt,
|
| 161 |
+
"mcolors": mcolors,
|
| 162 |
+
"cm": cm,
|
| 163 |
+
"datetime": datetime,
|
| 164 |
+
"timedelta": timedelta,
|
| 165 |
+
"PLOTS_DIR": os.environ.get("EURUS_PLOTS_DIR", "plots"),
|
| 166 |
+
}
|
| 167 |
+
|
| 168 |
+
# Signal readiness
|
| 169 |
+
print("SUBPROCESS_READY", flush=True)
|
| 170 |
+
|
| 171 |
+
while True:
|
| 172 |
+
try:
|
| 173 |
+
line = input()
|
| 174 |
+
if line == "EXIT_SUBPROCESS":
|
| 175 |
+
break
|
| 176 |
+
|
| 177 |
+
cmd = json.loads(line)
|
| 178 |
+
|
| 179 |
+
if cmd["type"] == "exec":
|
| 180 |
+
code = cmd["code"]
|
| 181 |
+
|
| 182 |
+
stdout_capture = StringIO()
|
| 183 |
+
stderr_capture = StringIO()
|
| 184 |
+
old_stdout, old_stderr = sys.stdout, sys.stderr
|
| 185 |
+
|
| 186 |
+
try:
|
| 187 |
+
sys.stdout = stdout_capture
|
| 188 |
+
sys.stderr = stderr_capture
|
| 189 |
+
|
| 190 |
+
# Try eval first (expression mode), fall back to exec
|
| 191 |
+
try:
|
| 192 |
+
compiled = compile(code, "<repl>", "eval")
|
| 193 |
+
result = eval(compiled, exec_globals)
|
| 194 |
+
output = stdout_capture.getvalue()
|
| 195 |
+
if result is not None:
|
| 196 |
+
output += repr(result)
|
| 197 |
+
if not output.strip():
|
| 198 |
+
output = repr(result) if result is not None else "(No output)"
|
| 199 |
+
except SyntaxError:
|
| 200 |
+
# Jupyter-style: auto-print last expression in multi-line code
|
| 201 |
+
import ast as _ast
|
| 202 |
+
try:
|
| 203 |
+
tree = _ast.parse(code)
|
| 204 |
+
if tree.body and isinstance(tree.body[-1], _ast.Expr):
|
| 205 |
+
# Separate the last expression from preceding stmts
|
| 206 |
+
last_expr_node = tree.body.pop()
|
| 207 |
+
if tree.body:
|
| 208 |
+
exec(compile(_ast.Module(body=tree.body, type_ignores=[]), "<repl>", "exec"), exec_globals)
|
| 209 |
+
result = eval(compile(_ast.Expression(body=last_expr_node.value), "<repl>", "eval"), exec_globals)
|
| 210 |
+
output = stdout_capture.getvalue()
|
| 211 |
+
if result is not None:
|
| 212 |
+
output += repr(result) if not output.strip() else "\n" + repr(result)
|
| 213 |
+
else:
|
| 214 |
+
exec(code, exec_globals)
|
| 215 |
+
output = stdout_capture.getvalue()
|
| 216 |
+
except SyntaxError:
|
| 217 |
+
exec(code, exec_globals)
|
| 218 |
+
output = stdout_capture.getvalue()
|
| 219 |
+
if not output.strip():
|
| 220 |
+
output = "(Executed successfully. Use print() to see results.)"
|
| 221 |
+
|
| 222 |
+
sys.stdout, sys.stderr = old_stdout, old_stderr
|
| 223 |
+
result_json = {
|
| 224 |
+
"status": "success",
|
| 225 |
+
"stdout": output.strip(),
|
| 226 |
+
"stderr": stderr_capture.getvalue(),
|
| 227 |
+
}
|
| 228 |
+
|
| 229 |
+
except Exception as e:
|
| 230 |
+
sys.stdout, sys.stderr = old_stdout, old_stderr
|
| 231 |
+
import traceback
|
| 232 |
+
result_json = {
|
| 233 |
+
"status": "error",
|
| 234 |
+
"error": f"Error: {str(e)}\n{traceback.format_exc()}",
|
| 235 |
+
"stdout": stdout_capture.getvalue(),
|
| 236 |
+
"stderr": stderr_capture.getvalue(),
|
| 237 |
+
}
|
| 238 |
+
finally:
|
| 239 |
+
plt.close("all")
|
| 240 |
+
gc.collect()
|
| 241 |
+
|
| 242 |
+
print(json.dumps(result_json), flush=True)
|
| 243 |
+
|
| 244 |
+
except EOFError:
|
| 245 |
+
break
|
| 246 |
+
except Exception as e:
|
| 247 |
+
# Fatal error in the communication loop itself
|
| 248 |
+
old_stdout = sys.__stdout__
|
| 249 |
+
sys.stdout = old_stdout
|
| 250 |
+
print(json.dumps({"status": "fatal", "error": str(e)}), flush=True)
|
| 251 |
+
'''
|
| 252 |
+
|
| 253 |
+
|
| 254 |
+
class PersistentREPL:
|
| 255 |
+
"""
|
| 256 |
+
Manages a persistent Python subprocess for code execution.
|
| 257 |
+
Provides true process isolation with clean kill on timeout.
|
| 258 |
+
"""
|
| 259 |
+
|
| 260 |
+
def __init__(self, working_dir: str = "."):
    """Create the REPL and immediately spawn its backing subprocess.

    Args:
        working_dir: cwd for the child process; a non-directory path
            falls back to the parent's cwd (see _start_subprocess).
    """
    self._working_dir = working_dir
    # Serialize run() calls per instance so stdin/stdout framing is never
    # interleaved between concurrent callers.
    self._lock = threading.Lock()
    self._process: Optional[subprocess.Popen] = None
    self._temp_script: Optional[str] = None
    self._start_subprocess()
|
| 266 |
+
|
| 267 |
+
def _start_subprocess(self):
    """Start a new Python subprocess with the Eurus environment.

    Writes the REPL driver script to a temp file, launches it with the
    Eurus matplotlib style/palette and PLOTS_DIR injected via environment
    variables, then waits for the child's readiness handshake line.

    Raises:
        RuntimeError: if the child does not emit SUBPROCESS_READY. The
            half-started child and its temp script are cleaned up first.
    """
    # Write the subprocess script to a temp file (delete=False: the child
    # needs it to outlive this context manager; removed in _cleanup_process).
    with tempfile.NamedTemporaryFile(
        mode="w", suffix=".py", delete=False, prefix="eurus_repl_"
    ) as f:
        f.write(_SUBPROCESS_SCRIPT)
        self._temp_script = f.name

    # Build env: inject matplotlib style + PLOTS_DIR. Only JSON-serializable
    # rcParams can cross the process boundary, so filter to scalars.
    env = os.environ.copy()
    env["EURUS_MPL_STYLE"] = json.dumps(
        {k: v for k, v in _EURUS_STYLE.items() if isinstance(v, (int, float, str, bool))}
    )
    env["EURUS_MPL_COLORS"] = json.dumps(_EURUS_COLORS)
    env["EURUS_PLOTS_DIR"] = str(PLOTS_DIR)
    env["MPLBACKEND"] = "Agg"
    env["PYTHONUNBUFFERED"] = "1"  # line-by-line JSON protocol needs no buffering

    self._process = subprocess.Popen(
        [sys.executable, "-u", self._temp_script],
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        bufsize=0,
        cwd=self._working_dir if os.path.isdir(self._working_dir) else None,
        env=env,
    )

    # Wait for ready signal (first line the child prints).
    ready_line = self._process.stdout.readline()
    if "SUBPROCESS_READY" not in ready_line:
        # FIX: previously the failed child process and its temp script were
        # leaked here — kill the child and remove the script before raising.
        self._cleanup_process()
        raise RuntimeError(f"Subprocess failed to start: {ready_line!r}")

    logger.info("Started REPL subprocess (PID: %d)", self._process.pid)
|
| 303 |
+
|
| 304 |
+
def _ensure_alive(self):
    """Restart the subprocess if it was never started or has exited."""
    proc = self._process
    if proc is not None and proc.poll() is None:
        return  # Child is running — nothing to do.
    logger.warning("REPL subprocess died — restarting")
    self._cleanup_process()
    self._start_subprocess()
|
| 310 |
+
|
| 311 |
+
def run(self, code: str, timeout: int = 300) -> str:
    """Execute code in the subprocess. Returns output string.

    Sends one JSON command line on the child's stdin and reads one JSON
    response line from its stdout. Never raises for execution problems:
    every failure mode (crash, timeout, malformed response, user-code
    error) is reported as a string so the calling agent can react.

    Args:
        code: Python source to execute in the child's persistent namespace.
        timeout: seconds to wait for the response before killing and
            restarting the child.
    """
    # The lock serializes the whole request/response round-trip; otherwise
    # two callers could interleave stdin writes or steal each other's reply.
    with self._lock:
        self._ensure_alive()

        # One command per line — the child reads with input().
        cmd = json.dumps({"type": "exec", "code": code}) + "\n"
        try:
            self._process.stdin.write(cmd)
            self._process.stdin.flush()
        except (BrokenPipeError, OSError) as e:
            # Child died between _ensure_alive and the write — restart and
            # tell the caller to retry rather than failing silently.
            logger.error("Subprocess stdin broken: %s — restarting", e)
            self._cleanup_process()
            self._start_subprocess()
            return f"Error: REPL subprocess crashed. Please re-run your code."

        # Read response with timeout
        result_line = self._read_with_timeout(timeout)

        if result_line is None:
            # Timeout — kill subprocess and restart. The namespace is lost;
            # the message coaches the agent toward cheaper computations.
            logger.warning("REPL execution timed out after %ds — killing subprocess", timeout)
            self._kill_subprocess()
            self._start_subprocess()
            return (
                "TIMEOUT ERROR: Execution exceeded "
                f"{timeout} seconds ({timeout // 60} min). "
                "TIP: Resample data to daily/monthly before plotting "
                "(e.g., ds.resample(time='D').mean())."
            )

        try:
            result = json.loads(result_line)
        except json.JSONDecodeError:
            # Child printed something that isn't protocol JSON (e.g. stray
            # output to the real stdout) — surface it verbatim.
            return f"Error: Malformed response from subprocess: {result_line!r}"

        if result["status"] == "success":
            # Merge captured stderr after stdout so warnings are visible.
            output = result.get("stdout", "")
            stderr = result.get("stderr", "")
            if stderr:
                output = f"{output}\n{stderr}" if output else stderr
            return output or "(No output)"
        elif result["status"] == "error":
            # User-code exception: traceback text prepared by the child.
            return result.get("error", "Unknown error")
        else:
            # "fatal": the child's communication loop itself broke.
            return f"Fatal subprocess error: {result.get('error', 'Unknown')}"
|
| 356 |
+
|
| 357 |
+
def _read_with_timeout(self, timeout: int) -> Optional[str]:
|
| 358 |
+
"""Read one line from subprocess stdout with a timeout."""
|
| 359 |
+
result = [None]
|
| 360 |
+
|
| 361 |
+
def _reader():
|
| 362 |
+
try:
|
| 363 |
+
result[0] = self._process.stdout.readline()
|
| 364 |
+
except Exception:
|
| 365 |
+
pass
|
| 366 |
+
|
| 367 |
+
reader_thread = threading.Thread(target=_reader, daemon=True)
|
| 368 |
+
reader_thread.start()
|
| 369 |
+
reader_thread.join(timeout=timeout)
|
| 370 |
+
|
| 371 |
+
if reader_thread.is_alive():
|
| 372 |
+
return None # Timed out
|
| 373 |
+
return result[0] if result[0] else None
|
| 374 |
+
|
| 375 |
+
def _kill_subprocess(self):
|
| 376 |
+
"""Force-kill the subprocess."""
|
| 377 |
+
if self._process:
|
| 378 |
+
try:
|
| 379 |
+
self._process.terminate()
|
| 380 |
+
try:
|
| 381 |
+
self._process.wait(timeout=3)
|
| 382 |
+
except subprocess.TimeoutExpired:
|
| 383 |
+
self._process.kill()
|
| 384 |
+
self._process.wait(timeout=2)
|
| 385 |
+
except Exception as e:
|
| 386 |
+
logger.error("Error killing subprocess: %s", e)
|
| 387 |
+
self._process = None
|
| 388 |
+
|
| 389 |
+
def _cleanup_process(self):
|
| 390 |
+
"""Clean up subprocess and temp files."""
|
| 391 |
+
self._kill_subprocess()
|
| 392 |
+
if self._temp_script and os.path.exists(self._temp_script):
|
| 393 |
+
try:
|
| 394 |
+
os.unlink(self._temp_script)
|
| 395 |
+
except OSError:
|
| 396 |
+
pass
|
| 397 |
+
self._temp_script = None
|
| 398 |
+
|
| 399 |
+
def _update_plots_dir(self, plots_dir: str):
|
| 400 |
+
"""Update the PLOTS_DIR used by the subprocess."""
|
| 401 |
+
if self._process and self._process.poll() is None:
|
| 402 |
+
try:
|
| 403 |
+
# Send a command to update the plots directory in the subprocess
|
| 404 |
+
cmd = f"import os; os.environ['EURUS_PLOTS_DIR'] = {plots_dir!r}; PLOTS_DIR = {plots_dir!r}\n"
|
| 405 |
+
self._process.stdin.write(cmd)
|
| 406 |
+
self._process.stdin.flush()
|
| 407 |
+
# Clear the response
|
| 408 |
+
self._read_response(timeout=2)
|
| 409 |
+
except Exception as e:
|
| 410 |
+
logger.warning("Failed to update plots_dir in subprocess: %s", e)
|
| 411 |
+
|
| 412 |
+
def close(self):
    """Gracefully shutdown the subprocess."""
    proc = self._process
    if proc and proc.poll() is None:
        try:
            # Ask the worker loop to exit on its own before resorting to kill.
            proc.stdin.write("EXIT_SUBPROCESS\n")
            proc.stdin.flush()
            proc.wait(timeout=3)
            logger.info("REPL subprocess exited gracefully (PID: %d)", proc.pid)
        except Exception:
            self._kill_subprocess()
    self._cleanup_process()
|
| 423 |
+
|
| 424 |
+
|
| 425 |
+
# =============================================================================
|
| 426 |
+
# LANGCHAIN TOOL
|
| 427 |
+
# =============================================================================
|
| 428 |
+
|
| 429 |
+
class PythonREPLInput(BaseModel):
    # Argument schema for the python_repl tool: one required source string.
    code: str = Field(description="The Python code to execute.")
|
| 431 |
+
|
| 432 |
+
|
| 433 |
+
class PythonREPLTool(BaseTool):
    """LangChain tool exposing a persistent Python subprocess REPL.

    State (variables, loaded datasets) persists across calls because all
    code runs in one long-lived PersistentREPL subprocess. After each run,
    new image files under the session plots directory are detected by a
    before/after snapshot and forwarded to the terminal and/or a web
    callback.
    """

    name: str = "python_repl"
    description: str = (
        "A Python REPL for data analysis and visualization.\n\n"
        "CRITICAL PLOTTING RULES:\n"
        "1. ALWAYS save to PLOTS_DIR: plt.savefig(f'{PLOTS_DIR}/filename.png')\n"
        "2. Use descriptive filenames (e.g., 'route_risk_map.png')\n"
        "\n\n"
        "MEMORY RULES:\n"
        "1. NEVER use .load() or .compute() on large datasets\n"
        "2. Resample multi-year data first: ds.resample(time='D').mean()\n"
        "3. Use .sel() to subset data before operations\n\n"
        "Pre-loaded: pd, np, xr, plt, mcolors, cm, datetime, timedelta, PLOTS_DIR (string path)"
    )
    args_schema: Type[BaseModel] = PythonREPLInput
    working_dir: str = "."
    _repl: Optional[PersistentREPL] = None
    _plot_callback: Optional[Callable] = None  # For web interface
    _displayed_plots: set = set()
    _plots_dir: Optional[str] = None  # Session-specific plot directory

    def __init__(self, working_dir: str = ".", plots_dir: Optional[str] = None, **kwargs):
        """Create the tool and start its backing subprocess.

        Args:
            working_dir: Working directory for the REPL subprocess.
            plots_dir: Optional session-specific plot directory; falls back
                to the module-level PLOTS_DIR.
        """
        super().__init__(**kwargs)
        self.working_dir = working_dir
        self._plot_callback = None
        self._displayed_plots = set()
        self._plots_dir = plots_dir or str(PLOTS_DIR)
        # Ensure the plots directory exists before the first snapshot.
        Path(self._plots_dir).mkdir(parents=True, exist_ok=True)
        self._repl = PersistentREPL(working_dir=working_dir)
        # Override the subprocess PLOTS_DIR env var to use the session dir.
        if plots_dir:
            self._repl._update_plots_dir(plots_dir)

    def set_plot_callback(self, callback: Callable):
        """Set callback for plot capture (used by web interface)."""
        self._plot_callback = callback

    def close(self):
        """Clean up subprocess resources."""
        if self._repl:
            self._repl.close()
            self._repl = None

    def _display_image_in_terminal(self, filepath: str, base64_data: str):
        """Display image in terminal — iTerm2 inline, or macOS Preview fallback."""
        # Skip if we already displayed this file in this session.
        if filepath in self._displayed_plots:
            return
        self._displayed_plots.add(filepath)

        try:
            term_program = os.environ.get("TERM_PROGRAM", "")

            # iTerm2 inline image escape protocol (only iTerm2 supports this).
            if "iTerm.app" in term_program:
                sys.stdout.write(f"\033]1337;File=inline=1;width=auto;preserveAspectRatio=1:{base64_data}\a\n")
                sys.stdout.flush()
                return

            # Fallback: open in Preview on macOS (only in CLI mode, not web).
            if not self._plot_callback and os.path.exists(filepath):
                import subprocess as _sp
                _sp.Popen(["open", filepath], stdout=_sp.DEVNULL, stderr=_sp.DEVNULL)

        except Exception as e:
            logger.warning(f"Failed to display image in terminal: {e}")

    def _capture_and_notify_plots(self, saved_files: list, code: str = ""):
        """Capture plots and notify via callback."""
        for filepath in saved_files:
            try:
                if os.path.exists(filepath):
                    with open(filepath, 'rb') as f:
                        img_data = f.read()
                    b64_data = base64.b64encode(img_data).decode('utf-8')

                    # Display in terminal.
                    self._display_image_in_terminal(filepath, b64_data)

                    # Send to web UI via callback.
                    if self._plot_callback:
                        self._plot_callback(b64_data, filepath, code)
            except Exception as e:
                print(f"Warning: Failed to capture plot {filepath}: {e}")

    def _run(self, code: str) -> str:
        """Execute the python code in the subprocess and return the output."""
        plots_dir = self._plots_dir or str(PLOTS_DIR)

        # Snapshot plots directory BEFORE execution.
        image_exts = {'.png', '.jpg', '.jpeg', '.svg', '.pdf', '.gif', '.webp'}
        try:
            before_files = {
                f: os.path.getmtime(os.path.join(plots_dir, f))
                for f in os.listdir(plots_dir)
                if os.path.splitext(f)[1].lower() in image_exts
            }
        except FileNotFoundError:
            before_files = {}

        # Execute in subprocess.
        output = self._repl.run(code, timeout=300)

        # Detect NEW plot files by comparing directory snapshots.
        try:
            after_files = {
                f: os.path.getmtime(os.path.join(plots_dir, f))
                for f in os.listdir(plots_dir)
                if os.path.splitext(f)[1].lower() in image_exts
            }
        except FileNotFoundError:
            after_files = {}

        new_files = []
        for fname, mtime in after_files.items():
            full_path = os.path.join(plots_dir, fname)
            # Brand-new file, or an existing file rewritten during this run.
            if fname not in before_files or mtime > before_files[fname]:
                if full_path not in self._displayed_plots:
                    new_files.append(full_path)

        if new_files:
            print(f"📊 {len(new_files)} plot(s) saved")
            self._capture_and_notify_plots(new_files, code)

        return output

    async def _arun(self, code: str) -> str:
        """Use the tool asynchronously — avoids blocking the event loop."""
        import asyncio
        # Fix: get_running_loop() instead of the deprecated get_event_loop();
        # an async tool call is always inside a running loop.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, self._run, code)
|
src/eurus/tools/routing.py
ADDED
|
@@ -0,0 +1,289 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Maritime Routing Tool
|
| 3 |
+
=====================
|
| 4 |
+
Strictly calculates maritime routes using global shipping lane graphs.
|
| 5 |
+
Does NOT perform weather analysis. Returns waypoints for the Agent to analyze.
|
| 6 |
+
|
| 7 |
+
Dependencies:
|
| 8 |
+
- scgraph (for maritime pathfinding)
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
import logging
from datetime import datetime, timedelta
from typing import Any, List, Optional, Tuple

from langchain_core.tools import StructuredTool
from pydantic import BaseModel, Field

logger = logging.getLogger(__name__)

# Check for optional dependencies
HAS_ROUTING_DEPS = False
try:
    import scgraph
    from scgraph.geographs.marnet import marnet_geograph
    HAS_ROUTING_DEPS = True
except ImportError:
    pass
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
# ============================================================================
|
| 31 |
+
# HELPER FUNCTIONS
|
| 32 |
+
# ============================================================================
|
| 33 |
+
|
| 34 |
+
def _normalize_lon(lon: float) -> float:
|
| 35 |
+
"""Convert longitude to -180 to 180 range (scgraph format)."""
|
| 36 |
+
# Efficient modulo operation - prevents infinite loop on extreme values
|
| 37 |
+
return ((lon + 180) % 360) - 180
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def _get_maritime_path(origin: Tuple[float, float], dest: Tuple[float, float]) -> List[Tuple[float, float]]:
    """Calculate shortest maritime path using scgraph."""
    if not HAS_ROUTING_DEPS:
        raise ImportError("Dependency 'scgraph' is missing.")

    # scgraph expects longitudes in the -180..180 convention.
    origin_node = {"latitude": origin[0], "longitude": _normalize_lon(origin[1])}
    destination_node = {"latitude": dest[0], "longitude": _normalize_lon(dest[1])}

    result = marnet_geograph.get_shortest_path(
        origin_node=origin_node,
        destination_node=destination_node,
    )
    return [(pt[0], pt[1]) for pt in result.get('coordinate_path', [])]
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def _interpolate_route(
|
| 61 |
+
path: List[Tuple[float, float]],
|
| 62 |
+
speed_knots: float,
|
| 63 |
+
departure: datetime
|
| 64 |
+
) -> List[dict]:
|
| 65 |
+
"""Convert path to waypoints with timestamps. Keeps ALL points for risk assessment."""
|
| 66 |
+
try:
|
| 67 |
+
from geopy.distance import great_circle
|
| 68 |
+
except ImportError:
|
| 69 |
+
# Proper Haversine fallback for accurate distance at all latitudes
|
| 70 |
+
import math
|
| 71 |
+
from collections import namedtuple
|
| 72 |
+
Distance = namedtuple('Distance', ['km'])
|
| 73 |
+
def great_circle(p1, p2):
|
| 74 |
+
lat1, lon1 = math.radians(p1[0]), math.radians(p1[1])
|
| 75 |
+
lat2, lon2 = math.radians(p2[0]), math.radians(p2[1])
|
| 76 |
+
dlat = lat2 - lat1
|
| 77 |
+
dlon = lon2 - lon1
|
| 78 |
+
a = math.sin(dlat/2)**2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon/2)**2
|
| 79 |
+
c = 2 * math.asin(math.sqrt(min(1.0, a)))
|
| 80 |
+
return Distance(km=6371 * c) # Earth radius in km
|
| 81 |
+
|
| 82 |
+
speed_kmh = speed_knots * 1.852
|
| 83 |
+
waypoints = []
|
| 84 |
+
current_time = departure
|
| 85 |
+
|
| 86 |
+
# Add ALL points from scgraph - each is a navigation waypoint
|
| 87 |
+
# Risk assessment needs every geographic point, not time-filtered ones
|
| 88 |
+
for i, point in enumerate(path):
|
| 89 |
+
if i == 0:
|
| 90 |
+
step = "Origin"
|
| 91 |
+
elif i == len(path) - 1:
|
| 92 |
+
step = "Destination"
|
| 93 |
+
else:
|
| 94 |
+
step = f"Waypoint {i}"
|
| 95 |
+
|
| 96 |
+
# Calculate time to reach this point
|
| 97 |
+
if i > 0:
|
| 98 |
+
prev = path[i-1]
|
| 99 |
+
dist = great_circle(prev, point).km
|
| 100 |
+
hours = dist / speed_kmh if speed_kmh > 0 else 0
|
| 101 |
+
current_time += timedelta(hours=hours)
|
| 102 |
+
|
| 103 |
+
waypoints.append({
|
| 104 |
+
"lat": point[0],
|
| 105 |
+
"lon": point[1],
|
| 106 |
+
"time": current_time.strftime("%Y-%m-%d %H:%M"),
|
| 107 |
+
"step": step
|
| 108 |
+
})
|
| 109 |
+
|
| 110 |
+
return waypoints
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
# ============================================================================
|
| 114 |
+
# TOOL FUNCTION
|
| 115 |
+
# ============================================================================
|
| 116 |
+
|
| 117 |
+
def calculate_maritime_route(
    origin_lat: float,
    origin_lon: float,
    dest_lat: float,
    dest_lon: float,
    month: int,
    year: Optional[int] = None,
    speed_knots: float = 14.0
) -> str:
    """
    Calculates the detailed maritime route waypoints.

    Args:
        origin_lat, origin_lon: Departure coordinates in degrees.
        dest_lat, dest_lon: Arrival coordinates in degrees.
        month: Month of travel (1-12). Departure is assumed on the 15th.
        year: Optional year; defaults to the next upcoming occurrence of `month`.
        speed_knots: Cruising speed used for ETA and waypoint timestamps.

    Returns:
        A formatted report string (waypoints, a buffered bounding box for
        weather retrieval, and follow-up instructions for the agent), or an
        error string on failure — this tool never raises to the caller.
    """
    if not HAS_ROUTING_DEPS:
        return "Error: 'scgraph' not installed."

    if not (1 <= month <= 12):
        return f"Error: month must be 1-12, got {month}."

    try:
        path = _get_maritime_path((origin_lat, origin_lon), (dest_lat, dest_lon))

        # Use provided year or calculate based on current date
        if year is None:
            now = datetime.now()
            # If the month is still ahead of us this year, use this year;
            # otherwise assume the trip happens next year.
            year = now.year if month >= now.month else now.year + 1
        departure = datetime(year, month, 15)

        waypoints = _interpolate_route(path, speed_knots, departure)

        # Calculate bounding box with buffer for weather data
        lats = [w['lat'] for w in waypoints]
        lons = [w['lon'] for w in waypoints]

        min_lat = max(-90, min(lats) - 5)
        max_lat = min(90, max(lats) + 5)

        # Detect dateline crossing: if lon range > 180°, the route crosses -180/+180
        lon_range = max(lons) - min(lons)
        if lon_range > 180:
            # Route crosses dateline - need to recalculate
            # Split lons into positive and negative, find the gap
            pos_lons = [l for l in lons if l >= 0]
            neg_lons = [l for l in lons if l < 0]
            if pos_lons and neg_lons:
                # Route goes from ~+179 to ~-179 - use 0-360 system
                lons_360 = [(l + 360) if l < 0 else l for l in lons]
                min_lon = max(0, min(lons_360) - 5)
                max_lon = min(360, max(lons_360) + 5)
            else:
                # Range > 180 but all lons share a sign: not a true crossing.
                min_lon = max(-180, min(lons) - 5)
                max_lon = min(180, max(lons) + 5)
        else:
            min_lon = max(-180, min(lons) - 5)
            max_lon = min(180, max(lons) + 5)

        # Format waypoints as Python-ready list (keep original -180/+180 format)
        waypoint_list = "[\n" + ",\n".join([
            f"    ({w['lat']:.2f}, {w['lon']:.2f})"
            for w in waypoints
        ]) + "\n]"

        # Calculate total distance
        total_nm = 0
        try:
            from geopy.distance import great_circle
            for i in range(1, len(waypoints)):
                d = great_circle(
                    (waypoints[i-1]['lat'], waypoints[i-1]['lon']),
                    (waypoints[i]['lat'], waypoints[i]['lon'])
                ).nautical
                total_nm += d
        except ImportError:
            # Haversine fallback for distance calculation
            import math
            for i in range(1, len(waypoints)):
                lat1, lon1 = math.radians(waypoints[i-1]['lat']), math.radians(waypoints[i-1]['lon'])
                lat2, lon2 = math.radians(waypoints[i]['lat']), math.radians(waypoints[i]['lon'])
                dlat, dlon = lat2 - lat1, lon2 - lon1
                a = math.sin(dlat/2)**2 + math.cos(lat1) * math.cos(lat2) * math.sin(dlon/2)**2
                c = 2 * math.asin(math.sqrt(min(1.0, a)))
                total_nm += 6371 * c / 1.852  # km to nm

        eta_days = total_nm / (speed_knots * 24)

        # NOTE(review): internal indentation of this report template is a
        # best-effort reconstruction — confirm against the rendered output.
        output = f"""
================================================================================
MARITIME ROUTE CALCULATION COMPLETE
================================================================================

ROUTE SUMMARY:
Origin: ({origin_lat:.2f}, {origin_lon:.2f})
Destination: ({dest_lat:.2f}, {dest_lon:.2f})
Distance: ~{total_nm:.0f} nautical miles
Speed: {speed_knots} knots
ETA: ~{eta_days:.1f} days
Waypoints: {len(waypoints)} checkpoints

WAYPOINT COORDINATES (for risk analysis):
{waypoint_list}

DATA REGION (with 5° buffer):
Latitude: [{min_lat:.1f}, {max_lat:.1f}]
Longitude: [{min_lon:.1f}, {max_lon:.1f}]

================================================================================
MANDATORY RISK ASSESSMENT PROTOCOL
================================================================================

STEP 1: DOWNLOAD CLIMATOLOGICAL DATA
Call `retrieve_era5_data` with:
- variable: 'u10' and 'v10' (10m wind components) for wind speed analysis
- query_type: 'spatial'
- region bounds: lat=[{min_lat:.1f}, {max_lat:.1f}], lon=[{min_lon:.1f}, {max_lon:.1f}]
- dates: Month {month} for LAST 3 YEARS (e.g., {month}/2021, {month}/2022, {month}/2023)

⚠️ WARNING: Large bounding boxes can cause OOM/timeout!
If (max_lon - min_lon) > 60° or (max_lat - min_lat) > 40°:
- Do NOT download spatial data for the whole route at once
- Instead, iterate through waypoints and download small chunks
- Or sample every Nth waypoint for point-based temporal queries

WHY 3 YEARS? To build climatological statistics, not just one snapshot.

STEP 2: GET ANALYSIS PROTOCOL
Call `get_analysis_guide(topic='maritime_visualization')`

Or for full workflow: `get_analysis_guide(topic='maritime_route')`

This will provide methodology for:
- Lagrangian risk assessment (ship vs. stationary climate data)
- Threshold definitions (what wind speed is dangerous)
- Risk aggregation formulas
- Route deviation recommendations

STEP 3: EXECUTE ANALYSIS
Use python_repl to:
1. Load the downloaded data
2. Extract values at each waypoint
3. Calculate risk metrics per the methodology
4. Generate risk map and report

================================================================================
"""
        return output

    except Exception as e:
        return f"Routing Calculation Failed: {str(e)}"
|
| 264 |
+
|
| 265 |
+
|
| 266 |
+
# ============================================================================
|
| 267 |
+
# ARGUMENT SCHEMA
|
| 268 |
+
# ============================================================================
|
| 269 |
+
|
| 270 |
+
class RouteArgs(BaseModel):
    """Argument schema for the calculate_maritime_route tool."""

    origin_lat: float = Field(description="Latitude of origin")
    origin_lon: float = Field(description="Longitude of origin")
    dest_lat: float = Field(description="Latitude of destination")
    dest_lon: float = Field(description="Longitude of destination")
    month: int = Field(description="Month of travel (1-12)")
    # Fix: annotation was a bare `int` with a None default. Pydantic v2
    # rejects an explicit None for a non-optional int field, so the LLM
    # could never pass year=None. Optional[int] matches the function.
    year: Optional[int] = Field(default=None, description="Year for analysis. Defaults to upcoming occurrence of month.")
    speed_knots: float = Field(default=14.0, description="Speed in knots")
|
| 278 |
+
|
| 279 |
+
|
| 280 |
+
# ============================================================================
|
| 281 |
+
# LANGCHAIN TOOL
|
| 282 |
+
# ============================================================================
|
| 283 |
+
|
| 284 |
+
# Exported LangChain tool wrapping calculate_maritime_route. The agent binds
# it by name ("calculate_maritime_route") and inputs are validated against
# RouteArgs before the function is invoked.
routing_tool = StructuredTool.from_function(
    func=calculate_maritime_route,
    name="calculate_maritime_route",
    description="Calculates a realistic maritime route (avoiding land). Returns a list of time-stamped waypoints. DOES NOT check weather.",
    args_schema=RouteArgs
)
|
tests/test_config.py
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pytest
|
| 2 |
+
from eurus.config import ERA5_VARIABLES, VARIABLE_ALIASES, get_variable_info, get_short_name
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
# All 22 variables in the Arraylake dataset.
# The tests below assert that the variable catalog and the agent system
# prompt both cover every one of these short names.
ALL_ARRAYLAKE_VARS = [
    "blh", "cape", "cp", "d2", "lsp", "mslp", "sd", "skt", "sp",
    "ssr", "ssrd", "sst", "stl1", "swvl1", "t2", "tcc", "tcw",
    "tcwv", "u10", "u100", "v10", "v100",
]

# tp is a derived/accumulated variable kept for convenience; it belongs to
# the catalog but not to the raw Arraylake variable set above.
ALL_CATALOG_VARS = sorted(ALL_ARRAYLAKE_VARS + ["tp"])
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def test_variable_catalog_has_all_22():
    """Every Arraylake variable must appear in ERA5_VARIABLES."""
    for name in ALL_ARRAYLAKE_VARS:
        assert name in ERA5_VARIABLES, f"Missing variable: {name}"
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def test_total_variable_count():
    """Catalog should contain at least 22 variables (22 Arraylake + tp)."""
    catalog_size = len(ERA5_VARIABLES)
    assert catalog_size >= 22
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def test_variable_loading():
    """Test that ERA5 variables are loaded correctly."""
    for key in ("sst", "t2", "u10"):
        assert key in ERA5_VARIABLES

    sst_info = ERA5_VARIABLES["sst"]
    assert sst_info.units == "K"
    assert sst_info.short_name == "sst"
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def test_new_variables_metadata():
    """Spot-check metadata on newly added variables."""
    # Boundary layer height
    assert ERA5_VARIABLES["blh"].units == "m"
    assert ERA5_VARIABLES["blh"].category == "atmosphere"

    # Dewpoint temperature
    assert ERA5_VARIABLES["d2"].units == "K"

    # Volumetric soil moisture (substring match — units may carry extra text)
    assert "m³/m³" in ERA5_VARIABLES["swvl1"].units
    assert ERA5_VARIABLES["swvl1"].category == "land_surface"

    # 100 m wind component
    assert ERA5_VARIABLES["u100"].units == "m/s"

    # Downward shortwave radiation
    assert "J/m²" in ERA5_VARIABLES["ssrd"].units
    assert ERA5_VARIABLES["ssrd"].category == "radiation"
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def test_get_variable_info():
    """Test helper function for retrieving variable info."""
    sst = ERA5_VARIABLES["sst"]

    # Lookups are case-insensitive and alias-aware; misses return None.
    assert get_variable_info("SST") == sst
    assert get_variable_info("Sea_Surface_Temperature") == sst
    assert get_variable_info("non_existent_var") is None

    # Aliases for the recently added variables.
    assert get_variable_info("dewpoint") == ERA5_VARIABLES["d2"]
    assert get_variable_info("soil_moisture") == ERA5_VARIABLES["swvl1"]
    assert get_variable_info("boundary_layer_height") == ERA5_VARIABLES["blh"]
    assert get_variable_info("snow_depth") == ERA5_VARIABLES["sd"]
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def test_get_short_name():
    """Test retrieval of short names."""
    assert get_short_name("SST") == "sst"
    assert get_short_name("Sea_Surface_Temperature") == "sst"
    # Unknown names fall back to the lower-cased input.
    assert get_short_name("UNKNOWN_VAR") == "unknown_var"

    # Aliases for the recently added variables.
    assert get_short_name("skin_temperature") == "skt"
    assert get_short_name("100m_u_component_of_wind") == "u100"
    assert get_short_name("total_column_water_vapour") == "tcwv"
|
| 89 |
+
|
| 90 |
+
|
| 91 |
+
def test_agent_prompt_branding():
    """Test that the system prompt contains the Eurus branding."""
    from eurus.config import AGENT_SYSTEM_PROMPT

    # New branding is present; legacy project names are fully scrubbed.
    assert "Eurus" in AGENT_SYSTEM_PROMPT
    for legacy_name in ("Comrade Copernicus", "PANGAEA"):
        assert legacy_name not in AGENT_SYSTEM_PROMPT
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
def test_agent_prompt_lists_all_variables():
    """System prompt should mention all 22 Arraylake variable short names."""
    from eurus.config import AGENT_SYSTEM_PROMPT

    for short in ALL_ARRAYLAKE_VARS:
        assert short in AGENT_SYSTEM_PROMPT, f"System prompt missing variable: {short}"
|
tests/test_e2e.py
ADDED
|
@@ -0,0 +1,368 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
End-to-End Tests for Eurus
|
| 3 |
+
===========================
|
| 4 |
+
These tests use REAL API calls to verify the complete workflow.
|
| 5 |
+
Requires valid API keys in .env file.
|
| 6 |
+
|
| 7 |
+
Run with: pytest tests/test_e2e.py -v -s
|
| 8 |
+
Use -s flag to see output from data retrieval.
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
import os
|
| 12 |
+
import pytest
|
| 13 |
+
import tempfile
|
| 14 |
+
import shutil
|
| 15 |
+
from pathlib import Path
|
| 16 |
+
from datetime import datetime, timedelta
|
| 17 |
+
from dotenv import load_dotenv
|
| 18 |
+
|
| 19 |
+
# Load environment variables (e.g. ARRAYLAKE_API_KEY) from a local .env file.
load_dotenv()
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
# ============================================================================
|
| 24 |
+
# FIXTURES
|
| 25 |
+
# ============================================================================
|
| 26 |
+
|
| 27 |
+
@pytest.fixture(scope="module")
def temp_data_dir():
    """Create temporary data directory for tests."""
    workdir = tempfile.mkdtemp(prefix="eurus_e2e_")
    yield workdir
    # Remove the directory once every test in the module has finished.
    shutil.rmtree(workdir, ignore_errors=True)
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
@pytest.fixture(scope="module")
def has_arraylake_key():
    """Check if Arraylake API key is available."""
    if not os.environ.get("ARRAYLAKE_API_KEY"):
        pytest.skip("ARRAYLAKE_API_KEY not found in environment")
    return True
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
# ============================================================================
|
| 46 |
+
# E2E: ERA5 DATA RETRIEVAL
|
| 47 |
+
# ============================================================================
|
| 48 |
+
|
| 49 |
+
class TestERA5Retrieval:
|
| 50 |
+
"""End-to-end tests for ERA5 data retrieval."""
|
| 51 |
+
|
| 52 |
+
@pytest.mark.slow
|
| 53 |
+
def test_retrieve_sst_temporal_small_region(self, has_arraylake_key, temp_data_dir):
|
| 54 |
+
"""
|
| 55 |
+
E2E Test: Retrieve SST data for a small region and short time period.
|
| 56 |
+
This tests the complete retrieval pipeline.
|
| 57 |
+
"""
|
| 58 |
+
from eurus.retrieval import retrieve_era5_data
|
| 59 |
+
from eurus.memory import reset_memory
|
| 60 |
+
|
| 61 |
+
# Reset memory for clean state
|
| 62 |
+
reset_memory()
|
| 63 |
+
|
| 64 |
+
# Use a small request to minimize download time
|
| 65 |
+
result = retrieve_era5_data(
|
| 66 |
+
query_type="temporal",
|
| 67 |
+
variable_id="sst",
|
| 68 |
+
start_date="2023-01-01",
|
| 69 |
+
end_date="2023-01-07", # Just 1 week
|
| 70 |
+
min_latitude=25.0,
|
| 71 |
+
max_latitude=30.0,
|
| 72 |
+
min_longitude=260.0, # Gulf of Mexico
|
| 73 |
+
max_longitude=265.0,
|
| 74 |
+
)
|
| 75 |
+
|
| 76 |
+
print(f"\n=== ERA5 Retrieval Result ===\n{result}\n")
|
| 77 |
+
|
| 78 |
+
# Verify success
|
| 79 |
+
assert "SUCCESS" in result or "CACHE HIT" in result
|
| 80 |
+
assert "sst" in result.lower()
|
| 81 |
+
assert ".zarr" in result
|
| 82 |
+
|
| 83 |
+
@pytest.mark.slow
|
| 84 |
+
def test_retrieve_t2m_spatial(self, has_arraylake_key, temp_data_dir):
|
| 85 |
+
"""
|
| 86 |
+
E2E Test: Retrieve 2m temperature as spatial data.
|
| 87 |
+
Tests spatial query type.
|
| 88 |
+
"""
|
| 89 |
+
from eurus.retrieval import retrieve_era5_data
|
| 90 |
+
from eurus.memory import reset_memory
|
| 91 |
+
|
| 92 |
+
reset_memory()
|
| 93 |
+
|
| 94 |
+
result = retrieve_era5_data(
|
| 95 |
+
query_type="spatial",
|
| 96 |
+
variable_id="t2", # 2m temperature
|
| 97 |
+
start_date="2023-06-01",
|
| 98 |
+
end_date="2023-06-03", # Just 3 days
|
| 99 |
+
min_latitude=40.0,
|
| 100 |
+
max_latitude=50.0,
|
| 101 |
+
min_longitude=0.0,
|
| 102 |
+
max_longitude=10.0, # Western Europe
|
| 103 |
+
)
|
| 104 |
+
|
| 105 |
+
print(f"\n=== T2M Spatial Result ===\n{result}\n")
|
| 106 |
+
|
| 107 |
+
assert "SUCCESS" in result or "CACHE HIT" in result
|
| 108 |
+
|
| 109 |
+
@pytest.mark.slow
|
| 110 |
+
def test_retrieve_and_load_dataset(self, has_arraylake_key, temp_data_dir):
|
| 111 |
+
"""
|
| 112 |
+
E2E Test: Retrieve data and verify it can be loaded with xarray.
|
| 113 |
+
Tests the full data integrity pipeline.
|
| 114 |
+
"""
|
| 115 |
+
import xarray as xr
|
| 116 |
+
from eurus.retrieval import retrieve_era5_data
|
| 117 |
+
from eurus.memory import reset_memory, get_memory
|
| 118 |
+
|
| 119 |
+
reset_memory()
|
| 120 |
+
|
| 121 |
+
result = retrieve_era5_data(
|
| 122 |
+
query_type="temporal",
|
| 123 |
+
variable_id="sst",
|
| 124 |
+
start_date="2023-02-01",
|
| 125 |
+
end_date="2023-02-05",
|
| 126 |
+
min_latitude=20.0,
|
| 127 |
+
max_latitude=25.0,
|
| 128 |
+
min_longitude=270.0,
|
| 129 |
+
max_longitude=275.0,
|
| 130 |
+
)
|
| 131 |
+
|
| 132 |
+
assert "SUCCESS" in result or "CACHE HIT" in result
|
| 133 |
+
|
| 134 |
+
# Extract path from result
|
| 135 |
+
# Look for the path in the result string
|
| 136 |
+
lines = result.split('\n')
|
| 137 |
+
path = None
|
| 138 |
+
for line in lines:
|
| 139 |
+
if "Path:" in line:
|
| 140 |
+
path = line.split("Path:")[-1].strip()
|
| 141 |
+
break
|
| 142 |
+
if ".zarr" in line and "Load with" not in line:
|
| 143 |
+
# Try to find zarr path
|
| 144 |
+
parts = line.split()
|
| 145 |
+
for part in parts:
|
| 146 |
+
if ".zarr" in part:
|
| 147 |
+
path = part.strip()
|
| 148 |
+
break
|
| 149 |
+
|
| 150 |
+
if path and os.path.exists(path):
|
| 151 |
+
# Load and verify dataset
|
| 152 |
+
ds = xr.open_dataset(path, engine='zarr')
|
| 153 |
+
|
| 154 |
+
print(f"\n=== Loaded Dataset ===")
|
| 155 |
+
print(f"Variables: {list(ds.data_vars)}")
|
| 156 |
+
print(f"Dimensions: {dict(ds.dims)}")
|
| 157 |
+
print(f"Time range: {ds.time.values[0]} to {ds.time.values[-1]}")
|
| 158 |
+
|
| 159 |
+
assert 'sst' in ds.data_vars
|
| 160 |
+
assert 'time' in ds.dims
|
| 161 |
+
assert ds.dims['time'] > 0
|
| 162 |
+
|
| 163 |
+
ds.close()
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
# ============================================================================
|
| 167 |
+
# E2E: PYTHON REPL ANALYSIS
|
| 168 |
+
# ============================================================================
|
| 169 |
+
|
| 170 |
+
class TestREPLAnalysis:
|
| 171 |
+
"""End-to-end tests for REPL-based data analysis."""
|
| 172 |
+
|
| 173 |
+
def test_repl_numpy_computation(self):
|
| 174 |
+
"""
|
| 175 |
+
E2E Test: Use REPL to perform numpy computation.
|
| 176 |
+
"""
|
| 177 |
+
from eurus.tools.repl import PythonREPLTool
|
| 178 |
+
|
| 179 |
+
repl = PythonREPLTool()
|
| 180 |
+
|
| 181 |
+
code = """
|
| 182 |
+
import numpy as np
|
| 183 |
+
data = np.random.randn(100)
|
| 184 |
+
mean = np.mean(data)
|
| 185 |
+
std = np.std(data)
|
| 186 |
+
print(f"Mean: {mean:.4f}, Std: {std:.4f}")
|
| 187 |
+
"""
|
| 188 |
+
result = repl._run(code)
|
| 189 |
+
print(f"\n=== REPL Result ===\n{result}\n")
|
| 190 |
+
|
| 191 |
+
assert "Mean:" in result
|
| 192 |
+
assert "Std:" in result
|
| 193 |
+
assert "Error" not in result
|
| 194 |
+
|
| 195 |
+
def test_repl_pandas_dataframe(self):
|
| 196 |
+
"""
|
| 197 |
+
E2E Test: Use REPL to create and manipulate pandas DataFrame.
|
| 198 |
+
"""
|
| 199 |
+
from eurus.tools.repl import PythonREPLTool
|
| 200 |
+
|
| 201 |
+
repl = PythonREPLTool()
|
| 202 |
+
|
| 203 |
+
code = """
|
| 204 |
+
import pandas as pd
|
| 205 |
+
import numpy as np
|
| 206 |
+
|
| 207 |
+
df = pd.DataFrame({
|
| 208 |
+
'date': pd.date_range('2023-01-01', periods=10),
|
| 209 |
+
'temperature': np.random.randn(10) * 5 + 20,
|
| 210 |
+
'humidity': np.random.randn(10) * 10 + 60
|
| 211 |
+
})
|
| 212 |
+
|
| 213 |
+
print("DataFrame created:")
|
| 214 |
+
print(df.head())
|
| 215 |
+
print(f"\\nStats: Mean temp = {df['temperature'].mean():.2f}")
|
| 216 |
+
"""
|
| 217 |
+
result = repl._run(code)
|
| 218 |
+
print(f"\n=== Pandas Result ===\n{result}\n")
|
| 219 |
+
|
| 220 |
+
assert "DataFrame created" in result
|
| 221 |
+
assert "temperature" in result
|
| 222 |
+
assert "Error" not in result
|
| 223 |
+
|
| 224 |
+
@pytest.mark.slow
|
| 225 |
+
def test_repl_load_and_analyze_data(self, has_arraylake_key):
|
| 226 |
+
"""
|
| 227 |
+
E2E Test: Retrieve ERA5 data, then analyze it in REPL.
|
| 228 |
+
Full workflow test.
|
| 229 |
+
"""
|
| 230 |
+
from eurus.retrieval import retrieve_era5_data
|
| 231 |
+
from eurus.tools.repl import PythonREPLTool
|
| 232 |
+
from eurus.memory import reset_memory
|
| 233 |
+
import xarray as xr
|
| 234 |
+
|
| 235 |
+
reset_memory()
|
| 236 |
+
|
| 237 |
+
# Step 1: Retrieve data
|
| 238 |
+
result = retrieve_era5_data(
|
| 239 |
+
query_type="temporal",
|
| 240 |
+
variable_id="sst",
|
| 241 |
+
start_date="2023-03-01",
|
| 242 |
+
end_date="2023-03-05",
|
| 243 |
+
min_latitude=25.0,
|
| 244 |
+
max_latitude=28.0,
|
| 245 |
+
min_longitude=265.0,
|
| 246 |
+
max_longitude=268.0,
|
| 247 |
+
)
|
| 248 |
+
|
| 249 |
+
assert "SUCCESS" in result or "CACHE HIT" in result
|
| 250 |
+
|
| 251 |
+
# Extract path
|
| 252 |
+
path = None
|
| 253 |
+
for line in result.split('\n'):
|
| 254 |
+
if "Path:" in line:
|
| 255 |
+
path = line.split("Path:")[-1].strip()
|
| 256 |
+
break
|
| 257 |
+
|
| 258 |
+
if not path or not os.path.exists(path):
|
| 259 |
+
pytest.skip("Could not extract data path")
|
| 260 |
+
|
| 261 |
+
# Step 2: Analyze in REPL
|
| 262 |
+
repl = PythonREPLTool()
|
| 263 |
+
|
| 264 |
+
analysis_code = f"""
|
| 265 |
+
import xarray as xr
|
| 266 |
+
import numpy as np
|
| 267 |
+
|
| 268 |
+
# Load the dataset
|
| 269 |
+
ds = xr.open_dataset('{path}', engine='zarr')
|
| 270 |
+
data = ds['sst']
|
| 271 |
+
|
| 272 |
+
# Calculate statistics
|
| 273 |
+
spatial_mean = data.mean(dim=['latitude', 'longitude'])
|
| 274 |
+
time_mean = data.mean(dim='time')
|
| 275 |
+
|
| 276 |
+
print("=== SST Analysis ===")
|
| 277 |
+
print(f"Time points: {{len(data.time)}}")
|
| 278 |
+
print(f"Spatial shape: {{data.shape}}")
|
| 279 |
+
print(f"Overall mean: {{float(data.mean()):.2f}} K")
|
| 280 |
+
print(f"Overall std: {{float(data.std()):.2f}} K")
|
| 281 |
+
print(f"Min: {{float(data.min()):.2f}} K, Max: {{float(data.max()):.2f}} K")
|
| 282 |
+
"""
|
| 283 |
+
analysis_result = repl._run(analysis_code)
|
| 284 |
+
print(f"\n=== Analysis Result ===\n{analysis_result}\n")
|
| 285 |
+
|
| 286 |
+
assert "SST Analysis" in analysis_result
|
| 287 |
+
assert "Error" not in analysis_result or "Security" not in analysis_result
|
| 288 |
+
|
| 289 |
+
|
| 290 |
+
|
| 291 |
+
|
| 292 |
+
# ============================================================================
|
| 293 |
+
# E2E: MEMORY PERSISTENCE
|
| 294 |
+
# ============================================================================
|
| 295 |
+
|
| 296 |
+
class TestMemoryPersistence:
|
| 297 |
+
"""End-to-end tests for memory and dataset tracking."""
|
| 298 |
+
|
| 299 |
+
@pytest.mark.slow
|
| 300 |
+
def test_memory_tracks_downloaded_data(self, has_arraylake_key):
|
| 301 |
+
"""
|
| 302 |
+
E2E Test: Verify memory tracks downloaded datasets.
|
| 303 |
+
"""
|
| 304 |
+
from eurus.retrieval import retrieve_era5_data
|
| 305 |
+
from eurus.memory import reset_memory, get_memory
|
| 306 |
+
|
| 307 |
+
reset_memory()
|
| 308 |
+
memory = get_memory()
|
| 309 |
+
|
| 310 |
+
# Initial state - no datasets
|
| 311 |
+
initial_datasets = memory.list_datasets()
|
| 312 |
+
|
| 313 |
+
# Download data
|
| 314 |
+
result = retrieve_era5_data(
|
| 315 |
+
query_type="temporal",
|
| 316 |
+
variable_id="sst",
|
| 317 |
+
start_date="2023-04-01",
|
| 318 |
+
end_date="2023-04-03",
|
| 319 |
+
min_latitude=30.0,
|
| 320 |
+
max_latitude=32.0,
|
| 321 |
+
min_longitude=275.0,
|
| 322 |
+
max_longitude=278.0,
|
| 323 |
+
)
|
| 324 |
+
|
| 325 |
+
# Check memory registered the dataset
|
| 326 |
+
datasets = memory.list_datasets()
|
| 327 |
+
print(f"\n=== Registered Datasets ===\n{datasets}\n")
|
| 328 |
+
|
| 329 |
+
# Should have at least one dataset now
|
| 330 |
+
if "SUCCESS" in result:
|
| 331 |
+
assert len(datasets) > len(initial_datasets)
|
| 332 |
+
|
| 333 |
+
|
| 334 |
+
# ============================================================================
|
| 335 |
+
# E2E: ROUTING (if scgraph installed)
|
| 336 |
+
# ============================================================================
|
| 337 |
+
|
| 338 |
+
class TestRouting:
|
| 339 |
+
"""End-to-end tests for maritime routing."""
|
| 340 |
+
|
| 341 |
+
def test_routing_without_deps(self):
|
| 342 |
+
"""
|
| 343 |
+
E2E Test: Verify routing handles missing dependencies gracefully.
|
| 344 |
+
"""
|
| 345 |
+
from eurus.tools.routing import HAS_ROUTING_DEPS, calculate_maritime_route
|
| 346 |
+
|
| 347 |
+
if not HAS_ROUTING_DEPS:
|
| 348 |
+
# Should return helpful error message
|
| 349 |
+
result = calculate_maritime_route(
|
| 350 |
+
origin_lat=53.5,
|
| 351 |
+
origin_lon=8.5,
|
| 352 |
+
dest_lat=52.4,
|
| 353 |
+
dest_lon=4.9,
|
| 354 |
+
month=6
|
| 355 |
+
)
|
| 356 |
+
print(f"\n=== Routing (no deps) ===\n{result}\n")
|
| 357 |
+
assert "scgraph" in result.lower() or "install" in result.lower()
|
| 358 |
+
else:
|
| 359 |
+
pytest.skip("scgraph is installed, skipping no-deps test")
|
| 360 |
+
|
| 361 |
+
|
| 362 |
+
# ============================================================================
|
| 363 |
+
# RUN WITH: pytest tests/test_e2e.py -v -s --tb=short
|
| 364 |
+
# Add -m "not slow" to skip slow tests
|
| 365 |
+
# ============================================================================
|
| 366 |
+
|
| 367 |
+
if __name__ == "__main__":
|
| 368 |
+
pytest.main([__file__, "-v", "-s", "--tb=short"])
|
tests/test_edge_cases.py
ADDED
|
@@ -0,0 +1,210 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Edge-Case & Hardening Tests for Eurus
|
| 3 |
+
=======================================
|
| 4 |
+
Focused on retrieval edge cases discovered during manual testing:
|
| 5 |
+
prime-meridian crossing, future dates, invalid variables, filename
|
| 6 |
+
generation, cache behaviour, and routing with real dependencies.
|
| 7 |
+
|
| 8 |
+
Run with: pytest tests/test_edge_cases.py -v -s
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
import os
|
| 12 |
+
import pytest
|
| 13 |
+
from pathlib import Path
|
| 14 |
+
from dotenv import load_dotenv
|
| 15 |
+
|
| 16 |
+
load_dotenv()
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
# ============================================================================
|
| 20 |
+
# RETRIEVAL HELPERS — pure-logic, no API calls
|
| 21 |
+
# ============================================================================
|
| 22 |
+
|
| 23 |
+
class TestFilenameGeneration:
|
| 24 |
+
"""Tests for generate_filename edge cases."""
|
| 25 |
+
|
| 26 |
+
def test_negative_longitude_in_filename(self):
|
| 27 |
+
from eurus.retrieval import generate_filename
|
| 28 |
+
name = generate_filename(
|
| 29 |
+
"sst", "temporal", "2023-01-01", "2023-01-31",
|
| 30 |
+
min_latitude=30.0, max_latitude=46.0,
|
| 31 |
+
min_longitude=-6.0, max_longitude=36.0,
|
| 32 |
+
)
|
| 33 |
+
assert name.endswith(".zarr")
|
| 34 |
+
assert "lat30.00_46.00" in name
|
| 35 |
+
assert "lon-6.00_36.00" in name
|
| 36 |
+
|
| 37 |
+
def test_region_tag_overrides_coords(self):
|
| 38 |
+
from eurus.retrieval import generate_filename
|
| 39 |
+
name = generate_filename(
|
| 40 |
+
"sst", "temporal", "2023-07-01", "2023-07-31",
|
| 41 |
+
min_latitude=30, max_latitude=46,
|
| 42 |
+
min_longitude=354, max_longitude=42,
|
| 43 |
+
region="mediterranean",
|
| 44 |
+
)
|
| 45 |
+
assert "mediterranean" in name
|
| 46 |
+
assert "lat" not in name # region tag replaces coord string
|
| 47 |
+
|
| 48 |
+
def test_format_coord_near_zero(self):
|
| 49 |
+
from eurus.retrieval import _format_coord
|
| 50 |
+
assert _format_coord(0.003) == "0.00"
|
| 51 |
+
assert _format_coord(-0.004) == "0.00"
|
| 52 |
+
assert _format_coord(0.01) == "0.01"
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
class TestFutureDateRejection:
|
| 56 |
+
"""Ensure retrieval rejects future start dates without touching the API."""
|
| 57 |
+
|
| 58 |
+
def test_future_date_returns_error(self):
|
| 59 |
+
from eurus.retrieval import retrieve_era5_data
|
| 60 |
+
result = retrieve_era5_data(
|
| 61 |
+
query_type="temporal",
|
| 62 |
+
variable_id="sst",
|
| 63 |
+
start_date="2099-01-01",
|
| 64 |
+
end_date="2099-01-31",
|
| 65 |
+
min_latitude=0, max_latitude=10,
|
| 66 |
+
min_longitude=250, max_longitude=260,
|
| 67 |
+
)
|
| 68 |
+
assert "future" in result.lower()
|
| 69 |
+
assert "Error" in result
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
# ============================================================================
|
| 73 |
+
# E2E RETRIEVAL — require ARRAYLAKE_API_KEY
|
| 74 |
+
# ============================================================================
|
| 75 |
+
|
| 76 |
+
@pytest.fixture(scope="module")
|
| 77 |
+
def has_arraylake_key():
|
| 78 |
+
key = os.environ.get("ARRAYLAKE_API_KEY")
|
| 79 |
+
if not key:
|
| 80 |
+
pytest.skip("ARRAYLAKE_API_KEY not set")
|
| 81 |
+
return True
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
class TestPrimeMeridianCrossing:
|
| 85 |
+
"""Verify data integrity when the request spans the 0° meridian."""
|
| 86 |
+
|
| 87 |
+
@pytest.mark.slow
|
| 88 |
+
def test_cross_meridian_longitude_continuity(self, has_arraylake_key):
|
| 89 |
+
"""
|
| 90 |
+
Request u10 from -10°E to 15°E and check that the returned
|
| 91 |
+
longitude axis has no gaps (step ≈ 0.25° everywhere).
|
| 92 |
+
"""
|
| 93 |
+
import numpy as np
|
| 94 |
+
import xarray as xr
|
| 95 |
+
from eurus.retrieval import retrieve_era5_data
|
| 96 |
+
from eurus.memory import reset_memory
|
| 97 |
+
|
| 98 |
+
reset_memory()
|
| 99 |
+
result = retrieve_era5_data(
|
| 100 |
+
query_type="temporal",
|
| 101 |
+
variable_id="u10",
|
| 102 |
+
start_date="2024-01-15",
|
| 103 |
+
end_date="2024-01-17", # small window
|
| 104 |
+
min_latitude=50.0,
|
| 105 |
+
max_latitude=55.0,
|
| 106 |
+
min_longitude=-10.0,
|
| 107 |
+
max_longitude=15.0,
|
| 108 |
+
)
|
| 109 |
+
assert "SUCCESS" in result or "CACHE HIT" in result
|
| 110 |
+
|
| 111 |
+
# Extract path and load
|
| 112 |
+
path = None
|
| 113 |
+
for line in result.split("\n"):
|
| 114 |
+
if "Path:" in line:
|
| 115 |
+
path = line.split("Path:")[-1].strip()
|
| 116 |
+
break
|
| 117 |
+
assert path and os.path.exists(path)
|
| 118 |
+
|
| 119 |
+
ds = xr.open_dataset(path, engine="zarr")
|
| 120 |
+
lons = ds["u10"].longitude.values
|
| 121 |
+
diffs = np.diff(lons)
|
| 122 |
+
# uniform step — no jump across 0°
|
| 123 |
+
assert diffs.max() < 1.0, f"Gap in longitude: max step = {diffs.max()}"
|
| 124 |
+
ds.close()
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
class TestInvalidVariableHandling:
|
| 128 |
+
"""Ensure retrieval returns a clear error for unavailable variables."""
|
| 129 |
+
|
| 130 |
+
@pytest.mark.slow
|
| 131 |
+
def test_swh_not_available(self, has_arraylake_key):
|
| 132 |
+
from eurus.retrieval import retrieve_era5_data
|
| 133 |
+
from eurus.memory import reset_memory
|
| 134 |
+
|
| 135 |
+
reset_memory()
|
| 136 |
+
result = retrieve_era5_data(
|
| 137 |
+
query_type="temporal",
|
| 138 |
+
variable_id="swh",
|
| 139 |
+
start_date="2023-06-01",
|
| 140 |
+
end_date="2023-06-07",
|
| 141 |
+
min_latitude=40, max_latitude=50,
|
| 142 |
+
min_longitude=0, max_longitude=10,
|
| 143 |
+
)
|
| 144 |
+
assert "not found" in result.lower() or "Error" in result
|
| 145 |
+
assert "Available variables" in result or "available" in result.lower()
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
class TestCacheHitBehaviour:
|
| 149 |
+
"""Verify that repeated identical requests return CACHE HIT."""
|
| 150 |
+
|
| 151 |
+
@pytest.mark.slow
|
| 152 |
+
def test_second_request_is_cache_hit(self, has_arraylake_key):
|
| 153 |
+
from eurus.retrieval import retrieve_era5_data
|
| 154 |
+
from eurus.memory import reset_memory
|
| 155 |
+
|
| 156 |
+
reset_memory()
|
| 157 |
+
params = dict(
|
| 158 |
+
query_type="temporal",
|
| 159 |
+
variable_id="sst",
|
| 160 |
+
start_date="2023-08-01",
|
| 161 |
+
end_date="2023-08-03",
|
| 162 |
+
min_latitude=35.0, max_latitude=37.0,
|
| 163 |
+
min_longitude=15.0, max_longitude=18.0,
|
| 164 |
+
)
|
| 165 |
+
first = retrieve_era5_data(**params)
|
| 166 |
+
assert "SUCCESS" in first or "CACHE HIT" in first
|
| 167 |
+
|
| 168 |
+
second = retrieve_era5_data(**params)
|
| 169 |
+
assert "CACHE HIT" in second
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
# ============================================================================
|
| 173 |
+
# ROUTING WITH REAL DEPENDENCIES
|
| 174 |
+
# ============================================================================
|
| 175 |
+
|
| 176 |
+
class TestRoutingIntegration:
|
| 177 |
+
"""Tests that use real scgraph (if installed)."""
|
| 178 |
+
|
| 179 |
+
def test_hamburg_rotterdam_route(self):
|
| 180 |
+
from eurus.tools.routing import HAS_ROUTING_DEPS, calculate_maritime_route
|
| 181 |
+
if not HAS_ROUTING_DEPS:
|
| 182 |
+
pytest.skip("scgraph not installed")
|
| 183 |
+
|
| 184 |
+
result = calculate_maritime_route(
|
| 185 |
+
origin_lat=53.5, origin_lon=8.5,
|
| 186 |
+
dest_lat=52.4, dest_lon=4.9,
|
| 187 |
+
month=6,
|
| 188 |
+
)
|
| 189 |
+
assert "MARITIME ROUTE CALCULATION COMPLETE" in result
|
| 190 |
+
assert "Waypoints" in result or "waypoints" in result.lower()
|
| 191 |
+
# distance should be reasonable (100–500 nm)
|
| 192 |
+
assert "nautical miles" in result.lower()
|
| 193 |
+
|
| 194 |
+
def test_long_route_across_atlantic(self):
|
| 195 |
+
from eurus.tools.routing import HAS_ROUTING_DEPS, calculate_maritime_route
|
| 196 |
+
if not HAS_ROUTING_DEPS:
|
| 197 |
+
pytest.skip("scgraph not installed")
|
| 198 |
+
|
| 199 |
+
result = calculate_maritime_route(
|
| 200 |
+
origin_lat=40.7, origin_lon=-74.0, # New York
|
| 201 |
+
dest_lat=51.9, dest_lon=4.5, # Rotterdam
|
| 202 |
+
month=1,
|
| 203 |
+
)
|
| 204 |
+
assert "MARITIME ROUTE CALCULATION COMPLETE" in result
|
| 205 |
+
# trans-Atlantic should produce plenty of waypoints
|
| 206 |
+
assert "nautical miles" in result.lower()
|
| 207 |
+
|
| 208 |
+
|
| 209 |
+
if __name__ == "__main__":
|
| 210 |
+
pytest.main([__file__, "-v", "-s", "--tb=short"])
|
tests/test_server_integration.py
ADDED
|
@@ -0,0 +1,365 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Server and Integration Tests
|
| 3 |
+
============================
|
| 4 |
+
Tests for server module, retrieval helpers, and integration scenarios.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import pytest
|
| 8 |
+
from unittest.mock import patch, MagicMock
|
| 9 |
+
from datetime import datetime
|
| 10 |
+
import tempfile
|
| 11 |
+
from pathlib import Path
|
| 12 |
+
import json
|
| 13 |
+
import os
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
# ============================================================================
|
| 17 |
+
# SERVER MODULE TESTS
|
| 18 |
+
# ============================================================================
|
| 19 |
+
|
| 20 |
+
class TestServerModule:
|
| 21 |
+
"""Tests for eurus.server module."""
|
| 22 |
+
|
| 23 |
+
def test_server_class_exists(self):
|
| 24 |
+
"""Test Server class can be imported."""
|
| 25 |
+
from eurus.server import Server
|
| 26 |
+
assert Server is not None
|
| 27 |
+
|
| 28 |
+
def test_server_instance_exists(self):
|
| 29 |
+
"""Test server instance can be imported."""
|
| 30 |
+
from eurus.server import server
|
| 31 |
+
assert server is not None
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
# ============================================================================
|
| 35 |
+
# RETRIEVAL HELPERS TESTS
|
| 36 |
+
# ============================================================================
|
| 37 |
+
|
| 38 |
+
class TestRetrievalHelpers:
|
| 39 |
+
"""Tests for retrieval helper functions."""
|
| 40 |
+
|
| 41 |
+
def test_format_coord_positive(self):
|
| 42 |
+
"""Test coordinate formatting for positive values."""
|
| 43 |
+
from eurus.retrieval import _format_coord
|
| 44 |
+
assert _format_coord(25.5) == "25.50"
|
| 45 |
+
|
| 46 |
+
def test_format_coord_negative(self):
|
| 47 |
+
"""Test coordinate formatting for negative values."""
|
| 48 |
+
from eurus.retrieval import _format_coord
|
| 49 |
+
assert _format_coord(-10.333) == "-10.33"
|
| 50 |
+
|
| 51 |
+
def test_format_coord_zero(self):
|
| 52 |
+
"""Test coordinate formatting for near-zero values."""
|
| 53 |
+
from eurus.retrieval import _format_coord
|
| 54 |
+
# Values very close to zero should be formatted as 0.00
|
| 55 |
+
result = _format_coord(0.001)
|
| 56 |
+
assert "0.00" in result or "0.01" in result
|
| 57 |
+
|
| 58 |
+
def test_format_file_size_bytes(self):
|
| 59 |
+
"""Test file size formatting for bytes."""
|
| 60 |
+
from eurus.retrieval import format_file_size
|
| 61 |
+
assert "B" in format_file_size(500)
|
| 62 |
+
|
| 63 |
+
def test_format_file_size_kb(self):
|
| 64 |
+
"""Test file size formatting for kilobytes."""
|
| 65 |
+
from eurus.retrieval import format_file_size
|
| 66 |
+
assert "KB" in format_file_size(2048)
|
| 67 |
+
|
| 68 |
+
def test_format_file_size_mb(self):
|
| 69 |
+
"""Test file size formatting for megabytes."""
|
| 70 |
+
from eurus.retrieval import format_file_size
|
| 71 |
+
assert "MB" in format_file_size(5 * 1024 * 1024)
|
| 72 |
+
|
| 73 |
+
def test_format_file_size_gb(self):
|
| 74 |
+
"""Test file size formatting for gigabytes."""
|
| 75 |
+
from eurus.retrieval import format_file_size
|
| 76 |
+
assert "GB" in format_file_size(5 * 1024 * 1024 * 1024)
|
| 77 |
+
|
| 78 |
+
def test_ensure_aws_region_sets_env_from_repo_metadata(self, monkeypatch):
|
| 79 |
+
"""Auto-populate AWS vars when metadata includes region_name."""
|
| 80 |
+
import eurus.retrieval as _retrieval
|
| 81 |
+
from eurus.retrieval import _ensure_aws_region
|
| 82 |
+
|
| 83 |
+
# Reset one-shot flag so the function actually runs
|
| 84 |
+
_retrieval._aws_region_set = False
|
| 85 |
+
|
| 86 |
+
for key in ("AWS_REGION", "AWS_DEFAULT_REGION", "AWS_ENDPOINT_URL", "AWS_S3_ENDPOINT"):
|
| 87 |
+
monkeypatch.delenv(key, raising=False)
|
| 88 |
+
|
| 89 |
+
response = MagicMock()
|
| 90 |
+
response.read.return_value = json.dumps(
|
| 91 |
+
{"bucket": {"extra_config": {"region_name": "eu-north-1"}}}
|
| 92 |
+
).encode("utf-8")
|
| 93 |
+
context_manager = MagicMock()
|
| 94 |
+
context_manager.__enter__.return_value = response
|
| 95 |
+
|
| 96 |
+
with patch("eurus.retrieval.urlopen", return_value=context_manager) as mock_urlopen:
|
| 97 |
+
_ensure_aws_region("token", "earthmover-public/era5-surface-aws")
|
| 98 |
+
|
| 99 |
+
assert os.environ["AWS_REGION"] == "eu-north-1"
|
| 100 |
+
assert os.environ["AWS_DEFAULT_REGION"] == "eu-north-1"
|
| 101 |
+
assert os.environ["AWS_ENDPOINT_URL"] == "https://s3.eu-north-1.amazonaws.com"
|
| 102 |
+
assert os.environ["AWS_S3_ENDPOINT"] == "https://s3.eu-north-1.amazonaws.com"
|
| 103 |
+
|
| 104 |
+
req = mock_urlopen.call_args.args[0]
|
| 105 |
+
assert req.full_url == "https://api.earthmover.io/repos/earthmover-public/era5-surface-aws"
|
| 106 |
+
|
| 107 |
+
def test_ensure_aws_region_does_not_override_existing_env(self, monkeypatch):
|
| 108 |
+
"""Keep explicit user-provided AWS endpoint config untouched."""
|
| 109 |
+
import eurus.retrieval as _retrieval
|
| 110 |
+
from eurus.retrieval import _ensure_aws_region
|
| 111 |
+
|
| 112 |
+
# Reset one-shot flag so the function actually runs
|
| 113 |
+
_retrieval._aws_region_set = False
|
| 114 |
+
|
| 115 |
+
monkeypatch.setenv("AWS_REGION", "custom-region")
|
| 116 |
+
monkeypatch.setenv("AWS_DEFAULT_REGION", "custom-default")
|
| 117 |
+
monkeypatch.setenv("AWS_ENDPOINT_URL", "https://custom.endpoint")
|
| 118 |
+
monkeypatch.setenv("AWS_S3_ENDPOINT", "https://custom.s3.endpoint")
|
| 119 |
+
|
| 120 |
+
response = MagicMock()
|
| 121 |
+
response.read.return_value = json.dumps(
|
| 122 |
+
{"bucket": {"extra_config": {"region_name": "us-west-2"}}}
|
| 123 |
+
).encode("utf-8")
|
| 124 |
+
context_manager = MagicMock()
|
| 125 |
+
context_manager.__enter__.return_value = response
|
| 126 |
+
|
| 127 |
+
with patch("eurus.retrieval.urlopen", return_value=context_manager):
|
| 128 |
+
_ensure_aws_region("token")
|
| 129 |
+
|
| 130 |
+
assert os.environ["AWS_REGION"] == "custom-region"
|
| 131 |
+
assert os.environ["AWS_DEFAULT_REGION"] == "custom-default"
|
| 132 |
+
assert os.environ["AWS_ENDPOINT_URL"] == "https://custom.endpoint"
|
| 133 |
+
assert os.environ["AWS_S3_ENDPOINT"] == "https://custom.s3.endpoint"
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
# ============================================================================
|
| 139 |
+
# ANALYSIS GUIDE TESTS
|
| 140 |
+
# ============================================================================
|
| 141 |
+
|
| 142 |
+
class TestAnalysisGuide:
|
| 143 |
+
"""Tests for analysis guide module."""
|
| 144 |
+
|
| 145 |
+
def test_analysis_guide_tool_exists(self):
|
| 146 |
+
"""Test analysis guide tool can be imported."""
|
| 147 |
+
from eurus.tools.analysis_guide import analysis_guide_tool
|
| 148 |
+
assert analysis_guide_tool is not None
|
| 149 |
+
|
| 150 |
+
def test_analysis_guide_returns_content(self):
|
| 151 |
+
"""Test analysis guide returns useful content."""
|
| 152 |
+
from eurus.tools.analysis_guide import get_analysis_guide
|
| 153 |
+
result = get_analysis_guide("timeseries")
|
| 154 |
+
assert len(result) > 100 # Should have substantial content
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
# ============================================================================
|
| 158 |
+
# ERA5 TOOL EXTENDED TESTS
|
| 159 |
+
# ============================================================================
|
| 160 |
+
|
| 161 |
+
class TestERA5ToolValidation:
|
| 162 |
+
"""Tests for ERA5 tool validation and edge cases."""
|
| 163 |
+
|
| 164 |
+
def test_era5_args_date_validation(self):
|
| 165 |
+
"""Test date format validation works."""
|
| 166 |
+
from eurus.tools.era5 import ERA5RetrievalArgs
|
| 167 |
+
# Valid dates should work
|
| 168 |
+
args = ERA5RetrievalArgs(
|
| 169 |
+
variable_id="sst",
|
| 170 |
+
start_date="2023-01-01",
|
| 171 |
+
end_date="2023-12-31",
|
| 172 |
+
min_latitude=20.0,
|
| 173 |
+
max_latitude=30.0,
|
| 174 |
+
min_longitude=260.0,
|
| 175 |
+
max_longitude=280.0
|
| 176 |
+
)
|
| 177 |
+
assert args.start_date == "2023-01-01"
|
| 178 |
+
|
| 179 |
+
def test_era5_args_latitude_range(self):
|
| 180 |
+
"""Test latitude range parameters."""
|
| 181 |
+
from eurus.tools.era5 import ERA5RetrievalArgs
|
| 182 |
+
args = ERA5RetrievalArgs(
|
| 183 |
+
variable_id="t2",
|
| 184 |
+
start_date="2023-01-01",
|
| 185 |
+
end_date="2023-01-31",
|
| 186 |
+
min_latitude=-90.0,
|
| 187 |
+
max_latitude=90.0,
|
| 188 |
+
min_longitude=0.0,
|
| 189 |
+
max_longitude=360.0
|
| 190 |
+
)
|
| 191 |
+
assert args.min_latitude == -90.0
|
| 192 |
+
assert args.max_latitude == 90.0
|
| 193 |
+
|
| 194 |
+
def test_era5_args_query_type_field(self):
|
| 195 |
+
"""Test that ERA5 args handles optional query_type correctly."""
|
| 196 |
+
from eurus.tools.era5 import ERA5RetrievalArgs
|
| 197 |
+
args = ERA5RetrievalArgs(
|
| 198 |
+
variable_id="sst",
|
| 199 |
+
start_date="2023-01-01",
|
| 200 |
+
end_date="2023-12-31",
|
| 201 |
+
min_latitude=20.0,
|
| 202 |
+
max_latitude=30.0,
|
| 203 |
+
min_longitude=260.0,
|
| 204 |
+
max_longitude=280.0
|
| 205 |
+
)
|
| 206 |
+
# Just verify args created successfully
|
| 207 |
+
assert args.variable_id == "sst"
|
| 208 |
+
|
| 209 |
+
|
| 210 |
+
# ============================================================================
|
| 211 |
+
# CONFIG EXTENDED TESTS
|
| 212 |
+
# ============================================================================
|
| 213 |
+
|
| 214 |
+
class TestConfigRegions:
|
| 215 |
+
"""Tests for region configuration."""
|
| 216 |
+
|
| 217 |
+
def test_get_region_valid(self):
|
| 218 |
+
"""Test getting valid predefined region."""
|
| 219 |
+
from eurus.config import get_region
|
| 220 |
+
region = get_region("gulf_of_mexico")
|
| 221 |
+
assert region is not None
|
| 222 |
+
assert hasattr(region, 'min_lat')
|
| 223 |
+
assert hasattr(region, 'max_lat')
|
| 224 |
+
|
| 225 |
+
def test_get_region_case_insensitive(self):
|
| 226 |
+
"""Test region lookup is case insensitive."""
|
| 227 |
+
from eurus.config import get_region
|
| 228 |
+
region = get_region("GULF_OF_MEXICO")
|
| 229 |
+
assert region is not None
|
| 230 |
+
|
| 231 |
+
def test_list_regions_output(self):
|
| 232 |
+
"""Test list_regions returns formatted string."""
|
| 233 |
+
from eurus.config import list_regions
|
| 234 |
+
output = list_regions()
|
| 235 |
+
assert "gulf" in output.lower() or "region" in output.lower()
|
| 236 |
+
|
| 237 |
+
|
| 238 |
+
# ============================================================================
|
| 239 |
+
# MEMORY MODULE INTEGRATION
|
| 240 |
+
# ============================================================================
|
| 241 |
+
|
| 242 |
+
class TestMemoryIntegration:
|
| 243 |
+
"""Integration tests for memory management."""
|
| 244 |
+
|
| 245 |
+
def test_memory_manager_create(self):
|
| 246 |
+
"""Test MemoryManager can be created."""
|
| 247 |
+
from eurus.memory import MemoryManager, reset_memory
|
| 248 |
+
reset_memory()
|
| 249 |
+
mm = MemoryManager()
|
| 250 |
+
assert mm is not None
|
| 251 |
+
|
| 252 |
+
def test_memory_add_conversation(self):
|
| 253 |
+
"""Test adding to conversation history."""
|
| 254 |
+
from eurus.memory import MemoryManager, reset_memory
|
| 255 |
+
reset_memory()
|
| 256 |
+
mm = MemoryManager()
|
| 257 |
+
mm.add_message("user", "Hello")
|
| 258 |
+
history = mm.get_conversation_history()
|
| 259 |
+
assert len(history) >= 1
|
| 260 |
+
|
| 261 |
+
def test_memory_dataset_registration(self):
|
| 262 |
+
"""Test dataset registration."""
|
| 263 |
+
from eurus.memory import MemoryManager, reset_memory
|
| 264 |
+
reset_memory()
|
| 265 |
+
mm = MemoryManager()
|
| 266 |
+
mm.register_dataset(
|
| 267 |
+
path="/tmp/test.zarr",
|
| 268 |
+
variable="sst",
|
| 269 |
+
query_type="temporal",
|
| 270 |
+
start_date="2023-01-01",
|
| 271 |
+
end_date="2023-12-31",
|
| 272 |
+
lat_bounds=(20.0, 30.0),
|
| 273 |
+
lon_bounds=(260.0, 280.0),
|
| 274 |
+
file_size_bytes=1024
|
| 275 |
+
)
|
| 276 |
+
datasets = mm.list_datasets()
|
| 277 |
+
assert len(datasets) >= 1
|
| 278 |
+
|
| 279 |
+
|
| 280 |
+
# ============================================================================
|
| 281 |
+
# ROUTING TOOL EXTENDED TESTS
|
| 282 |
+
# ============================================================================
|
| 283 |
+
|
| 284 |
+
class TestRoutingTool:
|
| 285 |
+
"""Extended tests for routing functionality."""
|
| 286 |
+
|
| 287 |
+
def test_routing_tool_exists(self):
|
| 288 |
+
"""Test routing tool can be imported."""
|
| 289 |
+
from eurus.tools.routing import routing_tool
|
| 290 |
+
assert routing_tool is not None
|
| 291 |
+
assert routing_tool.name == "calculate_maritime_route"
|
| 292 |
+
|
| 293 |
+
def test_has_routing_deps_flag(self):
|
| 294 |
+
"""Test HAS_ROUTING_DEPS flag exists."""
|
| 295 |
+
from eurus.tools.routing import HAS_ROUTING_DEPS
|
| 296 |
+
assert isinstance(HAS_ROUTING_DEPS, bool)
|
| 297 |
+
|
| 298 |
+
|
| 299 |
+
# ============================================================================
|
| 300 |
+
# REPL TOOL COMPREHENSIVE SECURITY TESTS
|
| 301 |
+
# ============================================================================
|
| 302 |
+
|
| 303 |
+
class TestREPLSecurityComprehensive:
|
| 304 |
+
"""REPL tests — Docker is the sandbox, all imports allowed."""
|
| 305 |
+
|
| 306 |
+
def test_repl_allows_sys(self):
|
| 307 |
+
"""Test REPL allows sys module (Docker sandbox)."""
|
| 308 |
+
from eurus.tools.repl import PythonREPLTool
|
| 309 |
+
repl = PythonREPLTool()
|
| 310 |
+
result = repl._run("import sys; print(sys.version_info.major)")
|
| 311 |
+
assert result is not None
|
| 312 |
+
assert "Error" not in result
|
| 313 |
+
repl.close()
|
| 314 |
+
|
| 315 |
+
def test_repl_allows_os(self):
|
| 316 |
+
"""Test REPL allows os module (Docker sandbox)."""
|
| 317 |
+
from eurus.tools.repl import PythonREPLTool
|
| 318 |
+
repl = PythonREPLTool()
|
| 319 |
+
result = repl._run("import os; print(os.getcwd())")
|
| 320 |
+
assert result is not None
|
| 321 |
+
assert "Error" not in result
|
| 322 |
+
repl.close()
|
| 323 |
+
|
| 324 |
+
def test_repl_allows_xarray(self):
|
| 325 |
+
"""Test REPL allows xarray operations."""
|
| 326 |
+
from eurus.tools.repl import PythonREPLTool
|
| 327 |
+
repl = PythonREPLTool()
|
| 328 |
+
result = repl._run("import xarray as xr; print(type(xr))")
|
| 329 |
+
assert "module" in result.lower() or "xarray" in result.lower()
|
| 330 |
+
repl.close()
|
| 331 |
+
|
| 332 |
+
def test_repl_allows_pandas(self):
|
| 333 |
+
"""Test REPL allows pandas operations."""
|
| 334 |
+
from eurus.tools.repl import PythonREPLTool
|
| 335 |
+
repl = PythonREPLTool()
|
| 336 |
+
result = repl._run("import pandas as pd; print(pd.DataFrame({'a': [1, 2]}))")
|
| 337 |
+
assert "Error" not in result
|
| 338 |
+
repl.close()
|
| 339 |
+
|
| 340 |
+
|
| 341 |
+
|
| 342 |
+
# ============================================================================
|
| 343 |
+
# EDGE CASES AND ERROR HANDLING
|
| 344 |
+
# ============================================================================
|
| 345 |
+
|
| 346 |
+
class TestEdgeCases:
|
| 347 |
+
"""Tests for edge cases and error handling."""
|
| 348 |
+
|
| 349 |
+
def test_get_short_name_unknown(self):
|
| 350 |
+
"""Test get_short_name with unknown variable returns input."""
|
| 351 |
+
from eurus.config import get_short_name
|
| 352 |
+
result = get_short_name("completely_unknown_variable_xyz")
|
| 353 |
+
# Should return the input as-is for unknown variables
|
| 354 |
+
assert "completely_unknown_variable_xyz" in result or result is not None
|
| 355 |
+
|
| 356 |
+
def test_variable_info_none_for_unknown(self):
|
| 357 |
+
"""Test get_variable_info returns None for unknown."""
|
| 358 |
+
from eurus.config import get_variable_info
|
| 359 |
+
result = get_variable_info("unknown_var_xyz")
|
| 360 |
+
assert result is None
|
| 361 |
+
|
| 362 |
+
def test_era5_tool_has_description(self):
|
| 363 |
+
"""Test ERA5 tool has comprehensive description."""
|
| 364 |
+
from eurus.tools.era5 import era5_tool
|
| 365 |
+
assert len(era5_tool.description) > 100
|
web/__init__.py
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Eurus Web Interface
|
| 3 |
+
====================
|
| 4 |
+
A browser-based chat interface for the Eurus Climate Agent.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
__version__ = "1.0.0"
|
web/agent_wrapper.py
ADDED
|
@@ -0,0 +1,306 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Agent Wrapper for Web Interface
|
| 3 |
+
===============================
|
| 4 |
+
Wraps the LangChain agent for WebSocket streaming.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import os
|
| 8 |
+
import sys
|
| 9 |
+
import asyncio
|
| 10 |
+
import logging
|
| 11 |
+
from pathlib import Path
|
| 12 |
+
from typing import Optional, Callable, Any, List, Dict
|
| 13 |
+
from queue import Queue
|
| 14 |
+
|
| 15 |
+
# Add src directory to path for eurus package
|
| 16 |
+
PROJECT_ROOT = Path(__file__).parent.parent
|
| 17 |
+
sys.path.insert(0, str(PROJECT_ROOT))
|
| 18 |
+
sys.path.insert(0, str(PROJECT_ROOT / "src"))
|
| 19 |
+
|
| 20 |
+
from dotenv import load_dotenv
|
| 21 |
+
load_dotenv()
|
| 22 |
+
|
| 23 |
+
from langchain_openai import ChatOpenAI
|
| 24 |
+
from langchain.agents import create_agent
|
| 25 |
+
|
| 26 |
+
# IMPORT FROM EURUS PACKAGE - SINGLE SOURCE OF TRUTH
|
| 27 |
+
from eurus.config import CONFIG, AGENT_SYSTEM_PROMPT
|
| 28 |
+
from eurus.memory import get_memory, SmartConversationMemory # Singleton for datasets, per-session for chat
|
| 29 |
+
from eurus.tools import get_all_tools
|
| 30 |
+
from eurus.tools.repl import PythonREPLTool
|
| 31 |
+
|
| 32 |
+
logger = logging.getLogger(__name__)
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
class AgentSession:
|
| 36 |
+
"""
|
| 37 |
+
Manages a single agent session with streaming support.
|
| 38 |
+
"""
|
| 39 |
+
|
| 40 |
+
def __init__(self, api_keys: Optional[Dict[str, str]] = None):
|
| 41 |
+
self._agent = None
|
| 42 |
+
self._repl_tool: Optional[PythonREPLTool] = None
|
| 43 |
+
self._messages: List[Dict] = []
|
| 44 |
+
self._initialized = False
|
| 45 |
+
self._api_keys = api_keys or {}
|
| 46 |
+
|
| 47 |
+
# Global singleton keeps the dataset cache (shared across sessions)
|
| 48 |
+
self._memory = get_memory()
|
| 49 |
+
# Per-session conversation memory — never touches other sessions
|
| 50 |
+
self._conversation = SmartConversationMemory()
|
| 51 |
+
|
| 52 |
+
# Queue for captured plots (thread-safe)
|
| 53 |
+
self._plot_queue: Queue = Queue()
|
| 54 |
+
|
| 55 |
+
self._initialize()
|
| 56 |
+
|
| 57 |
+
def _initialize(self):
|
| 58 |
+
"""Initialize the agent and tools."""
|
| 59 |
+
logger.info("Initializing agent session...")
|
| 60 |
+
|
| 61 |
+
# Resolve API keys: user-provided take priority over env vars
|
| 62 |
+
openai_key = self._api_keys.get("openai_api_key") or os.environ.get("OPENAI_API_KEY")
|
| 63 |
+
arraylake_key = self._api_keys.get("arraylake_api_key") or os.environ.get("ARRAYLAKE_API_KEY")
|
| 64 |
+
|
| 65 |
+
if not arraylake_key:
|
| 66 |
+
logger.warning("ARRAYLAKE_API_KEY not found")
|
| 67 |
+
elif not os.environ.get("ARRAYLAKE_API_KEY"):
|
| 68 |
+
# Only set env var if not already configured (avoid overwriting
|
| 69 |
+
# server-configured keys with user-provided ones in multi-user scenarios)
|
| 70 |
+
os.environ["ARRAYLAKE_API_KEY"] = arraylake_key
|
| 71 |
+
|
| 72 |
+
if not openai_key:
|
| 73 |
+
logger.error("OPENAI_API_KEY not found")
|
| 74 |
+
return
|
| 75 |
+
|
| 76 |
+
try:
|
| 77 |
+
# Initialize REPL tool with working directory
|
| 78 |
+
logger.info("Starting Python kernel...")
|
| 79 |
+
self._repl_tool = PythonREPLTool(working_dir=os.getcwd())
|
| 80 |
+
|
| 81 |
+
# Set up plot callback using the proper method
|
| 82 |
+
def on_plot_captured(base64_data: str, filepath: str, code: str = ""):
|
| 83 |
+
logger.info(f"Plot captured, adding to queue: {filepath}")
|
| 84 |
+
self._plot_queue.put((base64_data, filepath, code))
|
| 85 |
+
|
| 86 |
+
self._repl_tool.set_plot_callback(on_plot_captured)
|
| 87 |
+
logger.info("Plot callback registered")
|
| 88 |
+
|
| 89 |
+
# Get ALL tools from centralized registry (no SCIENCE_TOOLS!)
|
| 90 |
+
tools = get_all_tools(enable_routing=True, enable_guide=True)
|
| 91 |
+
# Replace the default REPL with our configured one
|
| 92 |
+
tools = [t for t in tools if t.name != "python_repl"] + [self._repl_tool]
|
| 93 |
+
|
| 94 |
+
# Initialize LLM with resolved key
|
| 95 |
+
logger.info("Connecting to LLM...")
|
| 96 |
+
llm = ChatOpenAI(
|
| 97 |
+
model=CONFIG.model_name,
|
| 98 |
+
temperature=CONFIG.temperature,
|
| 99 |
+
api_key=openai_key,
|
| 100 |
+
)
|
| 101 |
+
|
| 102 |
+
# Use session-local memory for datasets (NOT global!)
|
| 103 |
+
datasets = self._memory.list_datasets()
|
| 104 |
+
enhanced_prompt = AGENT_SYSTEM_PROMPT
|
| 105 |
+
|
| 106 |
+
if datasets != "No datasets in cache.":
|
| 107 |
+
enhanced_prompt += f"\n\n## CACHED DATASETS\n{datasets}"
|
| 108 |
+
|
| 109 |
+
# Create agent
|
| 110 |
+
logger.info("Creating agent...")
|
| 111 |
+
self._agent = create_agent(
|
| 112 |
+
model=llm,
|
| 113 |
+
tools=tools,
|
| 114 |
+
system_prompt=enhanced_prompt,
|
| 115 |
+
debug=False
|
| 116 |
+
)
|
| 117 |
+
|
| 118 |
+
# FRESH conversation - no old messages!
|
| 119 |
+
self._messages = []
|
| 120 |
+
|
| 121 |
+
self._initialized = True
|
| 122 |
+
logger.info("Agent session initialized successfully")
|
| 123 |
+
|
| 124 |
+
except Exception as e:
|
| 125 |
+
logger.exception(f"Failed to initialize agent: {e}")
|
| 126 |
+
self._initialized = False
|
| 127 |
+
|
| 128 |
+
def is_ready(self) -> bool:
|
| 129 |
+
"""Check if the agent is ready."""
|
| 130 |
+
return self._initialized and self._agent is not None
|
| 131 |
+
|
| 132 |
+
def clear_messages(self):
|
| 133 |
+
"""Clear conversation messages."""
|
| 134 |
+
self._messages = []
|
| 135 |
+
|
| 136 |
+
def get_pending_plots(self) -> List[tuple]:
|
| 137 |
+
"""Get all pending plots from queue."""
|
| 138 |
+
plots = []
|
| 139 |
+
while not self._plot_queue.empty():
|
| 140 |
+
try:
|
| 141 |
+
plots.append(self._plot_queue.get_nowait())
|
| 142 |
+
except Exception:
|
| 143 |
+
break
|
| 144 |
+
return plots
|
| 145 |
+
|
| 146 |
+
async def process_message(
|
| 147 |
+
self,
|
| 148 |
+
user_message: str,
|
| 149 |
+
stream_callback: Callable
|
| 150 |
+
) -> str:
|
| 151 |
+
"""
|
| 152 |
+
Process a user message and stream the response.
|
| 153 |
+
"""
|
| 154 |
+
if not self.is_ready():
|
| 155 |
+
raise RuntimeError("Agent not initialized")
|
| 156 |
+
|
| 157 |
+
# Clear any old plots from queue
|
| 158 |
+
self.get_pending_plots()
|
| 159 |
+
|
| 160 |
+
# Add user message to history (session-local memory)
|
| 161 |
+
self._conversation.add_message("user", user_message)
|
| 162 |
+
self._messages.append({"role": "user", "content": user_message})
|
| 163 |
+
|
| 164 |
+
try:
|
| 165 |
+
# Send status: analyzing
|
| 166 |
+
await stream_callback("status", "🔍 Analyzing your request...")
|
| 167 |
+
await asyncio.sleep(0.3)
|
| 168 |
+
|
| 169 |
+
# Invoke the agent in executor (~15 tool calls max)
|
| 170 |
+
config = {"recursion_limit": 35}
|
| 171 |
+
|
| 172 |
+
# Stream status updates while agent is working
|
| 173 |
+
await stream_callback("status", "🤖 Processing with AI...")
|
| 174 |
+
|
| 175 |
+
result = await asyncio.get_event_loop().run_in_executor(
|
| 176 |
+
None,
|
| 177 |
+
lambda: self._agent.invoke({"messages": self._messages}, config=config)
|
| 178 |
+
)
|
| 179 |
+
|
| 180 |
+
# Update messages
|
| 181 |
+
self._messages = result["messages"]
|
| 182 |
+
|
| 183 |
+
# Parse messages to show tool calls made
|
| 184 |
+
tool_calls_made = []
|
| 185 |
+
for msg in self._messages:
|
| 186 |
+
if hasattr(msg, 'tool_calls') and msg.tool_calls:
|
| 187 |
+
for tc in msg.tool_calls:
|
| 188 |
+
tool_name = tc.get('name', 'unknown')
|
| 189 |
+
if tool_name not in tool_calls_made:
|
| 190 |
+
tool_calls_made.append(tool_name)
|
| 191 |
+
|
| 192 |
+
if tool_calls_made:
|
| 193 |
+
tools_str = ", ".join(tool_calls_made)
|
| 194 |
+
await stream_callback("status", f"🛠️ Used tools: {tools_str}")
|
| 195 |
+
await asyncio.sleep(0.5)
|
| 196 |
+
|
| 197 |
+
# Extract response
|
| 198 |
+
last_message = self._messages[-1]
|
| 199 |
+
|
| 200 |
+
if hasattr(last_message, 'content') and last_message.content:
|
| 201 |
+
response_text = last_message.content
|
| 202 |
+
elif isinstance(last_message, dict) and last_message.get('content'):
|
| 203 |
+
response_text = last_message['content']
|
| 204 |
+
else:
|
| 205 |
+
response_text = str(last_message)
|
| 206 |
+
|
| 207 |
+
# Send status: generating response
|
| 208 |
+
await stream_callback("status", "✍️ Generating response...")
|
| 209 |
+
await asyncio.sleep(0.2)
|
| 210 |
+
|
| 211 |
+
# Stream the response in chunks
|
| 212 |
+
chunk_size = 50
|
| 213 |
+
for i in range(0, len(response_text), chunk_size):
|
| 214 |
+
chunk = response_text[i:i + chunk_size]
|
| 215 |
+
await stream_callback("chunk", chunk)
|
| 216 |
+
await asyncio.sleep(0.01)
|
| 217 |
+
|
| 218 |
+
# Send any captured media (plots and videos)
|
| 219 |
+
plots = self.get_pending_plots()
|
| 220 |
+
# NOTE: Only use session-specific _plot_queue, NOT shared folder scan (privacy!)
|
| 221 |
+
|
| 222 |
+
if plots:
|
| 223 |
+
await stream_callback("status", f"📊 Rendering {len(plots)} visualization(s)...")
|
| 224 |
+
await asyncio.sleep(0.3)
|
| 225 |
+
|
| 226 |
+
logger.info(f"Sending {len(plots)} media items to client")
|
| 227 |
+
for plot_data in plots:
|
| 228 |
+
base64_data, filepath = plot_data[0], plot_data[1]
|
| 229 |
+
code = plot_data[2] if len(plot_data) > 2 else ""
|
| 230 |
+
|
| 231 |
+
# Determine if this is a video or image
|
| 232 |
+
ext = filepath.lower().split('.')[-1] if filepath else ''
|
| 233 |
+
if ext in ('gif',):
|
| 234 |
+
await stream_callback("video", "", data=base64_data, path=filepath, mimetype="image/gif")
|
| 235 |
+
elif ext in ('webm',):
|
| 236 |
+
await stream_callback("video", "", data=base64_data, path=filepath, mimetype="video/webm")
|
| 237 |
+
elif ext in ('mp4',):
|
| 238 |
+
await stream_callback("video", "", data=base64_data, path=filepath, mimetype="video/mp4")
|
| 239 |
+
else:
|
| 240 |
+
# Default to plot (png, jpg, etc.)
|
| 241 |
+
await stream_callback("plot", "", data=base64_data, path=filepath, code=code)
|
| 242 |
+
|
| 243 |
+
# Save to memory
|
| 244 |
+
self._conversation.add_message("assistant", response_text)
|
| 245 |
+
|
| 246 |
+
return response_text
|
| 247 |
+
|
| 248 |
+
except Exception as e:
|
| 249 |
+
logger.exception(f"Error processing message: {e}")
|
| 250 |
+
raise
|
| 251 |
+
|
| 252 |
+
def close(self):
|
| 253 |
+
"""Clean up resources."""
|
| 254 |
+
logger.info("Closing agent session...")
|
| 255 |
+
if self._repl_tool:
|
| 256 |
+
try:
|
| 257 |
+
self._repl_tool.close()
|
| 258 |
+
except Exception as e:
|
| 259 |
+
logger.error(f"Error closing REPL: {e}")
|
| 260 |
+
|
| 261 |
+
|
| 262 |
+
# Per-connection sessions (NOT global singleton!)
|
| 263 |
+
# Key: unique connection ID, Value: AgentSession
|
| 264 |
+
_sessions: Dict[str, AgentSession] = {}
|
| 265 |
+
|
| 266 |
+
|
| 267 |
+
def create_session(connection_id: str, api_keys: Optional[Dict[str, str]] = None) -> AgentSession:
|
| 268 |
+
"""Create a new session for a connection."""
|
| 269 |
+
if connection_id in _sessions:
|
| 270 |
+
# Close existing session first
|
| 271 |
+
_sessions[connection_id].close()
|
| 272 |
+
session = AgentSession(api_keys=api_keys)
|
| 273 |
+
_sessions[connection_id] = session
|
| 274 |
+
logger.info(f"Created session for connection: {connection_id}")
|
| 275 |
+
return session
|
| 276 |
+
|
| 277 |
+
|
| 278 |
+
def get_session(connection_id: str) -> Optional[AgentSession]:
|
| 279 |
+
"""Get session for a connection."""
|
| 280 |
+
return _sessions.get(connection_id)
|
| 281 |
+
|
| 282 |
+
|
| 283 |
+
def close_session(connection_id: str):
|
| 284 |
+
"""Close and remove session for a connection."""
|
| 285 |
+
if connection_id in _sessions:
|
| 286 |
+
_sessions[connection_id].close()
|
| 287 |
+
del _sessions[connection_id]
|
| 288 |
+
logger.info(f"Closed session for connection: {connection_id}")
|
| 289 |
+
|
| 290 |
+
|
| 291 |
+
# DEPRECATED: Keep for backward compatibility during migration
|
| 292 |
+
def get_agent_session() -> AgentSession:
|
| 293 |
+
"""DEPRECATED: Use create_session/get_session with connection_id instead."""
|
| 294 |
+
logger.warning("get_agent_session() is deprecated - use create_session(connection_id)")
|
| 295 |
+
# Create default session for CLI/testing
|
| 296 |
+
if "_default" not in _sessions:
|
| 297 |
+
_sessions["_default"] = AgentSession()
|
| 298 |
+
return _sessions["_default"]
|
| 299 |
+
|
| 300 |
+
|
| 301 |
+
def shutdown_agent_session():
|
| 302 |
+
"""Shutdown all agent sessions."""
|
| 303 |
+
count = len(_sessions)
|
| 304 |
+
for conn_id in list(_sessions.keys()):
|
| 305 |
+
close_session(conn_id)
|
| 306 |
+
logger.info(f"Shutdown {count} sessions")
|
web/app.py
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Eurus Web Application
|
| 3 |
+
======================
|
| 4 |
+
FastAPI application factory and main entry point.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import os
|
| 8 |
+
import sys
|
| 9 |
+
import logging
|
| 10 |
+
from pathlib import Path
|
| 11 |
+
from contextlib import asynccontextmanager
|
| 12 |
+
|
| 13 |
+
from fastapi import FastAPI
|
| 14 |
+
from fastapi.staticfiles import StaticFiles
|
| 15 |
+
from fastapi.templating import Jinja2Templates
|
| 16 |
+
|
| 17 |
+
# Add parent and src directory to path for eurus package
|
| 18 |
+
PROJECT_ROOT = Path(__file__).parent.parent
|
| 19 |
+
sys.path.insert(0, str(PROJECT_ROOT))
|
| 20 |
+
sys.path.insert(0, str(PROJECT_ROOT / "src"))
|
| 21 |
+
|
| 22 |
+
# IMPORT FROM EURUS PACKAGE
|
| 23 |
+
from eurus.config import CONFIG, PLOTS_DIR
|
| 24 |
+
|
| 25 |
+
# Configure logging
|
| 26 |
+
logging.basicConfig(
|
| 27 |
+
level=logging.INFO,
|
| 28 |
+
format='%(asctime)s | %(levelname)s | %(name)s | %(message)s',
|
| 29 |
+
datefmt='%H:%M:%S'
|
| 30 |
+
)
|
| 31 |
+
logger = logging.getLogger(__name__)
|
| 32 |
+
|
| 33 |
+
# Paths
|
| 34 |
+
WEB_DIR = Path(__file__).parent
|
| 35 |
+
TEMPLATES_DIR = WEB_DIR / "templates"
|
| 36 |
+
STATIC_DIR = WEB_DIR / "static"
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
@asynccontextmanager
|
| 40 |
+
async def lifespan(app: FastAPI):
|
| 41 |
+
"""Application lifespan handler for startup/shutdown."""
|
| 42 |
+
# Startup
|
| 43 |
+
logger.info("Starting Eurus Web Interface...")
|
| 44 |
+
logger.info(f"Templates: {TEMPLATES_DIR}")
|
| 45 |
+
logger.info(f"Static files: {STATIC_DIR}")
|
| 46 |
+
logger.info(f"Plots directory: {PLOTS_DIR}")
|
| 47 |
+
|
| 48 |
+
# Sessions are created per-connection in websocket.py
|
| 49 |
+
logger.info("Ready to accept connections")
|
| 50 |
+
|
| 51 |
+
yield
|
| 52 |
+
|
| 53 |
+
# Shutdown
|
| 54 |
+
logger.info("Shutting down Eurus Web Interface...")
|
| 55 |
+
from web.agent_wrapper import shutdown_agent_session
|
| 56 |
+
shutdown_agent_session()
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def create_app() -> FastAPI:
|
| 60 |
+
"""Create and configure the FastAPI application."""
|
| 61 |
+
|
| 62 |
+
app = FastAPI(
|
| 63 |
+
title="Eurus Climate Agent",
|
| 64 |
+
description="Interactive web interface for ERA5 climate data analysis",
|
| 65 |
+
version="1.0.0",
|
| 66 |
+
lifespan=lifespan,
|
| 67 |
+
)
|
| 68 |
+
|
| 69 |
+
# Mount static files
|
| 70 |
+
app.mount("/static", StaticFiles(directory=str(STATIC_DIR)), name="static")
|
| 71 |
+
|
| 72 |
+
# Mount plots directory for serving generated plots
|
| 73 |
+
PLOTS_DIR.mkdir(parents=True, exist_ok=True)
|
| 74 |
+
app.mount("/plots", StaticFiles(directory=str(PLOTS_DIR)), name="plots")
|
| 75 |
+
|
| 76 |
+
# Include routers
|
| 77 |
+
from web.routes import api_router, websocket_router, pages_router
|
| 78 |
+
|
| 79 |
+
app.include_router(api_router, prefix="/api", tags=["api"])
|
| 80 |
+
app.include_router(websocket_router, tags=["websocket"])
|
| 81 |
+
app.include_router(pages_router, tags=["pages"])
|
| 82 |
+
|
| 83 |
+
return app
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
# Create the app instance
|
| 87 |
+
app = create_app()
|
| 88 |
+
|
| 89 |
+
|
| 90 |
+
def main():
|
| 91 |
+
"""Main entry point for running the web server."""
|
| 92 |
+
import uvicorn
|
| 93 |
+
|
| 94 |
+
host = getattr(CONFIG, 'web_host', '127.0.0.1')
|
| 95 |
+
port = getattr(CONFIG, 'web_port', 8000)
|
| 96 |
+
|
| 97 |
+
print(f"""
|
| 98 |
+
╔═══════════════════════════════════════════════════════════════════════════╗
|
| 99 |
+
║ ║
|
| 100 |
+
║ ██╗ ██╗ ██████╗ ███████╗████████╗ ██████╗ ██╗ ██╗ ║
|
| 101 |
+
║ ██║ ██║██╔═══██╗██╔════╝╚══██╔══╝██╔═══██╗██║ ██╔╝ ║
|
| 102 |
+
║ ██║ ██║██║ ██║███████╗ ██║ ██║ ██║█████╔╝ ║
|
| 103 |
+
║ ╚██╗ ██╔╝██║ ██║╚════██║ ██║ ██║ ██║██╔═██╗ ║
|
| 104 |
+
║ ╚████╔╝ ╚██████╔╝███████║ ██║ ╚██████╔╝██║ ██╗ ║
|
| 105 |
+
║ ╚═══╝ ╚═════╝ ╚══════╝ ╚═╝ ╚═════╝ ╚═╝ ╚═╝ ║
|
| 106 |
+
║ ║
|
| 107 |
+
║ Eurus Web Interface v1.0 ║
|
| 108 |
+
║ ───────────────────────────────────── ║
|
| 109 |
+
║ ║
|
| 110 |
+
║ Starting server at: http://{host}:{port} ║
|
| 111 |
+
║ ║
|
| 112 |
+
╚═══════════════════════════════════════════════════════════════════════════╝
|
| 113 |
+
""")
|
| 114 |
+
|
| 115 |
+
uvicorn.run(
|
| 116 |
+
"web.app:app",
|
| 117 |
+
host=host,
|
| 118 |
+
port=port,
|
| 119 |
+
reload=False,
|
| 120 |
+
log_level="info",
|
| 121 |
+
)
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
if __name__ == "__main__":
|
| 125 |
+
main()
|
web/routes/__init__.py
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Web routes package."""
|
| 2 |
+
|
| 3 |
+
from .api import router as api_router
|
| 4 |
+
from .websocket import router as websocket_router
|
| 5 |
+
from .pages import router as pages_router
|
| 6 |
+
|
| 7 |
+
__all__ = ["api_router", "websocket_router", "pages_router"]
|
web/routes/api.py
ADDED
|
@@ -0,0 +1,173 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
REST API Routes
|
| 3 |
+
===============
|
| 4 |
+
Health checks, cache management, and configuration endpoints.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import os
|
| 8 |
+
import sys
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
from typing import List, Dict, Any
|
| 11 |
+
|
| 12 |
+
from fastapi import APIRouter, HTTPException
|
| 13 |
+
from pydantic import BaseModel
|
| 14 |
+
|
| 15 |
+
# Add project root and src/ to path for eurus package
|
| 16 |
+
PROJECT_ROOT = Path(__file__).parent.parent.parent
|
| 17 |
+
sys.path.insert(0, str(PROJECT_ROOT))
|
| 18 |
+
sys.path.insert(0, str(PROJECT_ROOT / "src"))
|
| 19 |
+
|
| 20 |
+
# IMPORT FROM EURUS PACKAGE
|
| 21 |
+
from eurus.config import CONFIG, ERA5_VARIABLES, GEOGRAPHIC_REGIONS
|
| 22 |
+
|
| 23 |
+
router = APIRouter()
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class HealthResponse(BaseModel):
|
| 27 |
+
status: str
|
| 28 |
+
version: str
|
| 29 |
+
agent_ready: bool
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
class DatasetInfo(BaseModel):
|
| 33 |
+
variable: str
|
| 34 |
+
query_type: str
|
| 35 |
+
start_date: str
|
| 36 |
+
end_date: str
|
| 37 |
+
lat_bounds: tuple
|
| 38 |
+
lon_bounds: tuple
|
| 39 |
+
file_size_bytes: int
|
| 40 |
+
path: str
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
class CacheResponse(BaseModel):
|
| 44 |
+
datasets: List[Dict[str, Any]]
|
| 45 |
+
total_size_bytes: int
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
class ConfigResponse(BaseModel):
|
| 49 |
+
variables: List[Dict[str, str]]
|
| 50 |
+
regions: List[str]
|
| 51 |
+
model: str
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
@router.get("/keys-status")
|
| 55 |
+
async def keys_status():
|
| 56 |
+
"""Check which API keys are configured via environment variables."""
|
| 57 |
+
return {
|
| 58 |
+
"openai": bool(os.environ.get("OPENAI_API_KEY")),
|
| 59 |
+
"arraylake": bool(os.environ.get("ARRAYLAKE_API_KEY")),
|
| 60 |
+
}
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
@router.get("/health", response_model=HealthResponse)
async def health_check():
    """Check if the server and agent are healthy.

    Returns:
        HealthResponse with ``agent_ready`` True only when an agent session
        exists and reports ready. Any failure probing the agent — including
        an ImportError from the agent wrapper — degrades to
        ``agent_ready=False`` instead of surfacing a 500.
    """
    try:
        # Import inside the try: a missing or broken agent wrapper should
        # make the endpoint report "not ready", not crash the health check.
        from web.agent_wrapper import get_agent_session

        session = get_agent_session()
        agent_ready = session is not None and session.is_ready()
    except Exception:
        agent_ready = False

    return HealthResponse(
        status="ok",
        version="1.0.0",
        agent_ready=agent_ready,
    )
|
| 81 |
+
@router.get("/cache", response_model=CacheResponse)
async def list_cache():
    """List all cached datasets that still exist on disk."""
    from eurus.memory import get_memory

    def _disk_size(target: str) -> int:
        # Directories are walked and summed; plain files are sized directly.
        if os.path.isdir(target):
            return sum(
                os.path.getsize(os.path.join(root, name))
                for root, _, names in os.walk(target)
                for name in names
            )
        return os.path.getsize(target)

    memory = get_memory()
    entries = []
    total = 0

    for dataset_path, record in memory.datasets.items():
        # Skip stale records whose backing file/directory was removed.
        if not os.path.exists(dataset_path):
            continue

        size = record.file_size_bytes
        if size == 0:
            # Size was never recorded — measure it now.
            size = _disk_size(dataset_path)

        entries.append({
            "variable": record.variable,
            "query_type": record.query_type,
            "start_date": record.start_date,
            "end_date": record.end_date,
            "lat_bounds": record.lat_bounds,
            "lon_bounds": record.lon_bounds,
            "file_size_bytes": size,
            "path": dataset_path,
        })
        total += size

    return CacheResponse(datasets=entries, total_size_bytes=total)
|
| 119 |
+
@router.get("/config", response_model=ConfigResponse)
async def get_config():
    """Get available variables and regions.

    Variables are de-duplicated by ``short_name`` (several ERA5 ids can map
    to the same short name); the first occurrence wins, preserving the
    source dict's insertion order.
    """
    seen_vars = set()
    variables = []
    # Iterate values directly — the variable ids (dict keys) are unused here.
    for var_info in ERA5_VARIABLES.values():
        if var_info.short_name in seen_vars:
            continue
        seen_vars.add(var_info.short_name)
        variables.append({
            "name": var_info.short_name,
            "long_name": var_info.long_name,
            "units": var_info.units,
            "description": var_info.description,
        })

    return ConfigResponse(
        variables=variables,
        regions=list(GEOGRAPHIC_REGIONS.keys()),
        model=CONFIG.model_name,
    )
|
| 144 |
+
@router.delete("/conversation")
async def clear_conversation():
    """Clear the conversation history (memory store and live agent session)."""
    from eurus.memory import get_memory
    from web.agent_wrapper import get_agent_session

    get_memory().clear_conversation()

    # The agent session keeps its own message buffer; wipe that too so the
    # next turn starts from a clean slate.
    active = get_agent_session()
    if active:
        active.clear_messages()

    return {"status": "ok", "message": "Conversation cleared"}
|
| 161 |
+
@router.get("/memory")
async def get_memory_summary():
    """Get a summary of the agent's current memory state."""
    from eurus.memory import get_memory

    memory = get_memory()

    # Only count dataset records whose backing path still exists on disk.
    live_datasets = sum(1 for p in memory.datasets if os.path.exists(p))

    return {
        "conversation_count": len(memory.conversations),
        "dataset_count": live_datasets,
        "analysis_count": len(memory.analyses),
        "context_summary": memory.get_context_summary(),
    }
|
web/routes/pages.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
Page Routes
===========
HTML page rendering endpoints.
"""

import sys  # NOTE(review): unused in this module — candidate for removal
from pathlib import Path

from fastapi import APIRouter, Request
from fastapi.responses import HTMLResponse
from fastapi.templating import Jinja2Templates

# Templates directory: web/templates, resolved relative to this file.
TEMPLATES_DIR = Path(__file__).parent.parent / "templates"
templates = Jinja2Templates(directory=str(TEMPLATES_DIR))

router = APIRouter()


@router.get("/", response_class=HTMLResponse)
async def index(request: Request):
    """Render the main chat page."""
    # Jinja2Templates requires the request object in the template context.
    return templates.TemplateResponse(
        "index.html",
        {"request": request}
    )
|
web/routes/websocket.py
ADDED
|
@@ -0,0 +1,131 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
WebSocket Chat Handler
|
| 3 |
+
======================
|
| 4 |
+
Handles real-time chat via WebSocket with streaming responses.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
import json
|
| 8 |
+
import asyncio
|
| 9 |
+
import logging
|
| 10 |
+
from typing import Optional
|
| 11 |
+
|
| 12 |
+
from fastapi import APIRouter, WebSocket, WebSocketDisconnect
|
| 13 |
+
|
| 14 |
+
router = APIRouter()
|
| 15 |
+
logger = logging.getLogger(__name__)
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
class ConnectionManager:
    """Manages WebSocket connections.

    Tracks every accepted socket so the module can log connection counts
    and send JSON payloads without letting a dead socket raise into the
    caller.
    """

    def __init__(self):
        # All currently-open sockets, in accept order.
        self.active_connections: list[WebSocket] = []

    async def connect(self, websocket: WebSocket):
        """Accept the socket and register it."""
        await websocket.accept()
        self.active_connections.append(websocket)
        logger.info(f"WebSocket connected. Total: {len(self.active_connections)}")

    def disconnect(self, websocket: WebSocket):
        """Deregister the socket; safe to call for sockets never registered."""
        if websocket in self.active_connections:
            self.active_connections.remove(websocket)
            logger.info(f"WebSocket disconnected. Total: {len(self.active_connections)}")

    async def send_json(self, websocket: WebSocket, data: dict):
        """Send *data* as JSON, logging (not raising) on send failure."""
        try:
            await websocket.send_json(data)
        except Exception as e:
            # Best-effort: a closed/broken socket must not crash the chat loop.
            logger.error(f"Failed to send message: {e}")
| 40 |
+
|
| 41 |
+
manager = ConnectionManager()
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
@router.websocket("/ws/chat")
async def websocket_chat(websocket: WebSocket):
    """WebSocket endpoint for chat.

    Protocol (per incoming JSON message):
      * ``{"type": "configure_keys", ...}`` — create a session with
        client-supplied API keys; reply ``keys_configured``.
      * ``{"message": "/clear"}`` — wipe the session's message buffer;
        reply ``clear``.
      * any other non-empty ``message`` — stream agent events to the client
        via ``stream_callback`` and finish with a ``complete`` message.

    The agent session is created lazily: either on the first
    ``configure_keys`` message, or (falling back to environment keys) on
    the first ordinary message.
    """
    import uuid
    connection_id = str(uuid.uuid4())  # Unique ID for this connection

    await manager.connect(websocket)
    logger.info(f"New connection: {connection_id}")

    try:
        # Session created lazily after we receive API keys.
        # NOTE(review): close_session is bound here inside the try; if this
        # import ever fails, the except handlers below would hit a NameError
        # on close_session — confirm the import cannot fail in practice.
        from web.agent_wrapper import create_session, get_session, close_session
        session = None

        while True:
            data = await websocket.receive_json()
            message = data.get("message", "").strip()

            # Handle API key configuration from client
            if data.get("type") == "configure_keys":
                api_keys = {
                    "openai_api_key": data.get("openai_api_key", ""),
                    "arraylake_api_key": data.get("arraylake_api_key", ""),
                }
                session = create_session(connection_id, api_keys=api_keys)
                ready = session.is_ready()
                await manager.send_json(websocket, {
                    "type": "keys_configured",
                    "ready": ready,
                })
                continue

            # Create default session if not yet created (keys from env)
            if session is None:
                session = create_session(connection_id)

            if not message:
                continue

            logger.info(f"[{connection_id[:8]}] Received: {message[:100]}...")

            # Handle /clear command — clear session memory + UI
            # (message was already stripped above; the extra .strip() is a no-op)
            if message.strip() == "/clear":
                session = get_session(connection_id)
                if session:
                    session.clear_messages()
                await manager.send_json(websocket, {"type": "clear"})
                continue

            # Send thinking indicator before the (potentially slow) agent call
            await manager.send_json(websocket, {"type": "thinking"})

            try:
                # Re-fetch the session for this connection; it may have been
                # replaced by a configure_keys message since creation.
                session = get_session(connection_id)
                if not session:
                    raise RuntimeError("Session not found")

                # Callback for streaming intermediate agent events to the UI
                async def stream_callback(event_type: str, content: str, **kwargs):
                    msg = {"type": event_type, "content": content}
                    msg.update(kwargs)
                    await manager.send_json(websocket, msg)

                # Process message (awaits the full agent turn)
                response = await session.process_message(message, stream_callback)

                # Send complete — signals the UI to finalize the turn
                await manager.send_json(websocket, {
                    "type": "complete",
                    "content": response
                })

            except Exception as e:
                # Per-message failures are reported to the client; the
                # connection loop keeps running.
                logger.exception(f"Error: {e}")
                await manager.send_json(websocket, {
                    "type": "error",
                    "content": str(e)
                })

    except WebSocketDisconnect:
        logger.info(f"Connection {connection_id[:8]} disconnected")
        manager.disconnect(websocket)
        close_session(connection_id)  # Clean up session
    except Exception as e:
        # Any non-disconnect failure of the loop itself: log, then tear down
        # exactly as on a normal disconnect.
        logger.exception(f"WebSocket error: {e}")
        manager.disconnect(websocket)
        close_session(connection_id)  # Clean up session
|
web/static/css/style.css
ADDED
|
@@ -0,0 +1,854 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/* Eurus - Premium Interface with Eye Comfort */
|
| 2 |
+
/* Inspired by Google Material, Apple HIG, and modern design systems */
|
| 3 |
+
|
| 4 |
+
/* ===== DARK THEME (Refined Neosynth) ===== */
|
| 5 |
+
:root,
|
| 6 |
+
[data-theme="dark"] {
|
| 7 |
+
/* Softer dark base - not pure black for reduced eye strain */
|
| 8 |
+
--bg-primary: #0f1419;
|
| 9 |
+
--bg-secondary: #15202b;
|
| 10 |
+
--bg-tertiary: #1c2938;
|
| 11 |
+
|
| 12 |
+
/* Refined neon - softer cyan/purple, easier on eyes */
|
| 13 |
+
--accent-primary: #1d9bf0;
|
| 14 |
+
--accent-secondary: #8b5cf6;
|
| 15 |
+
--accent-tertiary: #22d3ee;
|
| 16 |
+
|
| 17 |
+
/* High contrast text - WCAG AA compliant */
|
| 18 |
+
--text-primary: #e7e9ea;
|
| 19 |
+
--text-secondary: #8899a6;
|
| 20 |
+
--text-muted: #5c6e7e;
|
| 21 |
+
|
| 22 |
+
/* Subtle glass effect */
|
| 23 |
+
--glass-bg: rgba(255, 255, 255, 0.04);
|
| 24 |
+
--glass-border: rgba(255, 255, 255, 0.1);
|
| 25 |
+
|
| 26 |
+
/* Messages */
|
| 27 |
+
--message-user-bg: linear-gradient(135deg, rgba(29, 155, 240, 0.12), rgba(139, 92, 246, 0.08));
|
| 28 |
+
--message-user-border: rgba(29, 155, 240, 0.25);
|
| 29 |
+
--message-assistant-bg: rgba(255, 255, 255, 0.04);
|
| 30 |
+
--message-assistant-border: rgba(255, 255, 255, 0.08);
|
| 31 |
+
|
| 32 |
+
/* Code - rich dark */
|
| 33 |
+
--code-bg: #0d1117;
|
| 34 |
+
|
| 35 |
+
/* Refined glow - subtle, not overwhelming */
|
| 36 |
+
--glow-primary: 0 2px 12px rgba(29, 155, 240, 0.2);
|
| 37 |
+
--glow-secondary: 0 2px 12px rgba(139, 92, 246, 0.15);
|
| 38 |
+
|
| 39 |
+
/* Subtle ambient gradient */
|
| 40 |
+
--bg-gradient:
|
| 41 |
+
radial-gradient(ellipse at 50% 0%, rgba(29, 155, 240, 0.04) 0%, transparent 50%);
|
| 42 |
+
|
| 43 |
+
/* Focus ring */
|
| 44 |
+
--focus-ring: 0 0 0 2px rgba(29, 155, 240, 0.5);
|
| 45 |
+
}
|
| 46 |
+
|
| 47 |
+
/* ===== LIGHT THEME (Clean & Professional) ===== */
|
| 48 |
+
[data-theme="light"] {
|
| 49 |
+
/* Warm white - easier than pure white */
|
| 50 |
+
--bg-primary: #f7f9fa;
|
| 51 |
+
--bg-secondary: #ffffff;
|
| 52 |
+
--bg-tertiary: #eff3f4;
|
| 53 |
+
|
| 54 |
+
/* Professional blue - Google-inspired */
|
| 55 |
+
--accent-primary: #1a73e8;
|
| 56 |
+
--accent-secondary: #5f6368;
|
| 57 |
+
--accent-tertiary: #34a853;
|
| 58 |
+
|
| 59 |
+
/* High contrast text */
|
| 60 |
+
--text-primary: #202124;
|
| 61 |
+
--text-secondary: #5f6368;
|
| 62 |
+
--text-muted: #9aa0a6;
|
| 63 |
+
|
| 64 |
+
/* Soft shadows, minimal borders */
|
| 65 |
+
--glass-bg: rgba(255, 255, 255, 0.9);
|
| 66 |
+
--glass-border: rgba(0, 0, 0, 0.08);
|
| 67 |
+
|
| 68 |
+
/* Messages - clean and minimal */
|
| 69 |
+
--message-user-bg: linear-gradient(135deg, rgba(26, 115, 232, 0.08), rgba(26, 115, 232, 0.04));
|
| 70 |
+
--message-user-border: rgba(26, 115, 232, 0.15);
|
| 71 |
+
--message-assistant-bg: #ffffff;
|
| 72 |
+
--message-assistant-border: rgba(0, 0, 0, 0.06);
|
| 73 |
+
|
| 74 |
+
/* Code - readable dark */
|
| 75 |
+
--code-bg: #1f2937;
|
| 76 |
+
|
| 77 |
+
/* Soft elevation shadows */
|
| 78 |
+
--glow-primary: 0 1px 3px rgba(0, 0, 0, 0.1), 0 4px 12px rgba(26, 115, 232, 0.08);
|
| 79 |
+
--glow-secondary: 0 1px 3px rgba(0, 0, 0, 0.1);
|
| 80 |
+
|
| 81 |
+
/* Clean background */
|
| 82 |
+
--bg-gradient: none;
|
| 83 |
+
|
| 84 |
+
/* Focus ring */
|
| 85 |
+
--focus-ring: 0 0 0 2px rgba(26, 115, 232, 0.4);
|
| 86 |
+
}
|
| 87 |
+
|
| 88 |
+
/* ===== BASE STYLES ===== */
|
| 89 |
+
* {
|
| 90 |
+
box-sizing: border-box;
|
| 91 |
+
}
|
| 92 |
+
|
| 93 |
+
body {
|
| 94 |
+
margin: 0;
|
| 95 |
+
padding: 0;
|
| 96 |
+
min-height: 100vh;
|
| 97 |
+
display: flex;
|
| 98 |
+
flex-direction: column;
|
| 99 |
+
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
|
| 100 |
+
font-size: 15px;
|
| 101 |
+
line-height: 1.5;
|
| 102 |
+
background: var(--bg-primary);
|
| 103 |
+
background-image: var(--bg-gradient);
|
| 104 |
+
color: var(--text-primary);
|
| 105 |
+
transition: background-color 0.2s ease, color 0.2s ease;
|
| 106 |
+
-webkit-font-smoothing: antialiased;
|
| 107 |
+
-moz-osx-font-smoothing: grayscale;
|
| 108 |
+
}
|
| 109 |
+
|
| 110 |
+
/* ===== HEADER ===== */
|
| 111 |
+
header {
|
| 112 |
+
background: var(--bg-secondary);
|
| 113 |
+
border-bottom: 1px solid var(--glass-border);
|
| 114 |
+
padding: 0.75rem 1.25rem;
|
| 115 |
+
transition: background-color 0.2s ease;
|
| 116 |
+
}
|
| 117 |
+
|
| 118 |
+
header nav {
|
| 119 |
+
display: flex;
|
| 120 |
+
justify-content: space-between;
|
| 121 |
+
align-items: center;
|
| 122 |
+
max-width: 1200px;
|
| 123 |
+
margin: 0 auto;
|
| 124 |
+
}
|
| 125 |
+
|
| 126 |
+
header nav ul {
|
| 127 |
+
list-style: none;
|
| 128 |
+
margin: 0;
|
| 129 |
+
padding: 0;
|
| 130 |
+
display: flex;
|
| 131 |
+
align-items: center;
|
| 132 |
+
gap: 1rem;
|
| 133 |
+
}
|
| 134 |
+
|
| 135 |
+
header nav a {
|
| 136 |
+
color: var(--text-secondary);
|
| 137 |
+
text-decoration: none;
|
| 138 |
+
font-size: 0.875rem;
|
| 139 |
+
font-weight: 500;
|
| 140 |
+
padding: 0.5rem 0.75rem;
|
| 141 |
+
border-radius: 0.5rem;
|
| 142 |
+
transition: all 0.15s ease;
|
| 143 |
+
}
|
| 144 |
+
|
| 145 |
+
header nav a:hover {
|
| 146 |
+
color: var(--accent-primary);
|
| 147 |
+
background: var(--glass-bg);
|
| 148 |
+
}
|
| 149 |
+
|
| 150 |
+
.logo {
|
| 151 |
+
font-weight: 700;
|
| 152 |
+
font-size: 1.125rem;
|
| 153 |
+
color: var(--accent-primary);
|
| 154 |
+
letter-spacing: -0.01em;
|
| 155 |
+
}
|
| 156 |
+
|
| 157 |
+
/* Theme Toggle Button */
|
| 158 |
+
.theme-toggle {
|
| 159 |
+
background: var(--glass-bg);
|
| 160 |
+
border: 1px solid var(--glass-border);
|
| 161 |
+
border-radius: 0.5rem;
|
| 162 |
+
padding: 0.5rem;
|
| 163 |
+
cursor: pointer;
|
| 164 |
+
transition: all 0.15s ease;
|
| 165 |
+
display: flex;
|
| 166 |
+
align-items: center;
|
| 167 |
+
justify-content: center;
|
| 168 |
+
}
|
| 169 |
+
|
| 170 |
+
.theme-toggle:hover {
|
| 171 |
+
background: var(--bg-tertiary);
|
| 172 |
+
}
|
| 173 |
+
|
| 174 |
+
.theme-toggle:focus {
|
| 175 |
+
outline: none;
|
| 176 |
+
box-shadow: var(--focus-ring);
|
| 177 |
+
}
|
| 178 |
+
|
| 179 |
+
.theme-icon {
|
| 180 |
+
font-size: 1rem;
|
| 181 |
+
line-height: 1;
|
| 182 |
+
}
|
| 183 |
+
|
| 184 |
+
/* Connection status */
|
| 185 |
+
.status-badge {
|
| 186 |
+
padding: 0.375rem 0.625rem;
|
| 187 |
+
border-radius: 1rem;
|
| 188 |
+
font-size: 0.6875rem;
|
| 189 |
+
font-weight: 600;
|
| 190 |
+
letter-spacing: 0.02em;
|
| 191 |
+
text-transform: uppercase;
|
| 192 |
+
}
|
| 193 |
+
|
| 194 |
+
.status-badge.connected {
|
| 195 |
+
background: rgba(52, 168, 83, 0.12);
|
| 196 |
+
color: #34a853;
|
| 197 |
+
}
|
| 198 |
+
|
| 199 |
+
.status-badge.disconnected {
|
| 200 |
+
background: rgba(234, 67, 53, 0.12);
|
| 201 |
+
color: #ea4335;
|
| 202 |
+
}
|
| 203 |
+
|
| 204 |
+
.status-badge.connecting {
|
| 205 |
+
background: rgba(251, 188, 4, 0.12);
|
| 206 |
+
color: #f9ab00;
|
| 207 |
+
animation: pulse 2s ease-in-out infinite;
|
| 208 |
+
}
|
| 209 |
+
|
| 210 |
+
@keyframes pulse {
|
| 211 |
+
|
| 212 |
+
0%,
|
| 213 |
+
100% {
|
| 214 |
+
opacity: 1;
|
| 215 |
+
}
|
| 216 |
+
|
| 217 |
+
50% {
|
| 218 |
+
opacity: 0.6;
|
| 219 |
+
}
|
| 220 |
+
}
|
| 221 |
+
|
| 222 |
+
/* ===== MAIN CONTENT ===== */
|
| 223 |
+
main {
|
| 224 |
+
flex: 1;
|
| 225 |
+
display: flex;
|
| 226 |
+
flex-direction: column;
|
| 227 |
+
overflow: hidden;
|
| 228 |
+
}
|
| 229 |
+
|
| 230 |
+
/* Chat container */
|
| 231 |
+
.chat-container {
|
| 232 |
+
display: flex;
|
| 233 |
+
flex-direction: column;
|
| 234 |
+
height: calc(100vh - 110px);
|
| 235 |
+
max-width: 800px;
|
| 236 |
+
margin: 0 auto;
|
| 237 |
+
width: 100%;
|
| 238 |
+
padding: 0 1rem;
|
| 239 |
+
}
|
| 240 |
+
|
| 241 |
+
/* Messages area */
|
| 242 |
+
.chat-messages {
|
| 243 |
+
flex: 1;
|
| 244 |
+
overflow-y: auto;
|
| 245 |
+
padding: 1.25rem 0;
|
| 246 |
+
display: flex;
|
| 247 |
+
flex-direction: column;
|
| 248 |
+
gap: 0.875rem;
|
| 249 |
+
}
|
| 250 |
+
|
| 251 |
+
/* ===== MESSAGE STYLES ===== */
|
| 252 |
+
.message {
|
| 253 |
+
max-width: 85%;
|
| 254 |
+
padding: 0.875rem 1rem;
|
| 255 |
+
border-radius: 1rem;
|
| 256 |
+
line-height: 1.6;
|
| 257 |
+
font-size: 0.9375rem;
|
| 258 |
+
animation: messageAppear 0.2s ease-out;
|
| 259 |
+
}
|
| 260 |
+
|
| 261 |
+
@keyframes messageAppear {
|
| 262 |
+
from {
|
| 263 |
+
opacity: 0;
|
| 264 |
+
transform: translateY(8px);
|
| 265 |
+
}
|
| 266 |
+
|
| 267 |
+
to {
|
| 268 |
+
opacity: 1;
|
| 269 |
+
transform: translateY(0);
|
| 270 |
+
}
|
| 271 |
+
}
|
| 272 |
+
|
| 273 |
+
.user-message {
|
| 274 |
+
background: var(--message-user-bg);
|
| 275 |
+
border: 1px solid var(--message-user-border);
|
| 276 |
+
align-self: flex-end;
|
| 277 |
+
border-bottom-right-radius: 0.25rem;
|
| 278 |
+
}
|
| 279 |
+
|
| 280 |
+
.assistant-message {
|
| 281 |
+
background: var(--message-assistant-bg);
|
| 282 |
+
border: 1px solid var(--message-assistant-border);
|
| 283 |
+
align-self: flex-start;
|
| 284 |
+
border-bottom-left-radius: 0.25rem;
|
| 285 |
+
}
|
| 286 |
+
|
| 287 |
+
[data-theme="light"] .assistant-message {
|
| 288 |
+
box-shadow: 0 1px 2px rgba(0, 0, 0, 0.05);
|
| 289 |
+
}
|
| 290 |
+
|
| 291 |
+
.system-message {
|
| 292 |
+
background: var(--bg-tertiary);
|
| 293 |
+
border: 1px solid var(--glass-border);
|
| 294 |
+
align-self: center;
|
| 295 |
+
max-width: 90%;
|
| 296 |
+
font-size: 0.875rem;
|
| 297 |
+
border-radius: 0.75rem;
|
| 298 |
+
}
|
| 299 |
+
|
| 300 |
+
.system-message h3 {
|
| 301 |
+
margin: 0 0 0.5rem 0;
|
| 302 |
+
font-size: 0.9375rem;
|
| 303 |
+
font-weight: 600;
|
| 304 |
+
color: var(--accent-primary);
|
| 305 |
+
}
|
| 306 |
+
|
| 307 |
+
.thinking-message {
|
| 308 |
+
background: transparent;
|
| 309 |
+
align-self: flex-start;
|
| 310 |
+
padding: 0.5rem;
|
| 311 |
+
border: none;
|
| 312 |
+
}
|
| 313 |
+
|
| 314 |
+
.error-message {
|
| 315 |
+
background: rgba(234, 67, 53, 0.1);
|
| 316 |
+
border: 1px solid rgba(234, 67, 53, 0.25);
|
| 317 |
+
align-self: center;
|
| 318 |
+
color: #ea4335;
|
| 319 |
+
}
|
| 320 |
+
|
| 321 |
+
.message-header {
|
| 322 |
+
display: flex;
|
| 323 |
+
align-items: center;
|
| 324 |
+
gap: 0.375rem;
|
| 325 |
+
margin-bottom: 0.375rem;
|
| 326 |
+
font-size: 0.75rem;
|
| 327 |
+
color: var(--text-secondary);
|
| 328 |
+
}
|
| 329 |
+
|
| 330 |
+
.avatar-icon {
|
| 331 |
+
width: 20px;
|
| 332 |
+
height: 20px;
|
| 333 |
+
border-radius: 50%;
|
| 334 |
+
object-fit: cover;
|
| 335 |
+
}
|
| 336 |
+
|
| 337 |
+
|
| 338 |
+
|
| 339 |
+
|
| 340 |
+
|
| 341 |
+
.message-role {
|
| 342 |
+
font-weight: 600;
|
| 343 |
+
text-transform: uppercase;
|
| 344 |
+
letter-spacing: 0.03em;
|
| 345 |
+
color: var(--accent-primary);
|
| 346 |
+
}
|
| 347 |
+
|
| 348 |
+
.message-content {
|
| 349 |
+
word-wrap: break-word;
|
| 350 |
+
}
|
| 351 |
+
|
| 352 |
+
.message-content p {
|
| 353 |
+
margin: 0 0 0.625rem 0;
|
| 354 |
+
}
|
| 355 |
+
|
| 356 |
+
.message-content p:last-child {
|
| 357 |
+
margin-bottom: 0;
|
| 358 |
+
}
|
| 359 |
+
|
| 360 |
+
.message-content pre {
|
| 361 |
+
margin: 0.625rem 0;
|
| 362 |
+
padding: 0.875rem;
|
| 363 |
+
border-radius: 0.5rem;
|
| 364 |
+
overflow-x: auto;
|
| 365 |
+
background: var(--code-bg);
|
| 366 |
+
border: 1px solid var(--glass-border);
|
| 367 |
+
font-size: 0.8125rem;
|
| 368 |
+
line-height: 1.5;
|
| 369 |
+
}
|
| 370 |
+
|
| 371 |
+
.message-content code {
|
| 372 |
+
font-family: 'SF Mono', Monaco, Consolas, 'Liberation Mono', monospace;
|
| 373 |
+
font-size: 0.8125rem;
|
| 374 |
+
}
|
| 375 |
+
|
| 376 |
+
.message-content ul,
|
| 377 |
+
.message-content ol {
|
| 378 |
+
margin: 0.5rem 0;
|
| 379 |
+
padding-left: 1.5rem;
|
| 380 |
+
}
|
| 381 |
+
|
| 382 |
+
/* ===== PLOT DISPLAY ===== */
|
| 383 |
+
.message-plots {
|
| 384 |
+
margin-top: 0.875rem;
|
| 385 |
+
}
|
| 386 |
+
|
| 387 |
+
.plot-figure {
|
| 388 |
+
margin: 0;
|
| 389 |
+
display: block;
|
| 390 |
+
max-width: 100%;
|
| 391 |
+
}
|
| 392 |
+
|
| 393 |
+
.plot-figure img {
|
| 394 |
+
max-width: 100%;
|
| 395 |
+
width: auto;
|
| 396 |
+
height: auto;
|
| 397 |
+
border-radius: 0.5rem;
|
| 398 |
+
border: 1px solid var(--glass-border);
|
| 399 |
+
cursor: pointer;
|
| 400 |
+
transition: all 0.15s ease;
|
| 401 |
+
}
|
| 402 |
+
|
| 403 |
+
.plot-figure img:hover {
|
| 404 |
+
box-shadow: var(--glow-primary);
|
| 405 |
+
}
|
| 406 |
+
|
| 407 |
+
.plot-actions {
|
| 408 |
+
margin-top: 0.625rem;
|
| 409 |
+
display: flex;
|
| 410 |
+
gap: 0.5rem;
|
| 411 |
+
}
|
| 412 |
+
|
| 413 |
+
.plot-actions button {
|
| 414 |
+
padding: 0.5rem 0.875rem;
|
| 415 |
+
font-size: 0.75rem;
|
| 416 |
+
font-weight: 500;
|
| 417 |
+
border: 1px solid var(--glass-border);
|
| 418 |
+
border-radius: 0.375rem;
|
| 419 |
+
background: var(--bg-tertiary);
|
| 420 |
+
color: var(--text-secondary);
|
| 421 |
+
cursor: pointer;
|
| 422 |
+
transition: all 0.15s ease;
|
| 423 |
+
}
|
| 424 |
+
|
| 425 |
+
.plot-actions button:hover {
|
| 426 |
+
border-color: var(--accent-primary);
|
| 427 |
+
color: var(--accent-primary);
|
| 428 |
+
}
|
| 429 |
+
|
| 430 |
+
/* Plot code display */
|
| 431 |
+
.plot-code {
|
| 432 |
+
margin-top: 0.625rem;
|
| 433 |
+
border-radius: 0.5rem;
|
| 434 |
+
overflow: hidden;
|
| 435 |
+
border: 1px solid var(--glass-border);
|
| 436 |
+
}
|
| 437 |
+
|
| 438 |
+
.plot-code pre {
|
| 439 |
+
margin: 0;
|
| 440 |
+
padding: 0.875rem;
|
| 441 |
+
background: var(--code-bg);
|
| 442 |
+
overflow-x: auto;
|
| 443 |
+
font-size: 0.8125rem;
|
| 444 |
+
line-height: 1.5;
|
| 445 |
+
}
|
| 446 |
+
|
| 447 |
+
.plot-code code {
|
| 448 |
+
font-family: 'SF Mono', Monaco, Consolas, 'Liberation Mono', monospace;
|
| 449 |
+
color: #e6edf3;
|
| 450 |
+
}
|
| 451 |
+
|
| 452 |
+
/* ===== INPUT AREA ===== */
|
| 453 |
+
.chat-input-container {
|
| 454 |
+
padding: 1rem 0;
|
| 455 |
+
border-top: 1px solid var(--glass-border);
|
| 456 |
+
background: var(--bg-primary);
|
| 457 |
+
}
|
| 458 |
+
|
| 459 |
+
.chat-form {
|
| 460 |
+
display: flex;
|
| 461 |
+
gap: 0.5rem;
|
| 462 |
+
align-items: flex-end;
|
| 463 |
+
}
|
| 464 |
+
|
| 465 |
+
.chat-form textarea {
|
| 466 |
+
flex: 1;
|
| 467 |
+
min-height: 2.75rem;
|
| 468 |
+
max-height: 8rem;
|
| 469 |
+
padding: 0.75rem 1rem;
|
| 470 |
+
border: 1px solid var(--glass-border);
|
| 471 |
+
border-radius: 1.5rem;
|
| 472 |
+
font-size: 0.9375rem;
|
| 473 |
+
font-family: inherit;
|
| 474 |
+
resize: none;
|
| 475 |
+
line-height: 1.4;
|
| 476 |
+
background: var(--bg-secondary);
|
| 477 |
+
color: var(--text-primary);
|
| 478 |
+
transition: all 0.15s ease;
|
| 479 |
+
}
|
| 480 |
+
|
| 481 |
+
.chat-form textarea::placeholder {
|
| 482 |
+
color: var(--text-muted);
|
| 483 |
+
}
|
| 484 |
+
|
| 485 |
+
.chat-form textarea:focus {
|
| 486 |
+
outline: none;
|
| 487 |
+
border-color: var(--accent-primary);
|
| 488 |
+
box-shadow: var(--focus-ring);
|
| 489 |
+
}
|
| 490 |
+
|
| 491 |
+
.chat-form button {
|
| 492 |
+
padding: 0.75rem 1.25rem;
|
| 493 |
+
background: var(--accent-primary);
|
| 494 |
+
color: #ffffff;
|
| 495 |
+
border: none;
|
| 496 |
+
border-radius: 1.5rem;
|
| 497 |
+
font-size: 0.875rem;
|
| 498 |
+
font-weight: 600;
|
| 499 |
+
cursor: pointer;
|
| 500 |
+
white-space: nowrap;
|
| 501 |
+
transition: all 0.15s ease;
|
| 502 |
+
}
|
| 503 |
+
|
| 504 |
+
.chat-form button:hover {
|
| 505 |
+
filter: brightness(1.1);
|
| 506 |
+
box-shadow: var(--glow-primary);
|
| 507 |
+
}
|
| 508 |
+
|
| 509 |
+
.chat-form button:focus {
|
| 510 |
+
outline: none;
|
| 511 |
+
box-shadow: var(--focus-ring);
|
| 512 |
+
}
|
| 513 |
+
|
| 514 |
+
.chat-form button:disabled {
|
| 515 |
+
background: var(--text-muted);
|
| 516 |
+
cursor: not-allowed;
|
| 517 |
+
box-shadow: none;
|
| 518 |
+
filter: none;
|
| 519 |
+
}
|
| 520 |
+
|
| 521 |
+
.input-hints {
|
| 522 |
+
margin-top: 0.5rem;
|
| 523 |
+
font-size: 0.75rem;
|
| 524 |
+
color: var(--text-muted);
|
| 525 |
+
}
|
| 526 |
+
|
| 527 |
+
/* Keyboard hint chips inside the input area. */
.input-hints kbd {
  background: var(--bg-tertiary);
  padding: 0.125rem 0.375rem;
  border-radius: 0.25rem;
  border: 1px solid var(--glass-border);
  font-size: 0.6875rem;
  font-family: inherit;
}

/* ===== FOOTER ===== */
footer {
  background: var(--bg-secondary);
  border-top: 1px solid var(--glass-border);
  padding: 0.625rem 1rem;
  text-align: center;
  font-size: 0.75rem;
  color: var(--text-muted);
}

/* ===== MODAL ===== */
/* Native <dialog> styling for the cache viewer. */
dialog {
  border: none;
  border-radius: 0.75rem;
  padding: 0;
  max-width: 560px;
  width: 90%;
  background: var(--bg-secondary);
  color: var(--text-primary);
  box-shadow: 0 8px 32px rgba(0, 0, 0, 0.2);
}

dialog::backdrop {
  background: rgba(0, 0, 0, 0.5);
}

dialog header {
  display: flex;
  justify-content: space-between;
  align-items: center;
  padding: 1rem 1.25rem;
  border-bottom: 1px solid var(--glass-border);
}

dialog header h3 {
  margin: 0;
  font-size: 1rem;
  font-weight: 600;
  color: var(--text-primary);
}

dialog .close-modal {
  background: none;
  border: none;
  font-size: 1.25rem;
  cursor: pointer;
  color: var(--text-secondary);
  padding: 0.25rem;
  line-height: 1;
  border-radius: 0.25rem;
  transition: all 0.15s ease;
}

dialog .close-modal:hover {
  background: var(--bg-tertiary);
  color: var(--text-primary);
}

/* Scrollable body of the cache modal. */
#cache-content {
  padding: 1rem 1.25rem;
  max-height: 400px;
  overflow-y: auto;
}

#cache-content table {
  width: 100%;
  border-collapse: collapse;
  font-size: 0.8125rem;
}

#cache-content th,
#cache-content td {
  padding: 0.625rem 0.5rem;
  text-align: left;
  border-bottom: 1px solid var(--glass-border);
}

#cache-content th {
  font-weight: 600;
  color: var(--text-secondary);
  font-size: 0.75rem;
  text-transform: uppercase;
  letter-spacing: 0.03em;
}
|
| 620 |
+
|
| 621 |
+
/* ===== TYPING INDICATOR ===== */
/* Three bouncing dots shown while the agent is thinking. */
.typing-indicator {
  display: flex;
  gap: 0.25rem;
  padding: 0.5rem;
}

.typing-indicator span {
  width: 6px;
  height: 6px;
  background: var(--text-muted);
  border-radius: 50%;
  animation: typing 1.2s infinite ease-in-out;
}

/* Stagger each dot by 150ms for the wave effect. */
.typing-indicator span:nth-child(1) {
  animation-delay: 0s;
}

.typing-indicator span:nth-child(2) {
  animation-delay: 0.15s;
}

.typing-indicator span:nth-child(3) {
  animation-delay: 0.3s;
}

@keyframes typing {
  0%,
  60%,
  100% {
    transform: translateY(0);
    opacity: 0.4;
  }

  30% {
    transform: translateY(-4px);
    opacity: 1;
  }
}

/* ===== STATUS INDICATOR ===== */
/* Spinner + text line shown for streaming tool-status updates. */
.status-indicator {
  display: flex;
  align-items: center;
  gap: 0.625rem;
  padding: 0.625rem 0.875rem;
  background: var(--bg-tertiary);
  border: 1px solid var(--glass-border);
  border-radius: 0.5rem;
  font-size: 0.8125rem;
  color: var(--text-primary);
  animation: statusAppear 0.2s ease-out;
}

.status-spinner {
  width: 14px;
  height: 14px;
  border: 2px solid var(--glass-border);
  border-top-color: var(--accent-primary);
  border-radius: 50%;
  animation: spin 0.8s linear infinite;
}

.status-text {
  font-weight: 500;
}

@keyframes spin {
  to {
    transform: rotate(360deg);
  }
}

@keyframes statusAppear {
  from {
    opacity: 0;
  }

  to {
    opacity: 1;
  }
}
|
| 705 |
+
|
| 706 |
+
/* ===== SCROLLBAR ===== */
/* WebKit-only scrollbar skin (Firefox falls back to the default). */
::-webkit-scrollbar {
  width: 8px;
  height: 8px;
}

::-webkit-scrollbar-track {
  background: transparent;
}

::-webkit-scrollbar-thumb {
  background: var(--text-muted);
  border-radius: 4px;
  border: 2px solid var(--bg-primary);
}

::-webkit-scrollbar-thumb:hover {
  background: var(--text-secondary);
}

/* ===== RESPONSIVE ===== */
@media (max-width: 640px) {
  .chat-container {
    padding: 0 0.75rem;
  }

  .message {
    max-width: 92%;
  }

  .chat-form button {
    padding: 0.75rem 1rem;
  }

  header nav ul {
    gap: 0.5rem;
  }
}

/* ===== ACCESSIBILITY ===== */
/* Effectively disable animations for users who request reduced motion. */
@media (prefers-reduced-motion: reduce) {
  *,
  *::before,
  *::after {
    animation-duration: 0.01ms !important;
    transition-duration: 0.01ms !important;
  }
}

/* Focus visible for keyboard users */
:focus-visible {
  outline: none;
  box-shadow: var(--focus-ring);
}

/* ===== SELECTION ===== */
::selection {
  background: rgba(29, 155, 240, 0.25);
}

[data-theme="light"] ::selection {
  background: rgba(26, 115, 232, 0.2);
}
|
| 770 |
+
|
| 771 |
+
/* ===== API KEYS PANEL ===== */
/* Inline card asking the user for API keys when the server has none. */
.api-keys-panel {
  margin: 0 auto 16px;
  max-width: 480px;
  background: var(--bg-tertiary);
  border: 1px solid var(--glass-border);
  border-radius: 12px;
  overflow: hidden;
}

.api-keys-header {
  padding: 12px 16px;
  font-weight: 600;
  color: var(--text-primary);
  background: var(--glass-bg);
  border-bottom: 1px solid var(--glass-border);
}

.api-keys-body {
  padding: 16px;
}

.api-keys-note {
  font-size: 13px;
  color: var(--text-secondary);
  margin-bottom: 12px;
  line-height: 1.4;
}

.api-key-field {
  margin-bottom: 12px;
}

.api-key-field label {
  display: block;
  font-size: 13px;
  font-weight: 500;
  color: var(--text-secondary);
  margin-bottom: 4px;
}

/* Red asterisk on mandatory fields. */
.api-key-field .required {
  color: #ef4444;
}

.api-key-field input {
  width: 100%;
  padding: 8px 12px;
  background: var(--bg-primary);
  border: 1px solid var(--glass-border);
  border-radius: 6px;
  color: var(--text-primary);
  font-family: monospace;
  font-size: 13px;
  box-sizing: border-box;
}

.api-key-field input:focus {
  outline: none;
  border-color: var(--accent-primary);
  box-shadow: var(--focus-ring);
}

.save-keys-btn {
  width: 100%;
  padding: 10px;
  background: var(--accent-primary);
  color: #fff;
  border: none;
  border-radius: 6px;
  font-size: 14px;
  font-weight: 600;
  cursor: pointer;
  margin-top: 4px;
}

.save-keys-btn:hover {
  opacity: 0.9;
}

.save-keys-btn:disabled {
  opacity: 0.5;
  cursor: not-allowed;
}
|
web/static/eurus_avatar.png
ADDED
|
|
web/static/favicon.jpeg
ADDED
|
|
web/static/js/chat.js
ADDED
|
@@ -0,0 +1,734 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
/**
|
| 2 |
+
* Eurus Chat WebSocket Client
|
| 3 |
+
*/
|
| 4 |
+
|
| 5 |
+
class EurusChat {
|
| 6 |
+
constructor() {
|
| 7 |
+
this.ws = null;
|
| 8 |
+
this.messageId = 0;
|
| 9 |
+
this.currentAssistantMessage = null;
|
| 10 |
+
this.isConnected = false;
|
| 11 |
+
this.keysConfigured = false;
|
| 12 |
+
this.serverKeysPresent = { openai: false, arraylake: false };
|
| 13 |
+
this.reconnectAttempts = 0;
|
| 14 |
+
this.maxReconnectAttempts = 5;
|
| 15 |
+
this.reconnectDelay = 1000;
|
| 16 |
+
|
| 17 |
+
this.messagesContainer = document.getElementById('chat-messages');
|
| 18 |
+
this.messageInput = document.getElementById('message-input');
|
| 19 |
+
this.chatForm = document.getElementById('chat-form');
|
| 20 |
+
this.sendBtn = document.getElementById('send-btn');
|
| 21 |
+
this.connectionStatus = document.getElementById('connection-status');
|
| 22 |
+
this.clearBtn = document.getElementById('clear-btn');
|
| 23 |
+
this.cacheBtn = document.getElementById('cache-btn');
|
| 24 |
+
this.cacheModal = document.getElementById('cache-modal');
|
| 25 |
+
this.apiKeysPanel = document.getElementById('api-keys-panel');
|
| 26 |
+
this.saveKeysBtn = document.getElementById('save-keys-btn');
|
| 27 |
+
this.openaiKeyInput = document.getElementById('openai-key');
|
| 28 |
+
this.arraylakeKeyInput = document.getElementById('arraylake-key');
|
| 29 |
+
|
| 30 |
+
marked.setOptions({
|
| 31 |
+
highlight: (code, lang) => {
|
| 32 |
+
if (lang && hljs.getLanguage(lang)) {
|
| 33 |
+
return hljs.highlight(code, { language: lang }).value;
|
| 34 |
+
}
|
| 35 |
+
return hljs.highlightAuto(code).value;
|
| 36 |
+
},
|
| 37 |
+
breaks: true,
|
| 38 |
+
gfm: true
|
| 39 |
+
});
|
| 40 |
+
|
| 41 |
+
this.themeToggle = document.getElementById('theme-toggle');
|
| 42 |
+
this.init();
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
+
init() {
|
| 46 |
+
this.checkKeysStatus();
|
| 47 |
+
this.connect();
|
| 48 |
+
this.setupEventListeners();
|
| 49 |
+
this.setupImageModal();
|
| 50 |
+
this.setupTheme();
|
| 51 |
+
this.setupKeysPanel();
|
| 52 |
+
}
|
| 53 |
+
|
| 54 |
+
async checkKeysStatus() {
|
| 55 |
+
try {
|
| 56 |
+
const resp = await fetch('/api/keys-status');
|
| 57 |
+
const data = await resp.json();
|
| 58 |
+
this.serverKeysPresent = data;
|
| 59 |
+
|
| 60 |
+
if (data.openai) {
|
| 61 |
+
// Keys pre-configured on server — hide the panel
|
| 62 |
+
this.apiKeysPanel.style.display = 'none';
|
| 63 |
+
this.keysConfigured = true;
|
| 64 |
+
} else {
|
| 65 |
+
// No server keys — check localStorage for saved keys
|
| 66 |
+
const savedOpenai = localStorage.getItem('eurus-openai-key');
|
| 67 |
+
const savedArraylake = localStorage.getItem('eurus-arraylake-key');
|
| 68 |
+
if (savedOpenai) {
|
| 69 |
+
this.openaiKeyInput.value = savedOpenai;
|
| 70 |
+
}
|
| 71 |
+
if (savedArraylake) {
|
| 72 |
+
this.arraylakeKeyInput.value = savedArraylake;
|
| 73 |
+
}
|
| 74 |
+
this.apiKeysPanel.style.display = 'block';
|
| 75 |
+
this.keysConfigured = false;
|
| 76 |
+
}
|
| 77 |
+
} catch (e) {
|
| 78 |
+
// Can't reach server yet, show panel
|
| 79 |
+
this.apiKeysPanel.style.display = 'block';
|
| 80 |
+
}
|
| 81 |
+
}
|
| 82 |
+
|
| 83 |
+
setupKeysPanel() {
|
| 84 |
+
this.saveKeysBtn.addEventListener('click', () => this.saveAndSendKeys());
|
| 85 |
+
|
| 86 |
+
// Allow Enter in key fields to submit
|
| 87 |
+
[this.openaiKeyInput, this.arraylakeKeyInput].forEach(input => {
|
| 88 |
+
input.addEventListener('keydown', (e) => {
|
| 89 |
+
if (e.key === 'Enter') {
|
| 90 |
+
e.preventDefault();
|
| 91 |
+
this.saveAndSendKeys();
|
| 92 |
+
}
|
| 93 |
+
});
|
| 94 |
+
});
|
| 95 |
+
}
|
| 96 |
+
|
| 97 |
+
saveAndSendKeys() {
|
| 98 |
+
const openaiKey = this.openaiKeyInput.value.trim();
|
| 99 |
+
const arraylakeKey = this.arraylakeKeyInput.value.trim();
|
| 100 |
+
|
| 101 |
+
if (!openaiKey) {
|
| 102 |
+
this.openaiKeyInput.focus();
|
| 103 |
+
return;
|
| 104 |
+
}
|
| 105 |
+
|
| 106 |
+
// Save to localStorage (client-side only)
|
| 107 |
+
localStorage.setItem('eurus-openai-key', openaiKey);
|
| 108 |
+
if (arraylakeKey) {
|
| 109 |
+
localStorage.setItem('eurus-arraylake-key', arraylakeKey);
|
| 110 |
+
}
|
| 111 |
+
|
| 112 |
+
// Send keys via WebSocket
|
| 113 |
+
if (this.ws && this.ws.readyState === WebSocket.OPEN) {
|
| 114 |
+
this.saveKeysBtn.disabled = true;
|
| 115 |
+
this.saveKeysBtn.textContent = 'Connecting...';
|
| 116 |
+
this.ws.send(JSON.stringify({
|
| 117 |
+
type: 'configure_keys',
|
| 118 |
+
openai_api_key: openaiKey,
|
| 119 |
+
arraylake_api_key: arraylakeKey,
|
| 120 |
+
}));
|
| 121 |
+
}
|
| 122 |
+
}
|
| 123 |
+
|
| 124 |
+
setupTheme() {
|
| 125 |
+
// Load saved theme or default to dark (neosynth)
|
| 126 |
+
const savedTheme = localStorage.getItem('eurus-theme') || 'dark';
|
| 127 |
+
document.documentElement.setAttribute('data-theme', savedTheme);
|
| 128 |
+
this.updateThemeIcon(savedTheme);
|
| 129 |
+
|
| 130 |
+
// Theme toggle click handler
|
| 131 |
+
if (this.themeToggle) {
|
| 132 |
+
this.themeToggle.addEventListener('click', () => {
|
| 133 |
+
const currentTheme = document.documentElement.getAttribute('data-theme');
|
| 134 |
+
const newTheme = currentTheme === 'dark' ? 'light' : 'dark';
|
| 135 |
+
document.documentElement.setAttribute('data-theme', newTheme);
|
| 136 |
+
localStorage.setItem('eurus-theme', newTheme);
|
| 137 |
+
this.updateThemeIcon(newTheme);
|
| 138 |
+
});
|
| 139 |
+
}
|
| 140 |
+
}
|
| 141 |
+
|
| 142 |
+
updateThemeIcon(theme) {
|
| 143 |
+
if (this.themeToggle) {
|
| 144 |
+
const icon = this.themeToggle.querySelector('.theme-icon');
|
| 145 |
+
if (icon) {
|
| 146 |
+
icon.textContent = theme === 'dark' ? '☀️' : '🌙';
|
| 147 |
+
}
|
| 148 |
+
}
|
| 149 |
+
}
|
| 150 |
+
|
| 151 |
+
connect() {
|
| 152 |
+
const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
|
| 153 |
+
const wsUrl = `${protocol}//${window.location.host}/ws/chat`;
|
| 154 |
+
|
| 155 |
+
this.updateConnectionStatus('connecting');
|
| 156 |
+
|
| 157 |
+
try {
|
| 158 |
+
this.ws = new WebSocket(wsUrl);
|
| 159 |
+
|
| 160 |
+
this.ws.onopen = () => {
|
| 161 |
+
this.isConnected = true;
|
| 162 |
+
this.reconnectAttempts = 0;
|
| 163 |
+
this.updateConnectionStatus('connected');
|
| 164 |
+
|
| 165 |
+
// If server has no keys, auto-send saved keys from localStorage
|
| 166 |
+
if (!this.serverKeysPresent.openai) {
|
| 167 |
+
const savedOpenai = localStorage.getItem('eurus-openai-key');
|
| 168 |
+
if (savedOpenai) {
|
| 169 |
+
const savedArraylake = localStorage.getItem('eurus-arraylake-key') || '';
|
| 170 |
+
this.ws.send(JSON.stringify({
|
| 171 |
+
type: 'configure_keys',
|
| 172 |
+
openai_api_key: savedOpenai,
|
| 173 |
+
arraylake_api_key: savedArraylake,
|
| 174 |
+
}));
|
| 175 |
+
}
|
| 176 |
+
} else {
|
| 177 |
+
this.sendBtn.disabled = false;
|
| 178 |
+
}
|
| 179 |
+
};
|
| 180 |
+
|
| 181 |
+
this.ws.onclose = () => {
|
| 182 |
+
this.isConnected = false;
|
| 183 |
+
this.updateConnectionStatus('disconnected');
|
| 184 |
+
this.sendBtn.disabled = true;
|
| 185 |
+
this.attemptReconnect();
|
| 186 |
+
};
|
| 187 |
+
|
| 188 |
+
this.ws.onerror = () => {
|
| 189 |
+
this.updateConnectionStatus('disconnected');
|
| 190 |
+
};
|
| 191 |
+
|
| 192 |
+
this.ws.onmessage = (event) => {
|
| 193 |
+
this.handleMessage(JSON.parse(event.data));
|
| 194 |
+
};
|
| 195 |
+
|
| 196 |
+
} catch (error) {
|
| 197 |
+
this.updateConnectionStatus('disconnected');
|
| 198 |
+
}
|
| 199 |
+
}
|
| 200 |
+
|
| 201 |
+
attemptReconnect() {
|
| 202 |
+
if (this.reconnectAttempts >= this.maxReconnectAttempts) return;
|
| 203 |
+
|
| 204 |
+
this.reconnectAttempts++;
|
| 205 |
+
const delay = this.reconnectDelay * Math.pow(2, this.reconnectAttempts - 1);
|
| 206 |
+
|
| 207 |
+
this.updateConnectionStatus('connecting');
|
| 208 |
+
setTimeout(() => this.connect(), delay);
|
| 209 |
+
}
|
| 210 |
+
|
| 211 |
+
updateConnectionStatus(status) {
|
| 212 |
+
this.connectionStatus.className = 'status-badge ' + status;
|
| 213 |
+
const text = { connected: 'Connected', disconnected: 'Disconnected', connecting: 'Connecting...' };
|
| 214 |
+
this.connectionStatus.textContent = text[status] || status;
|
| 215 |
+
}
|
| 216 |
+
|
| 217 |
+
setupEventListeners() {
|
| 218 |
+
this.chatForm.addEventListener('submit', (e) => {
|
| 219 |
+
e.preventDefault();
|
| 220 |
+
this.sendMessage();
|
| 221 |
+
});
|
| 222 |
+
|
| 223 |
+
this.messageInput.addEventListener('keydown', (e) => {
|
| 224 |
+
if (e.key === 'Enter' && !e.shiftKey) {
|
| 225 |
+
e.preventDefault();
|
| 226 |
+
this.sendMessage();
|
| 227 |
+
}
|
| 228 |
+
});
|
| 229 |
+
|
| 230 |
+
this.messageInput.addEventListener('input', () => {
|
| 231 |
+
this.messageInput.style.height = 'auto';
|
| 232 |
+
this.messageInput.style.height = Math.min(this.messageInput.scrollHeight, 150) + 'px';
|
| 233 |
+
});
|
| 234 |
+
|
| 235 |
+
this.clearBtn.addEventListener('click', (e) => {
|
| 236 |
+
e.preventDefault();
|
| 237 |
+
this.clearChat();
|
| 238 |
+
});
|
| 239 |
+
|
| 240 |
+
this.cacheBtn.addEventListener('click', (e) => {
|
| 241 |
+
e.preventDefault();
|
| 242 |
+
this.showCacheModal();
|
| 243 |
+
});
|
| 244 |
+
|
| 245 |
+
this.cacheModal.querySelector('.close-modal').addEventListener('click', () => {
|
| 246 |
+
this.cacheModal.close();
|
| 247 |
+
});
|
| 248 |
+
}
|
| 249 |
+
|
| 250 |
+
setupImageModal() {
|
| 251 |
+
// Create modal for enlarged images
|
| 252 |
+
const modal = document.createElement('div');
|
| 253 |
+
modal.id = 'image-modal';
|
| 254 |
+
modal.innerHTML = `
|
| 255 |
+
<div class="image-modal-backdrop"></div>
|
| 256 |
+
<div class="image-modal-content">
|
| 257 |
+
<img alt="Enlarged plot">
|
| 258 |
+
<div class="image-modal-actions">
|
| 259 |
+
<button class="download-btn">Download</button>
|
| 260 |
+
<button class="close-btn">Close</button>
|
| 261 |
+
</div>
|
| 262 |
+
</div>
|
| 263 |
+
`;
|
| 264 |
+
document.body.appendChild(modal);
|
| 265 |
+
|
| 266 |
+
// Add modal styles
|
| 267 |
+
const style = document.createElement('style');
|
| 268 |
+
style.textContent = `
|
| 269 |
+
#image-modal {
|
| 270 |
+
display: none;
|
| 271 |
+
position: fixed;
|
| 272 |
+
top: 0;
|
| 273 |
+
left: 0;
|
| 274 |
+
width: 100%;
|
| 275 |
+
height: 100%;
|
| 276 |
+
z-index: 1000;
|
| 277 |
+
}
|
| 278 |
+
#image-modal.active {
|
| 279 |
+
display: flex;
|
| 280 |
+
align-items: center;
|
| 281 |
+
justify-content: center;
|
| 282 |
+
}
|
| 283 |
+
.image-modal-backdrop {
|
| 284 |
+
position: absolute;
|
| 285 |
+
top: 0;
|
| 286 |
+
left: 0;
|
| 287 |
+
width: 100%;
|
| 288 |
+
height: 100%;
|
| 289 |
+
background: rgba(0,0,0,0.8);
|
| 290 |
+
}
|
| 291 |
+
.image-modal-content {
|
| 292 |
+
position: relative;
|
| 293 |
+
max-width: 90%;
|
| 294 |
+
max-height: 90%;
|
| 295 |
+
display: flex;
|
| 296 |
+
flex-direction: column;
|
| 297 |
+
align-items: center;
|
| 298 |
+
}
|
| 299 |
+
.image-modal-content img {
|
| 300 |
+
max-width: 100%;
|
| 301 |
+
max-height: calc(90vh - 60px);
|
| 302 |
+
border-radius: 4px;
|
| 303 |
+
}
|
| 304 |
+
.image-modal-actions {
|
| 305 |
+
margin-top: 12px;
|
| 306 |
+
display: flex;
|
| 307 |
+
gap: 8px;
|
| 308 |
+
}
|
| 309 |
+
.image-modal-actions button {
|
| 310 |
+
padding: 8px 16px;
|
| 311 |
+
border: none;
|
| 312 |
+
border-radius: 4px;
|
| 313 |
+
cursor: pointer;
|
| 314 |
+
font-size: 14px;
|
| 315 |
+
}
|
| 316 |
+
.image-modal-actions .download-btn {
|
| 317 |
+
background: #1976d2;
|
| 318 |
+
color: white;
|
| 319 |
+
}
|
| 320 |
+
.image-modal-actions .close-btn {
|
| 321 |
+
background: #757575;
|
| 322 |
+
color: white;
|
| 323 |
+
}
|
| 324 |
+
`;
|
| 325 |
+
document.head.appendChild(style);
|
| 326 |
+
|
| 327 |
+
// Event listeners
|
| 328 |
+
modal.querySelector('.image-modal-backdrop').addEventListener('click', () => {
|
| 329 |
+
modal.classList.remove('active');
|
| 330 |
+
});
|
| 331 |
+
|
| 332 |
+
modal.querySelector('.close-btn').addEventListener('click', () => {
|
| 333 |
+
modal.classList.remove('active');
|
| 334 |
+
});
|
| 335 |
+
|
| 336 |
+
modal.querySelector('.download-btn').addEventListener('click', () => {
|
| 337 |
+
const img = modal.querySelector('img');
|
| 338 |
+
const link = document.createElement('a');
|
| 339 |
+
link.href = img.src;
|
| 340 |
+
link.download = 'eurus_plot.png';
|
| 341 |
+
link.click();
|
| 342 |
+
});
|
| 343 |
+
|
| 344 |
+
document.addEventListener('keydown', (e) => {
|
| 345 |
+
if (e.key === 'Escape' && modal.classList.contains('active')) {
|
| 346 |
+
modal.classList.remove('active');
|
| 347 |
+
}
|
| 348 |
+
});
|
| 349 |
+
|
| 350 |
+
this.imageModal = modal;
|
| 351 |
+
}
|
| 352 |
+
|
| 353 |
+
showImageModal(src) {
|
| 354 |
+
this.imageModal.querySelector('img').src = src;
|
| 355 |
+
this.imageModal.classList.add('active');
|
| 356 |
+
}
|
| 357 |
+
|
| 358 |
+
sendMessage() {
|
| 359 |
+
const message = this.messageInput.value.trim();
|
| 360 |
+
if (!message || !this.isConnected) return;
|
| 361 |
+
|
| 362 |
+
this.addUserMessage(message);
|
| 363 |
+
this.ws.send(JSON.stringify({ message }));
|
| 364 |
+
|
| 365 |
+
this.messageInput.value = '';
|
| 366 |
+
this.messageInput.style.height = 'auto';
|
| 367 |
+
this.sendBtn.disabled = true;
|
| 368 |
+
}
|
| 369 |
+
|
| 370 |
+
handleMessage(data) {
|
| 371 |
+
switch (data.type) {
|
| 372 |
+
case 'keys_configured':
|
| 373 |
+
this.keysConfigured = data.ready;
|
| 374 |
+
if (data.ready) {
|
| 375 |
+
this.apiKeysPanel.style.display = 'none';
|
| 376 |
+
this.sendBtn.disabled = false;
|
| 377 |
+
} else {
|
| 378 |
+
this.saveKeysBtn.disabled = false;
|
| 379 |
+
this.saveKeysBtn.textContent = 'Connect';
|
| 380 |
+
this.showError('Failed to initialize agent. Check your API keys.');
|
| 381 |
+
}
|
| 382 |
+
break;
|
| 383 |
+
|
| 384 |
+
case 'thinking':
|
| 385 |
+
this.showThinkingIndicator();
|
| 386 |
+
break;
|
| 387 |
+
|
| 388 |
+
case 'status':
|
| 389 |
+
this.updateStatusIndicator(data.content);
|
| 390 |
+
break;
|
| 391 |
+
|
| 392 |
+
|
| 393 |
+
case 'chunk':
|
| 394 |
+
this.appendToAssistantMessage(data.content);
|
| 395 |
+
break;
|
| 396 |
+
|
| 397 |
+
case 'plot':
|
| 398 |
+
this.addPlot(data.data, data.path, data.code || '');
|
| 399 |
+
break;
|
| 400 |
+
|
| 401 |
+
case 'video':
|
| 402 |
+
console.log('[WS] Video message received:', data);
|
| 403 |
+
this.addVideo(data.data, data.path, data.mimetype || 'video/mp4');
|
| 404 |
+
break;
|
| 405 |
+
|
| 406 |
+
case 'complete':
|
| 407 |
+
this.finalizeAssistantMessage(data.content);
|
| 408 |
+
this.sendBtn.disabled = false;
|
| 409 |
+
break;
|
| 410 |
+
|
| 411 |
+
case 'error':
|
| 412 |
+
this.showError(data.content);
|
| 413 |
+
this.sendBtn.disabled = false;
|
| 414 |
+
break;
|
| 415 |
+
|
| 416 |
+
case 'clear':
|
| 417 |
+
this.clearMessagesUI();
|
| 418 |
+
break;
|
| 419 |
+
}
|
| 420 |
+
}
|
| 421 |
+
|
| 422 |
+
addUserMessage(content) {
|
| 423 |
+
const div = document.createElement('div');
|
| 424 |
+
div.className = 'message user-message';
|
| 425 |
+
div.innerHTML = `
|
| 426 |
+
<div class="message-header">
|
| 427 |
+
<span class="message-role">You</span>
|
| 428 |
+
</div>
|
| 429 |
+
<div class="message-content">${this.escapeHtml(content)}</div>
|
| 430 |
+
`;
|
| 431 |
+
this.messagesContainer.appendChild(div);
|
| 432 |
+
this.scrollToBottom();
|
| 433 |
+
}
|
| 434 |
+
|
| 435 |
+
showThinkingIndicator() {
|
| 436 |
+
this.removeThinkingIndicator();
|
| 437 |
+
|
| 438 |
+
const div = document.createElement('div');
|
| 439 |
+
div.className = 'message thinking-message';
|
| 440 |
+
div.id = 'thinking-indicator';
|
| 441 |
+
div.innerHTML = `
|
| 442 |
+
<div class="typing-indicator">
|
| 443 |
+
<span></span><span></span><span></span>
|
| 444 |
+
</div>
|
| 445 |
+
`;
|
| 446 |
+
this.messagesContainer.appendChild(div);
|
| 447 |
+
this.scrollToBottom();
|
| 448 |
+
}
|
| 449 |
+
|
| 450 |
+
removeThinkingIndicator() {
|
| 451 |
+
const indicator = document.getElementById('thinking-indicator');
|
| 452 |
+
if (indicator) indicator.remove();
|
| 453 |
+
}
|
| 454 |
+
|
| 455 |
+
updateStatusIndicator(statusText) {
|
| 456 |
+
// Replace thinking dots with status message
|
| 457 |
+
let indicator = document.getElementById('thinking-indicator');
|
| 458 |
+
|
| 459 |
+
if (!indicator) {
|
| 460 |
+
indicator = document.createElement('div');
|
| 461 |
+
indicator.className = 'message thinking-message';
|
| 462 |
+
indicator.id = 'thinking-indicator';
|
| 463 |
+
this.messagesContainer.appendChild(indicator);
|
| 464 |
+
}
|
| 465 |
+
|
| 466 |
+
indicator.innerHTML = `
|
| 467 |
+
<div class="status-indicator">
|
| 468 |
+
<span class="status-spinner"></span>
|
| 469 |
+
<span class="status-text">${this.escapeHtml(statusText)}</span>
|
| 470 |
+
</div>
|
| 471 |
+
`;
|
| 472 |
+
this.scrollToBottom();
|
| 473 |
+
}
|
| 474 |
+
|
| 475 |
+
appendToAssistantMessage(content) {
|
| 476 |
+
this.removeThinkingIndicator();
|
| 477 |
+
|
| 478 |
+
if (!this.currentAssistantMessage) {
|
| 479 |
+
this.currentAssistantMessage = document.createElement('div');
|
| 480 |
+
this.currentAssistantMessage.className = 'message assistant-message';
|
| 481 |
+
this.currentAssistantMessage.innerHTML = `
|
| 482 |
+
<div class="message-header">
|
| 483 |
+
<img src="/static/favicon.jpeg" class="avatar-icon" alt="">
|
| 484 |
+
<span class="message-role">Eurus</span>
|
| 485 |
+
</div>
|
| 486 |
+
<div class="message-content markdown-content"></div>
|
| 487 |
+
<div class="message-plots"></div>
|
| 488 |
+
`;
|
| 489 |
+
this.messagesContainer.appendChild(this.currentAssistantMessage);
|
| 490 |
+
}
|
| 491 |
+
|
| 492 |
+
const contentDiv = this.currentAssistantMessage.querySelector('.message-content');
|
| 493 |
+
const raw = (contentDiv.getAttribute('data-raw') || '') + content;
|
| 494 |
+
contentDiv.setAttribute('data-raw', raw);
|
| 495 |
+
contentDiv.innerHTML = marked.parse(raw);
|
| 496 |
+
|
| 497 |
+
contentDiv.querySelectorAll('pre code').forEach(block => hljs.highlightElement(block));
|
| 498 |
+
this.scrollToBottom();
|
| 499 |
+
}
|
| 500 |
+
|
| 501 |
+
addPlot(base64Data, path, code = '') {
|
| 502 |
+
this.removeThinkingIndicator();
|
| 503 |
+
|
| 504 |
+
if (!this.currentAssistantMessage) {
|
| 505 |
+
this.appendToAssistantMessage('');
|
| 506 |
+
}
|
| 507 |
+
|
| 508 |
+
const plotsDiv = this.currentAssistantMessage.querySelector('.message-plots');
|
| 509 |
+
|
| 510 |
+
const figure = document.createElement('figure');
|
| 511 |
+
figure.className = 'plot-figure';
|
| 512 |
+
|
| 513 |
+
const imgSrc = `data:image/png;base64,${base64Data}`;
|
| 514 |
+
const codeId = `code-${Date.now()}-${Math.random().toString(36).substr(2, 9)}`;
|
| 515 |
+
|
| 516 |
+
figure.innerHTML = `
|
| 517 |
+
<img src="${imgSrc}" alt="Generated plot">
|
| 518 |
+
<div class="plot-actions">
|
| 519 |
+
<button class="enlarge-btn" title="Enlarge">Enlarge</button>
|
| 520 |
+
<button class="download-btn" title="Download">Download</button>
|
| 521 |
+
${code && code.trim() ? `<button class="code-btn" title="Show Code">Show Code</button>` : ''}
|
| 522 |
+
</div>
|
| 523 |
+
`;
|
| 524 |
+
|
| 525 |
+
// Add code block separately if code exists
|
| 526 |
+
if (code && code.trim()) {
|
| 527 |
+
const codeDiv = document.createElement('div');
|
| 528 |
+
codeDiv.className = 'plot-code';
|
| 529 |
+
codeDiv.style.display = 'none';
|
| 530 |
+
|
| 531 |
+
const pre = document.createElement('pre');
|
| 532 |
+
const codeEl = document.createElement('code');
|
| 533 |
+
codeEl.className = 'language-python hljs';
|
| 534 |
+
|
| 535 |
+
// Highlight immediately
|
| 536 |
+
try {
|
| 537 |
+
const highlighted = hljs.highlight(code, { language: 'python' });
|
| 538 |
+
codeEl.innerHTML = highlighted.value;
|
| 539 |
+
} catch (e) {
|
| 540 |
+
console.error('Highlight error:', e);
|
| 541 |
+
codeEl.textContent = code;
|
| 542 |
+
}
|
| 543 |
+
|
| 544 |
+
pre.appendChild(codeEl);
|
| 545 |
+
codeDiv.appendChild(pre);
|
| 546 |
+
figure.appendChild(codeDiv);
|
| 547 |
+
}
|
| 548 |
+
|
| 549 |
+
// Add enlarge action
|
| 550 |
+
figure.querySelector('.enlarge-btn').addEventListener('click', () => {
|
| 551 |
+
this.showImageModal(imgSrc);
|
| 552 |
+
});
|
| 553 |
+
|
| 554 |
+
// Add download action
|
| 555 |
+
figure.querySelector('.download-btn').addEventListener('click', () => {
|
| 556 |
+
const link = document.createElement('a');
|
| 557 |
+
link.href = imgSrc;
|
| 558 |
+
const filename = path ? path.split('/').pop() : 'eurus_plot.png';
|
| 559 |
+
link.download = filename;
|
| 560 |
+
link.click();
|
| 561 |
+
});
|
| 562 |
+
|
| 563 |
+
// Add show code toggle
|
| 564 |
+
const codeBtn = figure.querySelector('.code-btn');
|
| 565 |
+
if (codeBtn) {
|
| 566 |
+
const codeDiv = figure.querySelector('.plot-code');
|
| 567 |
+
|
| 568 |
+
codeBtn.addEventListener('click', () => {
|
| 569 |
+
if (codeDiv.style.display === 'none') {
|
| 570 |
+
codeDiv.style.display = 'block';
|
| 571 |
+
codeBtn.textContent = 'Hide Code';
|
| 572 |
+
} else {
|
| 573 |
+
codeDiv.style.display = 'none';
|
| 574 |
+
codeBtn.textContent = 'Show Code';
|
| 575 |
+
}
|
| 576 |
+
});
|
| 577 |
+
}
|
| 578 |
+
|
| 579 |
+
// Click on image to enlarge
|
| 580 |
+
figure.querySelector('img').addEventListener('click', () => {
|
| 581 |
+
this.showImageModal(imgSrc);
|
| 582 |
+
});
|
| 583 |
+
|
| 584 |
+
plotsDiv.appendChild(figure);
|
| 585 |
+
this.scrollToBottom();
|
| 586 |
+
}
|
| 587 |
+
|
| 588 |
+
addVideo(base64Data, path, mimetype = 'video/mp4') {
    // Render a generated animation into the current assistant message's
    // plot area. GIFs are shown as <img> (with enlarge + download actions);
    // real video mimetypes (webm/mp4) are shown in a <video> element with
    // a download action only.
    //
    // Params:
    //   base64Data - base64-encoded media payload (no data: prefix)
    //   path       - optional server-side path; its basename is used as the
    //                download filename when present
    //   mimetype   - media type; 'image/gif' triggers the <img> branch
    console.log('[VIDEO] addVideo called:', { path, mimetype, dataLength: base64Data?.length });
    this.removeThinkingIndicator();

    // Ensure an assistant message bubble exists to attach the media to.
    if (!this.currentAssistantMessage) {
        this.appendToAssistantMessage('');
    }

    const plotsDiv = this.currentAssistantMessage.querySelector('.message-plots');
    console.log('[VIDEO] plotsDiv found:', plotsDiv);

    const figure = document.createElement('figure');
    figure.className = 'plot-figure video-figure';

    // Handle different formats
    let videoSrc;
    if (mimetype === 'image/gif') {
        // GIFs display as img
        videoSrc = `data:image/gif;base64,${base64Data}`;
        figure.innerHTML = `
            <img src="${videoSrc}" alt="Generated animation" class="video-gif" style="max-width: 100%; border-radius: 8px;">
            <div class="plot-actions">
                <button class="enlarge-btn" title="Enlarge">Enlarge</button>
                <button class="download-btn" title="Download">Download</button>
            </div>
        `;

        // Enlarge for GIF
        figure.querySelector('.enlarge-btn').addEventListener('click', () => {
            this.showImageModal(videoSrc);
        });
        figure.querySelector('img').addEventListener('click', () => {
            this.showImageModal(videoSrc);
        });
    } else {
        // Video formats (webm, mp4). `muted` is required for autoplay to be
        // permitted by most browsers; `playsinline` avoids iOS fullscreen.
        videoSrc = `data:${mimetype};base64,${base64Data}`;
        figure.innerHTML = `
            <video controls autoplay loop muted playsinline style="max-width: 100%; border-radius: 8px;">
                <source src="${videoSrc}" type="${mimetype}">
                Your browser does not support video playback.
            </video>
            <div class="plot-actions">
                <button class="download-btn" title="Download">Download</button>
            </div>
        `;
    }

    // Download button (present in both branches).
    figure.querySelector('.download-btn').addEventListener('click', () => {
        const link = document.createElement('a');
        link.href = videoSrc;
        // Derive a sensible extension from the mimetype when no path is given.
        const ext = mimetype.includes('gif') ? 'gif' : mimetype.includes('webm') ? 'webm' : 'mp4';
        const filename = path ? path.split('/').pop() : `eurus_animation.${ext}`;
        link.download = filename;
        link.click();
    });

    plotsDiv.appendChild(figure);
    this.scrollToBottom();
}
|
| 649 |
+
|
| 650 |
+
finalizeAssistantMessage(content) {
|
| 651 |
+
this.removeThinkingIndicator();
|
| 652 |
+
if (content && !this.currentAssistantMessage) {
|
| 653 |
+
this.appendToAssistantMessage(content);
|
| 654 |
+
}
|
| 655 |
+
this.currentAssistantMessage = null;
|
| 656 |
+
}
|
| 657 |
+
|
| 658 |
+
showError(message) {
    // Display an error bubble in the chat transcript. The message text is
    // HTML-escaped before interpolation, so untrusted server text cannot
    // inject markup.
    this.removeThinkingIndicator();

    const div = document.createElement('div');
    div.className = 'message error-message';
    div.innerHTML = `
        <div class="message-header">
            <span class="message-role">Error</span>
        </div>
        <div class="message-content">${this.escapeHtml(message)}</div>
    `;
    this.messagesContainer.appendChild(div);
    // An error ends any in-flight assistant response.
    this.currentAssistantMessage = null;
    this.scrollToBottom();
}
|
| 673 |
+
|
| 674 |
+
async clearChat() {
|
| 675 |
+
if (!confirm('Clear conversation?')) return;
|
| 676 |
+
|
| 677 |
+
// Send clear command through WebSocket so the agent session memory is also cleared
|
| 678 |
+
if (this.ws && this.ws.readyState === WebSocket.OPEN) {
|
| 679 |
+
this.ws.send(JSON.stringify({ message: '/clear' }));
|
| 680 |
+
} else {
|
| 681 |
+
// Fallback to REST if WS not available
|
| 682 |
+
try {
|
| 683 |
+
const response = await fetch('/api/conversation', { method: 'DELETE' });
|
| 684 |
+
if (response.ok) this.clearMessagesUI();
|
| 685 |
+
} catch (error) {
|
| 686 |
+
console.error('Error clearing:', error);
|
| 687 |
+
}
|
| 688 |
+
}
|
| 689 |
+
}
|
| 690 |
+
|
| 691 |
+
clearMessagesUI() {
|
| 692 |
+
const messages = this.messagesContainer.querySelectorAll('.message:not(.system-message)');
|
| 693 |
+
messages.forEach(msg => msg.remove());
|
| 694 |
+
this.currentAssistantMessage = null;
|
| 695 |
+
}
|
| 696 |
+
|
| 697 |
+
async showCacheModal() {
|
| 698 |
+
this.cacheModal.showModal();
|
| 699 |
+
const content = document.getElementById('cache-content');
|
| 700 |
+
content.innerHTML = '<p>Loading...</p>';
|
| 701 |
+
|
| 702 |
+
try {
|
| 703 |
+
const response = await fetch('/api/cache');
|
| 704 |
+
const data = await response.json();
|
| 705 |
+
|
| 706 |
+
if (data.datasets && data.datasets.length > 0) {
|
| 707 |
+
let html = '<table><thead><tr><th>Variable</th><th>Period</th><th>Type</th></tr></thead><tbody>';
|
| 708 |
+
for (const ds of data.datasets) {
|
| 709 |
+
html += `<tr><td>${ds.variable}</td><td>${ds.start_date} to ${ds.end_date}</td><td>${ds.query_type}</td></tr>`;
|
| 710 |
+
}
|
| 711 |
+
html += '</tbody></table>';
|
| 712 |
+
content.innerHTML = html;
|
| 713 |
+
} else {
|
| 714 |
+
content.innerHTML = '<p>No cached datasets.</p>';
|
| 715 |
+
}
|
| 716 |
+
} catch (error) {
|
| 717 |
+
content.innerHTML = `<p>Error: ${error.message}</p>`;
|
| 718 |
+
}
|
| 719 |
+
}
|
| 720 |
+
|
| 721 |
+
scrollToBottom() {
|
| 722 |
+
this.messagesContainer.scrollTop = this.messagesContainer.scrollHeight;
|
| 723 |
+
}
|
| 724 |
+
|
| 725 |
+
escapeHtml(text) {
|
| 726 |
+
const div = document.createElement('div');
|
| 727 |
+
div.textContent = text;
|
| 728 |
+
return div.innerHTML;
|
| 729 |
+
}
|
| 730 |
+
}
|
| 731 |
+
|
| 732 |
+
// Boot the chat client once the DOM is ready; expose the instance on
// window for debugging from the browser console.
document.addEventListener('DOMContentLoaded', () => {
    window.eurusChat = new EurusChat();
});
|
web/templates/base.html
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
<html lang="en">

<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>{% block title %}Eurus{% endblock %}</title>
    <link rel="icon" type="image/jpeg" href="/static/favicon.jpeg">

    <!-- Custom styles only -->
    <link rel="stylesheet" href="/static/css/style.css">

    <!-- Marked.js for markdown -->
    <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>

    <!-- Highlight.js for code -->
    <link rel="stylesheet"
        href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/atom-one-dark.min.css">
    <script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js"></script>
    <script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/languages/python.min.js"></script>

    {% block head %}{% endblock %}
</head>

<body>
    <!-- Top nav: branding left; actions, theme toggle, and WS status right.
         Button/badge ids are wired up by /static/js/chat.js. -->
    <header>
        <nav>
            <ul>
                <li><span class="logo">Eurus</span></li>
            </ul>
            <ul>
                <li><a href="#" id="clear-btn">Clear</a></li>
                <li><a href="#" id="cache-btn">Cache</a></li>
                <li>
                    <button id="theme-toggle" class="theme-toggle" title="Toggle theme">
                        <span class="theme-icon">🌙</span>
                    </button>
                </li>
                <li>
                    <!-- WebSocket connection badge; class/text updated by chat.js -->
                    <span id="connection-status" class="status-badge disconnected">
                        Disconnected
                    </span>
                </li>
            </ul>
        </nav>
    </header>

    <main>
        {% block content %}{% endblock %}
    </main>

    <footer>
        Eurus Climate Agent
    </footer>

    {% block scripts %}{% endblock %}
</body>

</html>
|
web/templates/components/message.html
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!-- Reference markup for chat message bubbles. These templates mirror the
     DOM structure that chat.js builds dynamically on the client. -->

<!-- User message template -->
<div class="message user-message" data-message-id="{{ message_id }}">
    <div class="message-header">
        <span class="message-role">You</span>
        <span class="message-time">{{ timestamp }}</span>
    </div>
    <div class="message-content">
        {{ content }}
    </div>
</div>

<!-- Assistant message template -->
<div class="message assistant-message" data-message-id="{{ message_id }}">
    <div class="message-header">
        <span class="message-role">Eurus</span>
        <span class="message-time">{{ timestamp }}</span>
    </div>
    <div class="message-content markdown-content">
        {{ content }}
    </div>
    {% if plots %}
    <!-- Optional gallery of generated plot images -->
    <div class="message-plots">
        {% for plot in plots %}
        <figure class="plot-figure">
            <img src="{{ plot.url }}" alt="Generated plot" loading="lazy">
            {% if plot.path %}
            <figcaption>{{ plot.path }}</figcaption>
            {% endif %}
        </figure>
        {% endfor %}
    </div>
    {% endif %}
</div>

<!-- Thinking indicator template -->
<div class="message thinking-message" data-message-id="{{ message_id }}">
    <div class="message-header">
        <span class="message-role">Eurus</span>
    </div>
    <div class="message-content">
        <span aria-busy="true">Thinking...</span>
    </div>
</div>

<!-- Code execution template -->
<div class="message code-message" data-message-id="{{ message_id }}">
    <div class="message-header">
        <span class="message-role">Executing Code</span>
    </div>
    <div class="message-content">
        <pre><code class="language-python">{{ code }}</code></pre>
    </div>
</div>
|
web/templates/index.html
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{% extends "base.html" %}

{% block title %}Eurus - Climate Data Analysis{% endblock %}

{% block content %}
<div class="chat-container">
    <!-- API Keys panel — hidden when keys are pre-configured via env.
         chat.js reveals it and forwards the keys over the WebSocket. -->
    <div id="api-keys-panel" class="api-keys-panel" style="display: none;">
        <div class="api-keys-header">
            <span>API Keys Required</span>
        </div>
        <div class="api-keys-body">
            <p class="api-keys-note">Enter your API keys to use Eurus. Keys are stored in your browser only and never saved on the server.</p>
            <div class="api-key-field">
                <label for="openai-key">OpenAI API Key <span class="required">*</span></label>
                <input type="password" id="openai-key" placeholder="sk-..." autocomplete="off">
            </div>
            <div class="api-key-field">
                <label for="arraylake-key">Arraylake API Key</label>
                <input type="password" id="arraylake-key" placeholder="ema_..." autocomplete="off">
            </div>
            <button id="save-keys-btn" class="save-keys-btn">Connect</button>
        </div>
    </div>

    <!-- Transcript; chat.js appends message bubbles after the welcome card -->
    <div id="chat-messages" class="chat-messages">
        <div class="message system-message">
            <h3>Welcome to Eurus</h3>
            <p>I can help you analyze ERA5 climate data. Try:</p>
            <ul>
                <li>"Show me SST for California coast, Jan 2024"</li>
                <li>"Plot temperature in the Gulf of Mexico"</li>
            </ul>
        </div>
    </div>

    <div class="chat-input-container">
        <form id="chat-form" class="chat-form">
            <textarea id="message-input" placeholder="Ask about climate data..." rows="1"></textarea>
            <button type="submit" id="send-btn" disabled>Send</button>
        </form>
        <div class="input-hints">
            <kbd>Enter</kbd> to send, <kbd>Shift+Enter</kbd> for new line
        </div>
    </div>
</div>

<!-- Cache dialog; opened via HTMLDialogElement.showModal() from chat.js -->
<dialog id="cache-modal">
    <article>
        <header>
            <h3>Cached Datasets</h3>
            <button class="close-modal">×</button>
        </header>
        <div id="cache-content">
            <p>Loading...</p>
        </div>
    </article>
</dialog>
{% endblock %}

{% block scripts %}
<!-- Query string busts the browser cache when chat.js changes -->
<script src="/static/js/chat.js?v=20260216"></script>
{% endblock %}
|