github-actions[bot] committed on
Commit ·
5d4e96b
1
Parent(s): 948797f
Fresh start without shapefiles
Browse files- .github/workflows/deploy-to-huggingface.yml +16 -3
- .gitignore +4 -0
- .python-version +1 -0
- README.md +10 -0
- analysis.py +1234 -0
- app.py +1070 -0
- data/SAB/SAB.cpg +3 -0
- data/SAB/SAB.prj +3 -0
- data/SAB/SAB.qmd +27 -0
- main.py +232 -0
- pyproject.toml +32 -0
- requirements.txt +14 -0
- tests/test_main.py +116 -0
- uv.lock +0 -0
.github/workflows/deploy-to-huggingface.yml
CHANGED
|
@@ -25,19 +25,32 @@ jobs:
|
|
| 25 |
# Clone the HF space
|
| 26 |
git clone https://philmaxwell:$HF_TOKEN@huggingface.co/spaces/philmaxwell/sabw-wq-data hf_space
|
| 27 |
|
| 28 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
rsync -av \
|
| 30 |
--exclude 'hf_space' \
|
| 31 |
--exclude '.git' \
|
|
|
|
| 32 |
--exclude '*.ipynb' \
|
| 33 |
--exclude 'Ideas.md' \
|
| 34 |
--exclude 'data/*.parquet' \
|
| 35 |
--exclude '*.csv' \
|
| 36 |
--exclude '*.xlsx' \
|
|
|
|
|
|
|
|
|
|
| 37 |
./ hf_space/
|
| 38 |
|
| 39 |
# Commit and push changes
|
| 40 |
cd hf_space
|
| 41 |
git add .
|
| 42 |
-
git commit -m "
|
| 43 |
-
git push origin main
|
|
|
|
| 25 |
# Clone the HF space
|
| 26 |
git clone https://philmaxwell:$HF_TOKEN@huggingface.co/spaces/philmaxwell/sabw-wq-data hf_space
|
| 27 |
|
| 28 |
+
# Clean up everything except .git directory
|
| 29 |
+
cd hf_space
|
| 30 |
+
find . -mindepth 1 -not -path './.git*' -delete
|
| 31 |
+
|
| 32 |
+
# Remove LFS configuration
|
| 33 |
+
git rm .gitattributes || true
|
| 34 |
+
git commit -am "Clean repository"
|
| 35 |
+
|
| 36 |
+
# Copy new files, excluding shapefiles
|
| 37 |
+
cd ..
|
| 38 |
rsync -av \
|
| 39 |
--exclude 'hf_space' \
|
| 40 |
--exclude '.git' \
|
| 41 |
+
--exclude '.gitattributes' \
|
| 42 |
--exclude '*.ipynb' \
|
| 43 |
--exclude 'Ideas.md' \
|
| 44 |
--exclude 'data/*.parquet' \
|
| 45 |
--exclude '*.csv' \
|
| 46 |
--exclude '*.xlsx' \
|
| 47 |
+
--exclude 'data/SAB/*.shp' \
|
| 48 |
+
--exclude 'data/SAB/*.dbf' \
|
| 49 |
+
--exclude 'data/SAB/*.shx' \
|
| 50 |
./ hf_space/
|
| 51 |
|
| 52 |
# Commit and push changes
|
| 53 |
cd hf_space
|
| 54 |
git add .
|
| 55 |
+
git commit -m "Fresh start without shapefiles"
|
| 56 |
+
git push -f origin main
|
.gitignore
CHANGED
|
@@ -22,3 +22,7 @@ wheels/
|
|
| 22 |
/.quarto/
|
| 23 |
|
| 24 |
.cache/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
/.quarto/
|
| 23 |
|
| 24 |
.cache/
|
| 25 |
+
cache/
|
| 26 |
+
data/KOR.zip
|
| 27 |
+
*.ipynb
|
| 28 |
+
*.json
|
.python-version
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
3.12
|
README.md
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Water Quality Report
|
| 3 |
+
emoji: 💧
|
| 4 |
+
colorFrom: blue
|
| 5 |
+
colorTo: green
|
| 6 |
+
sdk: streamlit
|
| 7 |
+
sdk_version: 1.40.1
|
| 8 |
+
app_file: app.py
|
| 9 |
+
pinned: false
|
| 10 |
+
---
|
analysis.py
ADDED
|
@@ -0,0 +1,1234 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import altair as alt
|
| 2 |
+
import contextily as ctx
|
| 3 |
+
import geopandas as gpd
|
| 4 |
+
import matplotlib.pyplot as plt
|
| 5 |
+
import numpy as np
|
| 6 |
+
import pandas as pd
|
| 7 |
+
import plotly.graph_objects as go
|
| 8 |
+
import scipy.stats as stats
|
| 9 |
+
import seaborn as sns
|
| 10 |
+
from matplotlib.colors import LinearSegmentedColormap
|
| 11 |
+
from matplotlib.figure import Figure
|
| 12 |
+
from plotly.subplots import make_subplots
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def load_data(filename: str) -> pd.DataFrame:
    """
    Read a water-quality CSV file and normalise its key columns.

    ``Station_Number`` is read as text, ``Org_Result_Value`` is converted to
    numbers (the literal "Not Reported" and any other non-numeric entry become
    missing values), and ``Activity_Start_Date_Time`` is parsed into datetimes.
    """
    raw = pd.read_csv(filename, dtype={"Station_Number": str})

    # Blank out the textual placeholder first, then coerce whatever is left
    result_values = pd.to_numeric(
        raw["Org_Result_Value"].replace("Not Reported", pd.NA), errors="coerce"
    )
    start_times = pd.to_datetime(raw["Activity_Start_Date_Time"])

    return raw.assign(
        Org_Result_Value=result_values,
        Activity_Start_Date_Time=start_times,
    )
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def plot_analyte_trends(
    df: pd.DataFrame, analyte_names: list[str], sample_position: str, figsize=(15, 12)
) -> Figure:
    """
    Create a two-column grid of yearly box plots with trend lines, one per analyte.

    Parameters:
    -----------
    df : pandas DataFrame
        The filtered dataframe containing data for a specific station and position.
        Must be non-empty: the station number and name are read from its first row.
    analyte_names : list[str]
        List of analyte names to plot (values of ``Org_Analyte_Name``)
    sample_position : str
        Sample position shown in the figure title; "All" is displayed as
        "Surface and Bottom"
    figsize : tuple
        Figure size in inches (width, height)

    Returns:
    --------
    Figure
        Matplotlib figure with one subplot per analyte
    """
    # Analytes drawn on a logarithmic y-axis (loop-invariant, so built once)
    log_scale_analytes = {
        "Turbidity",
        "Fecal Coliform (MPN)",
        "Total Nitrogen",
        "Total Phosphorus",
    }

    # Two subplots per row; keep at least one row so an empty analyte list
    # yields a blank titled figure instead of a subplot-grid error
    n_rows = max(1, (len(analyte_names) + 1) // 2)

    fig, axes = plt.subplots(n_rows, 2, figsize=figsize)
    axes = axes.flatten()  # Flatten axes array for easier indexing

    station_number = df["Station_Number"].iloc[0]
    station_name = df["Name"].iloc[0]
    sample_position_label = (
        "Surface and Bottom" if sample_position == "All" else sample_position
    )

    for idx, analyte_name in enumerate(analyte_names):
        ax = axes[idx]
        data = (
            df[df["Org_Analyte_Name"] == analyte_name]
            .assign(Year=lambda d: d["Activity_Start_Date_Time"].dt.year)
            .dropna(subset=["Org_Result_Value"])
        )

        if data.empty:
            ax.text(
                0.5,
                0.5,
                f"No data available for {analyte_name}",
                ha="center",
                va="center",
            )
            continue

        if analyte_name in log_scale_analytes:
            ax.set_yscale("log")
            ax.yaxis.set_major_formatter(plt.ScalarFormatter())  # type: ignore

        # One box per calendar year, positioned at the year itself
        groups = data.groupby("Year")
        positions = np.array(list(groups.groups.keys()))
        group_data = [group["Org_Result_Value"] for _, group in groups]

        ax.boxplot(
            group_data,
            positions=positions,
            widths=0.6,
            patch_artist=True,
            boxprops=dict(facecolor="lightblue", color="blue", alpha=0.5),
            medianprops=dict(color="blue"),
            whiskerprops=dict(color="blue"),
            capprops=dict(color="blue"),
            flierprops=dict(color="blue", markeredgecolor="blue", alpha=0.5),
        )

        # Annual means, reusing the grouping built for the box plot
        yearly_means = groups["Org_Result_Value"].mean()
        years = yearly_means.index.values
        means = yearly_means.values

        ax.plot(years, means, "bo-", linewidth=1, markersize=4, label="Annual Mean")

        # A linear trend needs at least two yearly means
        if len(years) > 1:
            slope, intercept, r_value, p_value, _ = stats.linregress(years, means)
            trend_line = slope * years + intercept
            ax.plot(years, trend_line, "r--", alpha=0.8, linewidth=1, label="Trend")

            stats_text = f"R²={r_value**2:.3f}\np={p_value:.3f}"  # type: ignore
            ax.text(
                0.02,
                0.98,
                stats_text,
                transform=ax.transAxes,
                verticalalignment="top",
                bbox=dict(boxstyle="round", facecolor="white", alpha=0.8),
                parse_math=False,
            )

        # Customize subplot
        ax.set_title(f"{analyte_name}", pad=15)
        ax.set_xlabel("Year")
        analyte_unit = data["Org_Result_Unit"].iloc[0]
        if analyte_name == "Depth, Secchi Disk Depth":
            y_label = f"Depth ({analyte_unit})"
        elif analyte_name == "pH":
            y_label = None
        elif analyte_name.startswith("Dissolved"):
            y_label = f"DO ({analyte_unit})"
        elif analyte_name.startswith("Fecal Coliform"):
            y_label = f"Fecal Coliform ({analyte_unit})"
        else:
            y_label = f"{analyte_name} ({analyte_unit})"

        ax.set_ylabel(y_label)
        ax.grid(True, alpha=0.3)

        # Sample sizes sit on the top edge of the axes; the limit is read once
        # because adding text does not change it
        y_top = ax.get_ylim()[1]
        for year, group in groups:
            ax.text(
                year,
                y_top,
                f"n={len(group)}",
                ha="center",
                va="bottom",
                fontsize=8,
            )

    # Remove any unused subplots
    for idx in range(len(analyte_names), len(axes)):
        fig.delaxes(axes[idx])

    fig.suptitle(
        f"Water Quality Trends for {station_number} - {station_name} - {sample_position_label}",
        fontsize=14,
        y=0.95,
    )

    # Lay out this figure explicitly rather than pyplot's "current" figure,
    # leaving the top 5% free for the overall title
    fig.tight_layout(rect=(0, 0, 1, 0.95))
    return fig
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
def altair_plot_sector_trends(
    df: pd.DataFrame, analyte_names: list[str]
) -> alt.VConcatChart:
    """
    Create plots of mean annual analyte trends by sector using Altair.

    Each analyte gets one layered chart (error band, mean line, mean points)
    coloured by sector; the charts are stacked vertically.

    Parameters:
    -----------
    df : pd.DataFrame
        Input dataframe
    analyte_names : list[str]
        List of analytes to plot

    Returns:
    --------
    alt.VConcatChart
        Vertically concatenated Altair charts for each analyte
    """
    # Analytes drawn on a logarithmic y-axis (loop-invariant, so built once)
    log_scale_analytes = {
        "Turbidity",
        "Fecal Coliform (MPN)",
        "Total Nitrogen",
        "Total Phosphorus",
    }

    # Custom color scheme matching the matplotlib version
    color_scale = alt.Scale(
        domain=df["Sector"].unique().tolist(),
        range=[
            "#1f77b4",  # blue
            "#ff7f0e",  # orange
            "#2ca02c",  # green
            "#d62728",  # red
            "#9467bd",  # purple
            "#8c564b",  # brown
            "#e377c2",  # pink
            "#7f7f7f",  # gray
        ],
    )

    charts = []
    for analyte_name in analyte_names:
        # Filter data for current analyte
        analyte_data = df[df["Org_Analyte_Name"] == analyte_name]

        # For Salinity, exclude Fresh Water Lakes
        if analyte_name == "Salinity":
            analyte_data = analyte_data[analyte_data["Sector"] != "Fresh Water Lakes"]

        # Calculate annual means and standard errors
        processed_data = (
            analyte_data.assign(Year=lambda d: d["Activity_Start_Date_Time"].dt.year)
            .groupby(["Year", "Sector"])["Org_Result_Value"]
            .agg(["mean", "sem"])
            .reset_index()
            .rename(columns={"mean": "Mean", "sem": "SE"})
        )

        # Band bounds: the standard error is undefined (NaN) for a group with a
        # single sample, which would break the band at that year. Collapse the
        # band onto the mean there; the SE column itself is left untouched so
        # the tooltip still reports it as missing.
        # NOTE(review): on log-scaled charts a Lower bound <= 0 cannot be drawn;
        # confirm whether clipping is wanted.
        band_half_width = processed_data["SE"].fillna(0)
        processed_data["Upper"] = processed_data["Mean"] + band_half_width
        processed_data["Lower"] = processed_data["Mean"] - band_half_width

        # Get the unit for the y-axis label
        unit = analyte_data["Org_Result_Unit"].iloc[0] if not analyte_data.empty else ""

        use_log_scale = analyte_name in log_scale_analytes

        # Create base chart
        base = alt.Chart(processed_data).encode(
            x=alt.X("Year:O", axis=alt.Axis(title=None)),
            color=alt.Color("Sector:N", scale=color_scale),
            tooltip=[
                alt.Tooltip("Year:O"),
                alt.Tooltip("Sector:N"),
                alt.Tooltip("Mean:Q", format=".2f"),
                alt.Tooltip("SE:Q", format=".2f"),
            ],
        )

        # Create line and point layers
        lines = base.mark_line().encode(
            y=alt.Y(
                "Mean:Q",
                title=f"({unit})",
                scale=alt.Scale(type="log" if use_log_scale else "linear"),
            )
        )

        points = base.mark_point(size=50).encode(y=alt.Y("Mean:Q"))

        # Create confidence interval area
        area = base.mark_area(opacity=0.15).encode(
            y=alt.Y("Lower:Q"), y2=alt.Y2("Upper:Q")
        )

        # Combine layers
        chart = (
            (area + lines + points)
            .properties(
                width=600,
                height=300,
                title=alt.TitleParams(text=analyte_name, anchor="middle", fontSize=14),
            )
            .interactive()
        )

        charts.append(chart)

    # Combine all charts vertically
    final_chart = alt.vconcat(*charts).configure(
        view={"strokeWidth": 0}, axis={"grid": True, "gridOpacity": 0.2}
    )

    return final_chart
|
| 286 |
+
|
| 287 |
+
|
| 288 |
+
def plotly_plot_analyte_trends(df: pd.DataFrame, analyte_names: list[str]) -> go.Figure:
    """
    Create subplots of analyte trends using Plotly for the given dataframe and analytes.

    Each subplot shows yearly box plots, the annual mean line, a linear trend
    line with its R² and p-value, and the per-year sample size.

    Parameters:
    -----------
    df : pandas DataFrame
        The filtered dataframe containing data for a specific station and position.
        Must be non-empty: the station number and sample position are read
        from its first row.
    analyte_names : list[str]
        List of analyte names to plot

    Returns:
    --------
    go.Figure
        Plotly figure containing the subplots
    """
    # Analytes drawn on a logarithmic y-axis.
    # NOTE(review): the other trend plots in this module also log-scale
    # "Total Nitrogen" and "Total Phosphorus" - confirm whether the shorter
    # list here is intentional.
    log_scale_analytes = {"Turbidity", "Fecal Coliform (MPN)"}

    # Calculate number of rows needed (2 columns)
    n_rows = (len(analyte_names) + 1) // 2

    # Create subplot figure
    fig = make_subplots(
        rows=n_rows,
        cols=2,
        subplot_titles=analyte_names,
        vertical_spacing=0.12,
        horizontal_spacing=0.1,
    )

    station_number = df["Station_Number"].iloc[0]
    sample_position = df["Sample_Position"].iloc[0]

    for idx, analyte_name in enumerate(analyte_names):
        # Subplots are filled row by row, two per row (1-based indices)
        row = idx // 2 + 1
        col = idx % 2 + 1

        data = (
            df[df["Org_Analyte_Name"] == analyte_name]
            .assign(Year=lambda d: d["Activity_Start_Date_Time"].dt.year)
            .dropna(subset=["Org_Result_Value"])
        )

        if data.empty:
            fig.add_annotation(
                text=f"No data available for {analyte_name}",
                xref=f"x{idx+1}",
                yref=f"y{idx+1}",
                x=0.5,
                y=0.5,
                showarrow=False,
                row=row,
                col=col,
            )
            continue

        log_scale = analyte_name in log_scale_analytes

        groups = data.groupby("Year")
        years = list(groups.groups.keys())

        # Annotations are anchored at the largest observed value. On a log
        # axis Plotly expects annotation coordinates in log10 units, so the
        # raw value would place the labels far above the data.
        y_max = data["Org_Result_Value"].max()
        label_y = np.log10(y_max) if log_scale and y_max > 0 else y_max

        # Add box plot
        fig.add_trace(
            go.Box(
                x=data["Year"],
                y=data["Org_Result_Value"],
                name="Box Plot",
                boxpoints="outliers",
                line=dict(color="blue"),
                fillcolor="lightblue",
                showlegend=False,
            ),
            row=row,
            col=col,
        )

        # Annual means, reusing the grouping built above
        yearly_means = groups["Org_Result_Value"].mean()

        # Add mean line
        fig.add_trace(
            go.Scatter(
                x=years,
                y=yearly_means.values,
                mode="lines+markers",
                name="Annual Mean",
                line=dict(color="blue"),
                showlegend=False,
            ),
            row=row,
            col=col,
        )

        # A linear trend needs at least two yearly means
        if len(years) > 1:
            X = np.array(years)
            y = yearly_means.values
            slope, intercept, r_value, p_value, _ = stats.linregress(X, y)
            trend_line = slope * X + intercept

            fig.add_trace(
                go.Scatter(
                    x=years,
                    y=trend_line,
                    mode="lines",
                    name="Trend",
                    line=dict(color="red", dash="dash"),
                    showlegend=False,
                ),
                row=row,
                col=col,
            )

            # Add statistics annotation
            stats_text = f"R² = {r_value**2:.3f}<br>p = {p_value:.3f}"  # type: ignore
            fig.add_annotation(
                text=stats_text,
                xref=f"x{idx+1}",
                yref=f"y{idx+1}",
                x=min(years),  # type: ignore
                y=label_y,
                showarrow=False,
                bgcolor="white",
                bordercolor="black",
                borderwidth=1,
                row=row,
                col=col,
            )

        # Add sample size annotations
        for year, group in groups:
            fig.add_annotation(
                text=f"n={len(group)}",
                x=year,
                y=label_y,
                showarrow=False,
                font=dict(size=8),
                row=row,
                col=col,
            )

        # Update axes
        if log_scale:
            fig.update_yaxes(type="log", row=row, col=col)

        fig.update_xaxes(title_text="Year", row=row, col=col)
        fig.update_yaxes(
            title_text=f'Value ({data["Org_Result_Unit"].iloc[0]})', row=row, col=col
        )

    # Update layout
    fig.update_layout(
        title=f"Water Quality Trends<br>Station {station_number} - {sample_position}",
        title_x=0.5,
        showlegend=False,
        height=300 * n_rows + 100,
        width=1000,
        template="plotly_white",
    )

    return fig
|
| 449 |
+
|
| 450 |
+
|
| 451 |
+
def plot_sector_trends(
    df: pd.DataFrame, analyte_names: list[str], base_height: float = 4
) -> Figure:
    """
    Create plots of mean annual analyte trends by sector.

    One subplot per analyte, stacked vertically; each sector is drawn as a
    line of annual means with a shaded band of +/- one standard error.

    Parameters:
    -----------
    df : pd.DataFrame
        Input dataframe
    analyte_names : list[str]
        List of analytes to plot
    base_height : float
        Height per subplot in inches (default=4)

    Returns:
    --------
    Figure
        Matplotlib figure with one subplot per analyte
    """
    # Analytes drawn on a logarithmic y-axis (loop-invariant, so built once)
    log_scale_analytes = {
        "Turbidity",
        "Fecal Coliform (MPN)",
        "Total Nitrogen",
        "Total Phosphorus",
    }

    # Calculate figure dimensions
    n_rows = len(analyte_names)
    fig_height = base_height * n_rows

    # squeeze=False always returns a 2-D array, so a single subplot needs no
    # special case
    fig, axes_grid = plt.subplots(n_rows, 1, figsize=(15, fig_height), squeeze=False)
    axes = axes_grid.flatten()

    custom_colors = [
        "#1f77b4",  # blue
        "#ff7f0e",  # orange
        "#2ca02c",  # green
        "#d62728",  # red
        "#9467bd",  # purple
        "#8c564b",  # brown
        "#e377c2",  # pink
        "#7f7f7f",  # gray
    ]
    sectors = df["Sector"].unique()

    for idx, analyte_name in enumerate(analyte_names):
        ax = axes[idx]

        # Filter data for current analyte
        analyte_data = df[df["Org_Analyte_Name"] == analyte_name]

        # For Salinity, exclude Fresh Water Lakes
        if analyte_name == "Salinity":
            analyte_data = analyte_data[analyte_data["Sector"] != "Fresh Water Lakes"]

        # Colors cycle through the palette so that sectors beyond the eighth
        # are still plotted instead of being silently dropped
        for position, sector in enumerate(sectors):
            color = custom_colors[position % len(custom_colors)]
            sector_data = (
                analyte_data[analyte_data["Sector"] == sector]
                .assign(Year=lambda d: d["Activity_Start_Date_Time"].dt.year)
                .groupby("Year")["Org_Result_Value"]
                .agg(["mean", "sem"])
                .reset_index()
            )

            if sector_data.empty:
                continue

            # Plot mean line
            ax.plot(
                sector_data["Year"],
                sector_data["mean"],
                "-o",
                color=color,
                label=sector,
                markersize=4,
                linewidth=2,  # Slightly thicker lines
            )

            # The standard error is undefined (NaN) for a single-sample year;
            # collapse the band onto the mean there rather than leaving a gap
            band_half_width = sector_data["sem"].fillna(0)
            ax.fill_between(
                sector_data["Year"],
                sector_data["mean"] - band_half_width,
                sector_data["mean"] + band_half_width,
                color=color,
                alpha=0.15,  # Reduced opacity for better visibility
            )

        # Set x-axis to show only whole years; rows with a missing timestamp
        # have no year and are excluded so the integer labels stay valid
        years = analyte_data["Activity_Start_Date_Time"].dt.year.dropna().unique()
        ax.set_xticks(years)
        ax.set_xticklabels(years.astype(int))

        # Customize subplot with lighter titles and no x-label
        ax.set_title(analyte_name, pad=10, fontsize=11, fontweight="normal")
        ax.set_xlabel("")

        if not analyte_data.empty:
            analyte_unit = analyte_data["Org_Result_Unit"].iloc[0]
            ax.set_ylabel(f"({analyte_unit})", fontsize=10)

        # Improve grid appearance
        ax.grid(True, alpha=0.2, linestyle="--")
        ax.spines["top"].set_visible(False)
        ax.spines["right"].set_visible(False)

        # Legend sits outside the axes, to the right
        ax.legend(
            bbox_to_anchor=(1.05, 1),
            loc="upper left",
            borderaxespad=0.0,
            frameon=True,
            fancybox=False,
            shadow=False,
            fontsize=9,
        )

        if analyte_name in log_scale_analytes:
            ax.set_yscale("log")

    # Lay out this figure explicitly rather than pyplot's "current" figure;
    # the right 15% is reserved for the legends
    fig.tight_layout(rect=(0, 0, 0.85, 1), h_pad=2.0)
    return fig
|
| 567 |
+
|
| 568 |
+
|
| 569 |
+
def plot_parameter_correlations(
    df: pd.DataFrame,
    analyte_names: list[str],
    subset_by: str,
    subset: str,
    filter_by: str,
    threshold: float = 0.2,
) -> Figure:
    """
    Draw a lower-triangle correlation heatmap between analytes.

    Results are pivoted to one column per analyte, indexed by
    ``Activity_Start_Date_Time``. Analytes whose fraction of non-missing
    values is below ``threshold`` are dropped from the matrix and listed in a
    footnote; pairwise correlations of the remaining columns are plotted.

    Args:
        df: Long-format results. The code reads the columns
            ``Org_Analyte_Name``, ``Activity_Start_Date_Time``,
            ``Org_Result_Value`` and ``Year``.
        analyte_names: Analytes to consider for the correlation matrix.
        subset_by: Label for the kind of subset, shown in the title.
        subset: Name of the subset, shown in the title.
        filter_by: Filter label for the subtitle; "All" is displayed as
            "Surface and Bottom".
        threshold: Minimum fraction of non-missing values an analyte needs
            to be included.

    Returns:
        The matplotlib figure holding the title, heatmap and footnote axes.
    """
    short_names = {
        "Depth, Secchi Disk Depth": "Secchi Depth",
        "Dissolved Oxygen": "DO",
        "Fecal Coliform (MPN)": "Fecal Coliform",
        "Total Nitrogen": "TN",
        "Total Phosphorus": "TP",
    }

    # One column per analyte, with shortened display names.
    selected = df[df["Org_Analyte_Name"].isin(analyte_names)]
    pivot_df = selected.pivot_table(
        index="Activity_Start_Date_Time",
        columns="Org_Analyte_Name",
        values="Org_Result_Value",
        observed=False,
    ).rename(columns=short_names)

    # Fraction of non-missing values per analyte decides what is kept.
    completeness = pivot_df.notna().mean()
    valid_params = completeness[completeness >= threshold].index
    excluded_params = completeness[completeness < threshold]

    corr = pivot_df[valid_params].corr()

    # Note: this is the row count of the input frame, not of the pivot.
    n_samples = len(df)

    # Three stacked axes: title strip, heatmap, footnote strip.
    fig = plt.figure(figsize=(6, 7))
    gs = fig.add_gridspec(3, 1, height_ratios=[1, 4, 1.5], hspace=0.4)
    title_ax = fig.add_subplot(gs[0])
    heatmap_ax = fig.add_subplot(gs[1])
    footnote_ax = fig.add_subplot(gs[2])

    # Mask the upper triangle (diagonal included) so each pair appears once.
    upper_triangle = np.triu(np.ones_like(corr, dtype=bool))
    heatmap = sns.heatmap(
        corr,
        mask=upper_triangle,
        annot=True,
        cmap="RdBu_r",
        center=0,
        vmin=-1,
        vmax=1,
        ax=heatmap_ax,
        yticklabels=1,
        cbar=True,
        xticklabels=1,
    )

    # Slanted x labels pushed away from the axis, horizontal y labels.
    heatmap_ax.set_xticklabels(
        heatmap_ax.get_xticklabels(), rotation=45, ha="right", rotation_mode="anchor"
    )
    heatmap_ax.tick_params(axis="x", pad=10)
    heatmap_ax.set_yticklabels(heatmap_ax.get_yticklabels(), rotation=0)
    heatmap_ax.set_xlabel("")
    heatmap_ax.set_ylabel("")

    # Pin the colorbar ticks before relabelling them (avoids the matplotlib
    # warning about setting labels without fixed ticks).
    cbar_ax = heatmap.figure.axes[-1]  # type: ignore
    cbar_ticks = cbar_ax.get_yticks()
    cbar_ax.set_yticks(cbar_ticks)
    cbar_ax.set_yticklabels([format(tick, ">8.2f") for tick in cbar_ticks])

    # The title and footnote axes only carry text: hide frame and ticks.
    for text_only_ax in (footnote_ax, title_ax):
        text_only_ax.set_frame_on(False)
        text_only_ax.set_xticks([])
        text_only_ax.set_yticks([])

    if not excluded_params.empty:
        footnote_lines = [
            f"Excluded parameters (<{threshold:.0%} data completeness):"
        ]
        footnote_lines.extend(
            f" - {param}: {completeness_val:.1%} complete"
            for param, completeness_val in excluded_params.items()
        )
        footnote_ax.text(
            0.01,
            0.40,
            "\n".join(footnote_lines).rstrip(),
            ha="left",
            va="center",
            fontsize=9,
            fontstyle="italic",
            transform=footnote_ax.transAxes,
        )

    display_filter = "Surface and Bottom" if filter_by == "All" else filter_by

    # A single distinct year is shown as-is, anything else as "All Years".
    distinct_years = df["Year"].unique()
    if len(distinct_years) == 1:
        year_info = df["Year"].iloc[0]
    else:
        year_info = "All Years"

    # Title and subtitle are positioned in figure coordinates.
    title_ax.text(
        0.45,
        0.8,
        f"{subset_by}: {subset}",
        ha="center",
        va="center",
        fontsize=12,
        fontweight="bold",
        transform=fig.transFigure,
    )
    title_ax.text(
        0.45,
        0.75,
        f"{display_filter}, {year_info} (n={n_samples:,})",
        ha="center",
        va="bottom",
        fontsize=10,
        fontstyle="italic",
        transform=fig.transFigure,
    )

    # Explicit spacing is used instead of tight_layout: render once, query
    # the tight bounding box, then set the subplot margins by hand.
    fig.canvas.draw()
    renderer = fig.canvas.get_renderer()  # type: ignore
    fig.get_tightbbox(renderer)
    fig.subplots_adjust(left=0.1, right=0.95, bottom=0.02, top=0.85, hspace=0.4)

    return fig
|
| 736 |
+
|
| 737 |
+
|
| 738 |
+
def plot_np_ratios(df: pd.DataFrame) -> Figure:
    """
    Plot nitrogen-to-phosphorus ratios over time and as a histogram.

    The ratio is "Total Nitrogen" divided by "Total Phosphorus" for each
    (``Activity_Start_Date_Time``, ``Sector``) pair. Both panels mark the
    value 16 with a dashed red "Redfield Ratio (16:1)" reference line.

    Args:
        df: Long-format results. The code reads the columns
            ``Org_Analyte_Name``, ``Activity_Start_Date_Time``, ``Sector``
            and ``Org_Result_Value``.

    Returns:
        A matplotlib figure with the scatter plot on top and the histogram
        below.
    """
    ratio_col = "N:P Ratio"
    redfield_label = "Redfield Ratio (16:1)"

    # Wide table: one row per timestamp/sector, one column per nutrient.
    is_nutrient = df["Org_Analyte_Name"].isin(["Total Nitrogen", "Total Phosphorus"])
    nutrients_df = (
        df[is_nutrient]
        .pivot_table(
            index=["Activity_Start_Date_Time", "Sector"],
            columns="Org_Analyte_Name",
            values="Org_Result_Value",
            observed=True,
        )
        .reset_index()
    )
    nutrients_df[ratio_col] = nutrients_df["Total Nitrogen"].div(
        nutrients_df["Total Phosphorus"]
    )

    fig, axes = plt.subplots(2, 1, figsize=(12, 10))
    timeline_ax, hist_ax = axes

    # Top panel: ratio through time, coloured by sector.
    sns.scatterplot(
        data=nutrients_df,
        x="Activity_Start_Date_Time",
        y=ratio_col,
        hue="Sector",
        ax=timeline_ax,
        alpha=0.6,
    )
    timeline_ax.axhline(y=16, color="r", linestyle="--", label=redfield_label)
    timeline_ax.set_ylabel("N:P Ratio")
    timeline_ax.set_xlabel("Date")
    timeline_ax.set_title("N:P Ratio Over Time")
    # Legend sits outside the axes, to the right.
    timeline_ax.legend(bbox_to_anchor=(1.05, 1), loc="upper left")

    # Bottom panel: distribution of the ratios that could be computed.
    sns.histplot(x=nutrients_df[ratio_col].dropna(), ax=hist_ax)
    hist_ax.axvline(x=16, color="r", linestyle="--", label=redfield_label)
    hist_ax.set_xlabel("N:P Ratio")
    hist_ax.set_title("Distribution of N:P Ratios")
    hist_ax.legend()

    # Reserve the right-hand 10% of the figure for the outside legend.
    plt.tight_layout(rect=(0, 0, 0.9, 1))
    return fig
|
| 786 |
+
|
| 787 |
+
|
| 788 |
+
def altair_plot_np_ratios(df: pd.DataFrame) -> alt.VConcatChart:
    """
    Build an interactive Altair view of nitrogen-to-phosphorus ratios.

    The ratio is "Total Nitrogen" divided by "Total Phosphorus" for each
    (``Activity_Start_Date_Time``, ``Sector``) pair. The result stacks a
    time-series scatter above a histogram; each has a red rule at 16.

    Args:
        df: Long-format results. The code reads the columns
            ``Org_Analyte_Name``, ``Activity_Start_Date_Time``, ``Sector``
            and ``Org_Result_Value``.

    Returns:
        The vertically concatenated chart, with independent y scales.
    """
    # The colon in the column name must be escaped in Altair shorthand.
    ratio_field = r"N\:P Ratio:Q"

    is_nutrient = df["Org_Analyte_Name"].isin(["Total Nitrogen", "Total Phosphorus"])
    nutrients_df = (
        df[is_nutrient]
        .pivot_table(
            index=["Activity_Start_Date_Time", "Sector"],
            columns="Org_Analyte_Name",
            values="Org_Result_Value",
            observed=True,
        )
        .reset_index()
    )
    nutrients_df["N:P Ratio"] = nutrients_df["Total Nitrogen"].div(
        nutrients_df["Total Phosphorus"]
    )

    # Scatter of the ratio through time, coloured by sector.
    date_axis = alt.X(
        "Activity_Start_Date_Time:T",
        axis=alt.Axis(format="%Y", tickCount="year"),
        title="Date",
    )
    scatter_tooltip = [
        alt.Tooltip("Activity_Start_Date_Time:T", title="Date"),
        alt.Tooltip(ratio_field, format=".0f", title="N:P Ratio"),
        alt.Tooltip("Sector:N", title="Sector"),
    ]
    time_series = (
        alt.Chart(nutrients_df)
        .mark_circle(size=60)
        .encode(
            x=date_axis,
            y=alt.Y(ratio_field, title="N:P Ratio"),
            color="Sector:N",
            tooltip=scatter_tooltip,
        )
        .properties(title="N:P Ratio Over Time", width=600, height=300)
        .interactive()
    )
    # Horizontal reference rule at 16 for the scatter.
    redfield_line = (
        alt.Chart(pd.DataFrame({"y": [16]})).mark_rule(color="red").encode(y="y:Q")
    )

    # Binned distribution of the ratio.
    histogram = (
        alt.Chart(nutrients_df)
        .mark_bar()
        .encode(
            x=alt.X(ratio_field, bin=alt.Bin(maxbins=30), title="N:P Ratio"),
            y="count()",
            tooltip=["count()"],
        )
        .properties(title="Distribution of N:P Ratios", width=600, height=300)
        .interactive()
    )
    # Vertical reference rule at 16 for the histogram.
    redfield_hist_line = (
        alt.Chart(pd.DataFrame({"x": [16]})).mark_rule(color="red").encode(x="x:Q")
    )

    upper_panel = alt.layer(time_series, redfield_line)
    lower_panel = alt.layer(histogram, redfield_hist_line)
    return alt.vconcat(upper_panel, lower_panel).resolve_scale(y="independent")
|
| 857 |
+
|
| 858 |
+
|
| 859 |
+
def plot_calendar_heatmap(
    df: pd.DataFrame, analyte: str, colormap: str | None = None
) -> Figure:
    """
    Plot a year-by-month heatmap of mean results for one analyte.

    Args:
        df: Long-format results. The code reads the columns
            ``Org_Analyte_Name``, ``Activity_Start_Date_Time``,
            ``Org_Result_Value`` and ``Org_Result_Unit``.
        analyte: Value of ``Org_Analyte_Name`` to plot.
        colormap: Optional colormap name. When given (and non-empty) it
            overrides the analyte-specific default.

    Returns:
        The matplotlib figure containing the heatmap; its height grows by
        0.5 inch per year present in the data.

    Raises:
        IndexError: If ``df`` has no rows for ``analyte`` (there is no unit
            to label the colorbar with).
    """
    # Analyte-specific default colormaps; anything else falls back to "Blues".
    default_cmaps = {
        "Fecal Coliform (MPN)": "viridis",
        "Temperature, Water": "coolwarm",
        "Dissolved Oxygen": "RdYlBu",
        "Total Nitrogen": "GnBu",
        "Total Phosphorus": "GnBu",
        "Depth, Secchi Disk Depth": "Blues_r",
    }

    data = df[df["Org_Analyte_Name"] == analyte].copy()
    data["Year"] = data["Activity_Start_Date_Time"].dt.year
    data["Month"] = data["Activity_Start_Date_Time"].dt.month

    # Look the unit up once, and before any figure is created, so that an
    # analyte without data fails without leaving an orphaned pyplot figure.
    unit = data["Org_Result_Unit"].iloc[0]

    pivot_data = data.pivot_table(
        values="Org_Result_Value", index="Year", columns="Month", aggfunc="mean"
    )

    cmap = colormap or default_cmaps.get(analyte, "Blues")

    fig, ax = plt.subplots(figsize=(6, len(pivot_data) * 0.5))

    # Draw on the axes created above rather than on pyplot's implicit
    # "current axes", which is global state.
    sns.heatmap(
        pivot_data,
        cmap=cmap,
        annot=True,
        fmt=".2f",
        cbar_kws={"label": unit},
        annot_kws={"size": 6},
        ax=ax,
    )

    ax.set_title(f"Monthly Averages Heatmap: {analyte}", fontsize=10, pad=5)
    ax.tick_params(axis="both", which="major", labelsize=7)

    # Shrink the colorbar tick labels and its unit label to match.
    colorbar = ax.collections[0].colorbar
    colorbar.ax.tick_params(labelsize=7)  # type: ignore
    colorbar.set_label(unit, size=7)  # type: ignore

    return fig
|
| 909 |
+
|
| 910 |
+
|
| 911 |
+
def plot_seasonal_salinity(
    salinity_data: pd.DataFrame,
    year: str,
    basemap_provider,
    alpha=0.5,
    shapefile_path="data/SAB/SAB.shp",
):
    """
    Create seasonal maps of mean salinity by WBID over a basemap.

    The figure is a 2x2 grid, one panel per season. Seasons are assigned by
    month: 1-3 Winter, 4-6 Spring, 7-9 Summer, 10-12 Fall. All panels share
    one colour scale running from the lowest seasonal mean up to 40.

    Args:
        salinity_data: Salinity measurements. The code reads the columns
            ``WBID``, ``Activity_Start_Date_Time`` and ``Salinity``.
        year: Year to filter data for (str, converted with ``int``).
        basemap_provider: Tile source passed to ``ctx.add_basemap``.
        alpha: Opacity of the basemap tiles.
        shapefile_path: Path of the WBID polygon file read with geopandas.

    Returns:
        The matplotlib figure with four maps and a shared colorbar.
    """
    season_names = ["Winter", "Spring", "Summer", "Fall"]

    # Keep only polygons that have salinity data; reproject to EPSG:3857.
    wbids = gpd.read_file(shapefile_path)
    wbids = wbids[wbids["WBID"].isin(salinity_data["WBID"].unique())]
    wbids = wbids.to_crs(epsg=3857)

    # Copy the year's rows so the season column is not added to a view.
    in_year = salinity_data["Activity_Start_Date_Time"].dt.year == int(year)
    year_data = salinity_data[in_year].copy()
    year_data.loc[:, "season"] = pd.cut(
        year_data["Activity_Start_Date_Time"].dt.month,
        bins=[0, 3, 6, 9, 12],
        labels=season_names,
    )

    # Mean salinity per polygon and season (observed combinations only).
    seasonal_means = (
        year_data.groupby(["WBID", "season"], observed=True)["Salinity"]
        .mean()
        .reset_index()
    )

    fig = plt.figure(figsize=(20, 14))

    # Custom blue -> yellow -> red colormap.
    palette = ["#08519c", "#73a9cf", "#fee090", "#fc8d59", "#d73027"]
    cmap = LinearSegmentedColormap.from_list("custom", palette, N=100)

    # Shared colour limits: data minimum up to a fixed ceiling of 40.
    vmin = seasonal_means["Salinity"].min()
    vmax = 40

    # Map extent: polygon bounds padded by 5% on each side.
    min_x, min_y, max_x, max_y = wbids.total_bounds
    x_buffer = (max_x - min_x) * 0.05
    y_buffer = (max_y - min_y) * 0.05
    x_limits = (min_x - x_buffer, max_x + x_buffer)
    y_limits = (min_y - y_buffer, max_y + y_buffer)

    # Tightly packed 2x2 grid; negative hspace pulls the rows together.
    gs = fig.add_gridspec(
        2,
        2,
        width_ratios=[1, 1],
        wspace=0.05,
        hspace=-0.15,
        left=0.02,
        right=0.98,
        top=0.95,
        bottom=0.05,
    )

    for idx, season in enumerate(season_names):
        row, col = divmod(idx, 2)
        ax = fig.add_subplot(gs[row, col])

        # Left merge keeps polygons without data; they are drawn in grey.
        season_data = seasonal_means[seasonal_means["season"] == season]
        merged = wbids.merge(season_data, on="WBID", how="left")
        merged.plot(
            column="Salinity",
            ax=ax,
            cmap=cmap,
            vmin=vmin,
            vmax=vmax,
            alpha=0.7,
            missing_kwds={"color": "lightgrey", "alpha": 0.5},
        )

        ctx.add_basemap(ax, source=basemap_provider, zoom=11, alpha=alpha)  # type: ignore

        ax.set_xlim(x_limits[0], x_limits[1])
        ax.set_ylim(y_limits[0], y_limits[1])

        # The top row gets more title padding than the bottom row.
        title_pad = 15 if idx < 2 else 5
        ax.set_title(f"{season} {year} Mean Salinity", pad=title_pad)
        ax.set_axis_off()

    # One colorbar for all panels, driven by a stand-alone mappable.
    norm = plt.Normalize(vmin=vmin, vmax=vmax)  # type: ignore
    sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
    sm.set_array([])
    fig.colorbar(
        sm,
        ax=fig.axes,
        orientation="vertical",
        label="Salinity (ppt)",
        pad=0.01,
        fraction=0.015,
        ticks=np.arange(0, 45, 5),  # a tick every 5 units
    )

    return fig
|
| 1031 |
+
|
| 1032 |
+
|
| 1033 |
+
def plot_seasonal_salinity_for_bays(
    salinity_data: pd.DataFrame,
    year: str,
    basemap_provider=ctx.providers.USGS.USTopo,  # type: ignore
    alpha=0.5,
    shapefile_path="data/SAB/SAB.shp",
):
    """
    Create seasonal plots of mean salinity values by WBID for N, E, W, SAB, GL and Lake Powell.

    Restricts ``salinity_data`` to a fixed list of WBIDs (1061A-1061H and
    1055A) and delegates the drawing to ``plot_seasonal_salinity``.

    Args:
        salinity_data: Salinity measurements; must have a ``WBID`` column.
        year: Year to plot, forwarded unchanged.
        basemap_provider: Basemap tile source, forwarded unchanged.
        alpha: Basemap opacity, forwarded unchanged.
        shapefile_path: Path of the WBID polygon file, forwarded unchanged.

    Returns:
        The figure produced by ``plot_seasonal_salinity``.
    """
    bay_filter = "WBID.isin(['1061A', '1061B', '1061C', '1061D', '1061E', '1061F', '1061G', '1061H', '1055A'])"
    bay_data = salinity_data.query(bay_filter)
    return plot_seasonal_salinity(
        bay_data,
        year=year,
        basemap_provider=basemap_provider,
        alpha=alpha,
        shapefile_path=shapefile_path,
    )
|
| 1053 |
+
|
| 1054 |
+
|
| 1055 |
+
def plot_do_temp_relationship(df: pd.DataFrame) -> Figure:
    """
    Create a scatter plot of DO vs temperature with regression line using seaborn.

    Dissolved oxygen and water temperature results are paired per
    (``Activity_Start_Date_Time``, ``Station_Number``, ``Sample_Position``);
    rows missing either value are dropped. Points and regression lines are
    coloured by sample position ("Surface", "Bottom"), and a dotted red line
    marks 5 mg/L dissolved oxygen.

    Parameters:
    -----------
    df : pd.DataFrame
        Input dataframe containing DO and temperature measurements. The code
        reads the columns ``Org_Analyte_Name``, ``Org_Result_Value``,
        ``Activity_Start_Date_Time``, ``Station_Number`` and
        ``Sample_Position``.

    Returns:
    --------
    Figure
        Matplotlib figure containing the plot
    """
    # Wide table: one row per sample, one column per analyte.
    do_temp_data = (
        df[df["Org_Analyte_Name"].isin(["Dissolved Oxygen", "Temperature, Water"])]
        .pivot_table(
            index=["Activity_Start_Date_Time", "Station_Number", "Sample_Position"],
            columns="Org_Analyte_Name",
            values="Org_Result_Value",
            observed=False,
        )
        .reset_index()
        .dropna(subset=["Dissolved Oxygen", "Temperature, Water"])
    )

    # Create plot with regression line. The palette is passed to this call
    # only: sns.set_palette() would change the colour cycle for every figure
    # drawn afterwards in the same process.
    g = sns.lmplot(
        data=do_temp_data,
        x="Temperature, Water",
        y="Dissolved Oxygen",
        hue="Sample_Position",
        hue_order=["Surface", "Bottom"],
        palette="muted",
        scatter_kws={"alpha": 0.6},
        height=8,
        aspect=1.5,
        legend=False,
    )

    # Add DO threshold and customize plot
    ax = g.axes[0, 0]
    ax.axhline(y=5, color="red", linestyle=":", alpha=0.5)
    # Label the threshold at the left edge of the axes, just above the line.
    ax.text(
        ax.get_xlim()[0],
        5.1,
        " 5 mg/L DO threshold",
        ha="left",
        va="bottom",
        color="red",
        alpha=0.5,
    )

    g.set_axis_labels("Water Temperature (°C)", "Dissolved Oxygen (mg/L)")
    ax.set_title("Dissolved Oxygen vs Water Temperature", pad=20, fontsize=16)
    # lmplot's own legend is disabled above; draw one outside the axes instead.
    ax.legend(title="Sample Position", bbox_to_anchor=(1.05, 1), loc="upper left")
    ax.grid(True, alpha=0.3)

    return g.figure
|
| 1116 |
+
|
| 1117 |
+
|
| 1118 |
+
def altair_plot_do_temp_relationship(df: pd.DataFrame) -> alt.LayerChart:
    """
    Create an interactive scatter plot of DO vs temperature with regression lines using Altair.

    Layers, in drawing order: the scatter, one regression line per sample
    position, a dashed red rule at 5 mg/L dissolved oxygen, and a text label
    for that rule.

    Parameters:
    -----------
    df : pd.DataFrame
        Input dataframe containing DO and temperature measurements. The code
        reads the columns ``Org_Analyte_Name``, ``Org_Result_Value``,
        ``Activity_Start_Date_Time``, ``Station_Number``,
        ``Sample_Position`` and ``Sector``.

    Returns:
    --------
    alt.LayerChart
        Interactive layered Altair chart
    """
    temp_col = "Temperature, Water"
    do_col = "Dissolved Oxygen"

    # Wide table: one row per sample, one column per analyte; rows missing
    # either measurement are dropped.
    sample_keys = [
        "Activity_Start_Date_Time",
        "Station_Number",
        "Sample_Position",
        "Sector",
    ]
    is_do_or_temp = df["Org_Analyte_Name"].isin([do_col, temp_col])
    do_temp_data = (
        df[is_do_or_temp]
        .pivot_table(
            index=sample_keys,
            columns="Org_Analyte_Name",
            values="Org_Result_Value",
            observed=False,
        )
        .reset_index()
        .dropna(subset=[do_col, temp_col])
    )

    # Fixed colours per sample position, shared by points and lines.
    position_scale = alt.Scale(
        domain=["Surface", "Bottom"],
        range=["#8da0cb", "#fc8d62"],
    )

    point_tooltip = [
        alt.Tooltip("Temperature, Water:Q", title="Temperature", format=".1f"),
        alt.Tooltip("Dissolved Oxygen:Q", title="DO", format=".1f"),
        alt.Tooltip("Sample_Position:N", title="Position"),
        alt.Tooltip("Sector:N", title="Sector"),
        alt.Tooltip("Station_Number:N", title="Station"),
    ]
    scatter = (
        alt.Chart(do_temp_data)
        .mark_circle(size=60, opacity=0.6)
        .encode(
            x=alt.X(
                "Temperature, Water:Q",
                title="Water Temperature (°C)",
                scale=alt.Scale(zero=False),
            ),
            y=alt.Y(
                "Dissolved Oxygen:Q",
                title="Dissolved Oxygen (mg/L)",
                scale=alt.Scale(zero=False),
            ),
            color=alt.Color(
                "Sample_Position:N",
                scale=position_scale,
                legend=alt.Legend(title="Sample Position"),
            ),
            tooltip=point_tooltip,
        )
    )

    # Regression line per sample position, derived from the scatter chart.
    regression = (
        scatter.transform_regression(temp_col, do_col, groupby=["Sample_Position"])
        .mark_line(size=2)
        .encode(color=alt.Color("Sample_Position:N", scale=position_scale))
    )

    # Dashed horizontal rule at the 5 mg/L threshold.
    threshold_line = (
        alt.Chart(pd.DataFrame({"y": [5]}))
        .mark_rule(strokeDash=[4, 4], color="red", opacity=0.5)
        .encode(y="y:Q")
    )

    # Label anchored at the lowest temperature, just above the rule.
    label_anchor = pd.DataFrame({"x": [do_temp_data[temp_col].min()], "y": [5.1]})
    threshold_label = (
        alt.Chart(label_anchor)
        .mark_text(
            align="left",
            baseline="bottom",
            color="red",
            opacity=0.5,
            text=" 5 mg/L DO threshold",
        )
        .encode(x="x:Q", y="y:Q")
    )

    layered = alt.layer(scatter, regression, threshold_line, threshold_label)
    return (
        layered.properties(width=800, height=750)
        .configure_axis(grid=True, gridOpacity=0.3)
        .interactive()
    )
|
app.py
ADDED
|
@@ -0,0 +1,1070 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import io
|
| 2 |
+
import json
|
| 3 |
+
import os
|
| 4 |
+
import textwrap
|
| 5 |
+
import time
|
| 6 |
+
import uuid
|
| 7 |
+
from datetime import datetime
|
| 8 |
+
from functools import wraps
|
| 9 |
+
from pathlib import Path
|
| 10 |
+
|
| 11 |
+
import pandas as pd
|
| 12 |
+
import plotly.express as px
|
| 13 |
+
import streamlit as st
|
| 14 |
+
from great_tables import GT, html
|
| 15 |
+
from matplotlib import pyplot as plt
|
| 16 |
+
|
| 17 |
+
from analysis import (
|
| 18 |
+
altair_plot_do_temp_relationship,
|
| 19 |
+
altair_plot_np_ratios,
|
| 20 |
+
altair_plot_sector_trends,
|
| 21 |
+
plot_analyte_trends,
|
| 22 |
+
plot_calendar_heatmap,
|
| 23 |
+
plot_do_temp_relationship,
|
| 24 |
+
plot_np_ratios,
|
| 25 |
+
plot_parameter_correlations,
|
| 26 |
+
plot_seasonal_salinity_for_bays,
|
| 27 |
+
plot_sector_trends,
|
| 28 |
+
)
|
| 29 |
+
from main import (
|
| 30 |
+
create_multiindex_columns,
|
| 31 |
+
create_overall_summary,
|
| 32 |
+
create_summary_by_station_and_position,
|
| 33 |
+
get_analyte_data_with_lat_long,
|
| 34 |
+
get_raw_data,
|
| 35 |
+
get_stations_data,
|
| 36 |
+
)
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def log_visit():
    """Record one visit in ``analytics.json``.

    Nothing is recorded while ``st.session_state["admin_authenticated"]`` is
    truthy. Otherwise a visit entry (timestamp, date, user agent, visitor id,
    current section, visit type and query parameters) is appended to the log,
    the counter for the current section is incremented and the list/count of
    distinct visitor ids for today is updated. A ``visitor_id`` is generated
    and stored in the session state the first time it is needed.

    A missing or malformed log file is treated as empty, so a damaged
    analytics file cannot take the page down.
    """
    if st.session_state.get("admin_authenticated", False):
        return

    log_file = Path("analytics.json")
    now = datetime.now()
    today = now.strftime("%Y-%m-%d")

    if "visitor_id" not in st.session_state:
        st.session_state.visitor_id = str(uuid.uuid4())
    visitor_id = st.session_state.visitor_id

    # Header access is best-effort: any failure falls back to "Unknown".
    try:
        user_agent = st.context.headers.get("User-Agent", "Unknown")
    except Exception:
        user_agent = "Unknown"

    visit_type = (
        "initial" if not st.session_state.get("logged_visit") else "section_change"
    )

    visit_data = {
        "timestamp": now.isoformat(),
        "date": today,
        "user_agent": user_agent,
        "visitor_id": visitor_id,
        "page_section": st.session_state.get("current_section", "Overall Summary"),
        "visit_type": visit_type,
        "query_params": dict(st.query_params),
    }

    # Load the existing log. json.JSONDecodeError and UnicodeDecodeError are
    # both ValueError subclasses; other I/O errors are left to propagate so a
    # transient read failure never causes the log to be overwritten.
    try:
        with open(log_file, "r") as f:
            loaded = json.load(f)
    except (FileNotFoundError, ValueError):
        loaded = None
    data = loaded if isinstance(loaded, dict) else {}

    # Guarantee every top-level bucket exists, whatever the file contained.
    data.setdefault("visits", [])
    data.setdefault("daily_counts", {})
    data.setdefault("section_counts", {})
    data.setdefault("daily_visitors", {})

    todays_visitors = data["daily_visitors"].setdefault(today, [])
    if visitor_id not in todays_visitors:
        todays_visitors.append(visitor_id)
        data["daily_counts"][today] = len(todays_visitors)

    data["visits"].append(visit_data)
    current_section = visit_data["page_section"]
    data["section_counts"][current_section] = (
        data["section_counts"].get(current_section, 0) + 1
    )

    with open(log_file, "w") as f:
        json.dump(data, f, indent=2)
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
# Log the first script run of a session once; the flag lives in session state
# so later reruns of the script skip this.
if not st.session_state.get("logged_visit"):
    log_visit()
    st.session_state["logged_visit"] = True

# Admin password comes from the environment; an empty string means none is set.
ADMIN_PASSWORD = os.environ.get("ADMIN_PASSWORD", "")
# Defaults for the sidebar toggles that rebind these names further down.
ENABLE_TIMING = False
ENABLE_ALTAIR = False
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
def check_admin_access():
    """Return whether the current session is an authenticated admin.

    The ``admin_authenticated`` session flag is always initialised (to
    ``False``) before anything else, so code that reads it afterwards never
    hits a missing key, even when no admin password is configured.

    Returns:
        bool: ``False`` when ``ADMIN_PASSWORD`` is empty, otherwise the value
        of ``st.session_state.admin_authenticated``.
    """
    if "admin_authenticated" not in st.session_state:
        st.session_state.admin_authenticated = False

    if not ADMIN_PASSWORD:
        return False

    return st.session_state.admin_authenticated
|
| 122 |
+
|
| 123 |
+
|
| 124 |
+
def render_admin_panel():
    """Render the admin login/logout controls in the sidebar.

    Shows a "Logout" button when the session is authenticated, otherwise a
    password field with a "Login" button. A successful login or a logout
    updates ``st.session_state.admin_authenticated`` and reruns the script.

    Nothing is rendered when ``ADMIN_PASSWORD`` is empty: admin mode is
    disabled in that case, and an empty configured password must never match
    an empty login attempt.
    """
    import hmac

    if not ADMIN_PASSWORD:
        return

    with st.sidebar:
        st.markdown("---")
        with st.expander("🔒 Admin", expanded=False):
            # .get() tolerates the flag not having been initialised yet.
            if st.session_state.get("admin_authenticated", False):
                if st.button("Logout"):
                    st.session_state.admin_authenticated = False
                    st.rerun()
            else:
                password_input = st.text_input("Password", type="password")
                if st.button("Login"):
                    # Constant-time comparison; bytes are used because
                    # compare_digest rejects non-ASCII str arguments.
                    password_ok = hmac.compare_digest(
                        password_input.encode("utf-8"),
                        ADMIN_PASSWORD.encode("utf-8"),
                    )
                    if password_ok:
                        st.session_state.admin_authenticated = True
                        st.rerun()
                    else:
                        st.error("Incorrect password")
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
st.set_page_config(
    page_title="Water Quality Summary",
    page_icon="💧",
    layout="wide",
    initial_sidebar_state="expanded",
    menu_items={"Get Help": None, "Report a bug": None, "About": None},
)

st.sidebar.title("Navigation")

sections = [
    "Overall Summary",
    "Summary by Station",
    "Nutrient Ratios",
    "Sector Trends",
    "Trends by Station",
    "Parameter Correlations",
    "DO/Temp Relationship",
    "Calendar Heatmaps",
    "Seasonal Trends",
    "Raw Data",
]

# Admin-only extras: the Analytics page and the timing toggle.
is_admin = check_admin_access()
if is_admin:
    sections.append("Analytics")
    ENABLE_TIMING = st.sidebar.toggle("Enable Timing", value=ENABLE_TIMING)

section = st.sidebar.radio(
    "Go to",
    sections,
)

# Track the current section for non-admin sessions and log section changes.
# The first run of a session is already logged as the "initial" visit earlier
# in this script, so the first assignment of current_section is not logged
# again; otherwise every new session would be counted twice.
if not st.session_state.get("admin_authenticated", False):
    previous_section = st.session_state.get("current_section")
    if previous_section != section:
        st.session_state.current_section = section
        if previous_section is not None:
            log_visit()

# The admin login/logout panel is only offered on the landing page.
if section == "Overall Summary":
    render_admin_panel()
|
| 191 |
+
|
| 192 |
+
|
| 193 |
+
def summarize_parameter_value(value: str, max_length: int = 100) -> str:
    """Return a short, log-friendly summary of a stringified parameter.

    Args:
        value: String form of the parameter value.
        max_length: Longest plain string returned without truncation.

    Returns:
        * ``""`` for an empty value.
        * ``"DataFrame[<dims>]"`` when the text mentions ``DataFrame`` and has
          a ``[...]`` pair; ``<dims>`` is the text between the first ``[`` and
          the first ``]`` after it.
        * For text starting with ``[``, ``(`` or ``{``: at most its first 20
          characters (followed by ``...`` only if something was cut) plus an
          item count estimated from the commas; an empty container counts as
          0 items.
        * Otherwise the value itself, truncated to ``max_length`` characters
          with a trailing ``...`` when it is longer.
    """
    if not value:
        return ""

    # Handle DataFrames, e.g. "DataFrame[1000x20]"
    if "DataFrame" in value:
        open_idx = value.find("[")
        # Only accept a closing bracket that comes after the opening one.
        close_idx = value.find("]", open_idx + 1) if open_idx != -1 else -1
        if close_idx != -1:
            return f"DataFrame[{value[open_idx + 1:close_idx]}]"

    # Handle lists, tuples, and other bracketed sequences
    if value.startswith(("[", "(", "{")):
        # Nothing but closing brackets/whitespace after the opener -> empty.
        has_items = bool(value[1:].rstrip("])}").strip())
        item_count = value.count(",") + 1 if has_items else 0
        preview = value if len(value) <= 20 else f"{value[:20]}..."
        return f"{preview} ({item_count} items)"

    # Handle long strings
    if len(value) > max_length:
        return f"{value[:max_length]}..."

    return value
|
| 221 |
+
|
| 222 |
+
|
| 223 |
+
def timer(include_params=False):
    """Build a decorator that records how long the wrapped function takes.

    While the module-level ``ENABLE_TIMING`` flag is falsy the wrapped
    function is called directly and nothing is recorded. Otherwise each call
    appends its duration (seconds, from ``time.perf_counter``) to
    ``st.session_state.timing_stats[func.__name__]`` and appends a log entry
    (timestamp, function name, duration) to ``st.session_state.timing_logs``.

    Args:
        include_params: When true, each log entry also gets a ``parameters``
            mapping of argument name to a summarised ``str()`` of its value
            (see ``summarize_parameter_value``).

    Returns:
        A decorator preserving the wrapped function's metadata.
    """
    import inspect

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            if not ENABLE_TIMING:
                return func(*args, **kwargs)

            start = time.perf_counter()
            result = func(*args, **kwargs)
            duration = time.perf_counter() - start

            # Initialise the two stores independently so that one being
            # present without the other cannot break the bookkeeping.
            if "timing_stats" not in st.session_state:
                st.session_state.timing_stats = {}
            if "timing_logs" not in st.session_state:
                st.session_state.timing_logs = []

            st.session_state.timing_stats.setdefault(func.__name__, []).append(
                duration
            )

            log_entry = {
                "timestamp": datetime.now().isoformat(),
                "function": func.__name__,
                "duration": duration,
            }

            if include_params:
                # Pair positional arguments with the declared parameter names;
                # zip() drops any surplus positional arguments.
                param_names = list(inspect.signature(func).parameters)
                param_values = {
                    name: summarize_parameter_value(str(arg), max_length=40)
                    for name, arg in zip(param_names, args)
                }
                param_values.update(
                    {
                        k: summarize_parameter_value(str(v), max_length=40)
                        for k, v in kwargs.items()
                    }
                )
                log_entry["parameters"] = param_values

            st.session_state.timing_logs.append(log_entry)

            return result

        return wrapper

    return decorator
|
| 285 |
+
|
| 286 |
+
|
| 287 |
+
@timer(include_params=False)
def load_raw_data():
    """Load the master data parquet file through ``get_raw_data``."""
    parquet_path = "data/master_data_file_2019-01-01_-_2024-10-31.parquet"
    return get_raw_data(parquet_path)
|
| 290 |
+
|
| 291 |
+
|
| 292 |
+
@timer(include_params=False)
def create_summaries(raw_df):
    """Build the summary views derived from the raw data.

    Returns:
        tuple: ``(summary_by_station, overall_summary, multiindex_df)``, where
        ``multiindex_df`` is ``create_multiindex_columns`` applied to the
        per-station summary.
    """
    by_station = create_summary_by_station_and_position(raw_df)
    overall = create_overall_summary(raw_df)
    return by_station, overall, create_multiindex_columns(by_station)
|
| 298 |
+
|
| 299 |
+
|
| 300 |
+
@timer(include_params=False)
def prepare_downloads(summary_by_station, multiindex_df, raw_df):
    """Pre-render the downloadable exports.

    Args:
        summary_by_station: Per-station summary; exported as CSV after
            ``reset_index()``.
        multiindex_df: Multi-index summary; written to the
            "Water Quality Summary" sheet of an in-memory Excel workbook.
        raw_df: Raw data; exported as CSV without the index.

    Returns:
        dict: ``{"summary": {"CSV": ..., "Excel": ...}, "raw": {"CSV": ...}}``
        where every leaf is a ``(payload, file_extension, mime_type)`` tuple.
    """
    summary_csv = summary_by_station.reset_index().to_csv(index=False)

    excel_buffer = io.BytesIO()
    with pd.ExcelWriter(excel_buffer, engine="xlsxwriter") as writer:
        multiindex_df.to_excel(writer, sheet_name="Water Quality Summary")
    summary_excel = excel_buffer.getvalue()

    raw_csv = raw_df.to_csv(index=False)

    # The workbook is an Office Open XML (.xlsx) file, so it is served with
    # the matching media type, not the legacy .xls one.
    xlsx_mime = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"

    return {
        "summary": {
            "CSV": (summary_csv, "csv", "text/csv"),
            "Excel": (summary_excel, "xlsx", xlsx_mime),
        },
        "raw": {
            "CSV": (raw_csv, "csv", "text/csv"),
        },
    }
|
| 317 |
+
|
| 318 |
+
|
| 319 |
+
@timer(include_params=False)
def load_seasonal_data(raw_df, analyte):
    """Fetch the data for ``analyte`` used by the seasonal trends view.

    Delegates to ``get_analyte_data_with_lat_long``.
    """
    seasonal_df = get_analyte_data_with_lat_long(raw_df, analyte)
    return seasonal_df
|
| 323 |
+
|
| 324 |
+
|
| 325 |
+
@timer(include_params=True)
def generate_seasonal_plot(data, year, shapefile_path):
    """Build the seasonal trends plot for ``year``.

    Delegates to ``plot_seasonal_salinity_for_bays``; the call is timed with
    its parameters included in the timing log.
    """
    plot = plot_seasonal_salinity_for_bays(
        data,
        year,
        shapefile_path=shapefile_path,
    )
    return plot
|
| 329 |
+
|
| 330 |
+
|
| 331 |
+
# if ENABLE_TIMING:
|
| 332 |
+
|
| 333 |
+
# def load_data():
|
| 334 |
+
# """
|
| 335 |
+
# Load all data views needed by the application.
|
| 336 |
+
|
| 337 |
+
# Returns:
|
| 338 |
+
# dict: Contains different views of the data
|
| 339 |
+
# """
|
| 340 |
+
# raw_df = load_raw_data()
|
| 341 |
+
# summary_by_station, overall_summary, multiindex_df = create_summaries(raw_df)
|
| 342 |
+
# downloads = prepare_downloads(summary_by_station, multiindex_df, raw_df)
|
| 343 |
+
# return {
|
| 344 |
+
# "raw_df": raw_df,
|
| 345 |
+
# "summary_by_station": summary_by_station,
|
| 346 |
+
# "overall_summary": overall_summary,
|
| 347 |
+
# "multiindex_df": multiindex_df,
|
| 348 |
+
# "downloads": downloads,
|
| 349 |
+
# }
|
| 350 |
+
# else:
|
| 351 |
+
|
| 352 |
+
|
| 353 |
+
@st.cache_data
def load_data():
    """
    Build (and cache) every data view the application needs.

    Returns:
        dict: The raw data plus the summaries and download payloads derived
        from it, under the keys ``raw_df``, ``summary_by_station``,
        ``overall_summary``, ``multiindex_df`` and ``downloads``.
    """
    raw_df = load_raw_data()
    summary_by_station, overall_summary, multiindex_df = create_summaries(raw_df)

    views = {
        "raw_df": raw_df,
        "summary_by_station": summary_by_station,
        "overall_summary": overall_summary,
        "multiindex_df": multiindex_df,
    }
    views["downloads"] = prepare_downloads(summary_by_station, multiindex_df, raw_df)
    return views
|
| 371 |
+
|
| 372 |
+
|
| 373 |
+
@st.cache_data
def generate_correlation_plot(
    subset_df, analyte_names, subset_by, subset, position_filter
):
    """Cached wrapper around ``plot_parameter_correlations``.

    All arguments are forwarded unchanged, in the same order.
    """
    return plot_parameter_correlations(
        subset_df,
        analyte_names,
        subset_by,
        subset,
        position_filter,
    )
|
| 381 |
+
|
| 382 |
+
|
| 383 |
+
def create_overall_summary_table(df: pd.DataFrame) -> GT:
    """Render the overall summary statistics as a formatted ``GT`` table.

    The index of ``df`` becomes the "Statistic" row-label column. The caller's
    DataFrame is not modified.

    Args:
        df: Summary statistics; every column is shown. The columns
            "Secchi Depth (feet)", "Temperature (°C)" and
            "Dissolved Oxygen (mg/L)" are formatted to one decimal place, and
            the row whose statistic is "Count" is formatted as integers with
            separators.

    Returns:
        GT: The table, with a centred header and two-line column labels.
    """
    # rename_axis() returns a new object, so the caller's index name is kept.
    table_df = df.rename_axis("Statistic").reset_index()
    value_columns = list(table_df.columns[1:])

    def two_line_label(col):
        # Split the label at its last space so the final word (the unit)
        # lands on a second line.
        if col == "pH":
            # NOTE(review): the character after <br> gives "pH" a second line
            # too; confirm whether it was meant to be a non-breaking space.
            return html(f"{col}<br> ")
        head, _, tail = col.rpartition(" ")
        return html(f"{head}<br>{tail}")

    return (
        GT(table_df, rowname_col="Statistic")
        .tab_header(
            title="Overall Water Quality",
            subtitle="Summary statistics for all data analyzed during study period",
        )
        .fmt_number(
            columns=[
                "Secchi Depth (feet)",
                "Temperature (°C)",
                "Dissolved Oxygen (mg/L)",
            ],
            decimals=1,
        )
        .fmt_integer(
            columns=value_columns,
            rows=lambda x: x["Statistic"] == "Count",  # type: ignore
            use_seps=True,
        )
        .cols_label(
            **{col: two_line_label(col) for col in value_columns}  # type: ignore
        )
        .cols_width(cases={col: "14%" for col in value_columns})
        .opt_align_table_header(align="center")
    )
|
| 417 |
+
|
| 418 |
+
|
| 419 |
+
data = load_data()
|
| 420 |
+
|
| 421 |
+
if section == "Overall Summary":
|
| 422 |
+
st.title("Overall Summary")
|
| 423 |
+
st.html(create_overall_summary_table(data["overall_summary"]).as_raw_html())
|
| 424 |
+
|
| 425 |
+
st.markdown("### Sampling Stations Map")
|
| 426 |
+
stations_df = get_stations_data()
|
| 427 |
+
fig = px.scatter_mapbox(
|
| 428 |
+
stations_df,
|
| 429 |
+
lat="Latitude",
|
| 430 |
+
lon="Longitude",
|
| 431 |
+
hover_data={
|
| 432 |
+
"Number": True,
|
| 433 |
+
"U_of_F": True,
|
| 434 |
+
"Sector": True,
|
| 435 |
+
"WBID": True,
|
| 436 |
+
"Latitude": False,
|
| 437 |
+
"Longitude": False,
|
| 438 |
+
},
|
| 439 |
+
hover_name="Name",
|
| 440 |
+
zoom=10,
|
| 441 |
+
height=700,
|
| 442 |
+
labels={
|
| 443 |
+
"Number": "Station Number",
|
| 444 |
+
"U_of_F": "ID",
|
| 445 |
+
"Sector": "Sector",
|
| 446 |
+
"WBID": "WBID",
|
| 447 |
+
},
|
| 448 |
+
)
|
| 449 |
+
fig.update_layout(
|
| 450 |
+
mapbox_style="carto-positron",
|
| 451 |
+
margin={"r": 0, "t": 0, "l": 0, "b": 0},
|
| 452 |
+
)
|
| 453 |
+
st.plotly_chart(fig, use_container_width=True)
|
| 454 |
+
|
| 455 |
+
|
| 456 |
+
elif section == "Summary by Station":
|
| 457 |
+
st.title("Summary by Station")
|
| 458 |
+
download_format = st.radio(
|
| 459 |
+
"Select download format:",
|
| 460 |
+
["CSV", "Excel"],
|
| 461 |
+
key="summary_download",
|
| 462 |
+
horizontal=True,
|
| 463 |
+
)
|
| 464 |
+
download_data = data["downloads"]["summary"][download_format]
|
| 465 |
+
st.download_button(
|
| 466 |
+
label=f"Download Summary Data ({download_format})",
|
| 467 |
+
data=download_data[0],
|
| 468 |
+
file_name=f"water_quality_summary.{download_data[1]}",
|
| 469 |
+
mime=download_data[2],
|
| 470 |
+
)
|
| 471 |
+
|
| 472 |
+
st.markdown("""
|
| 473 |
+
This table shows summary statistics for various water quality measurements across different stations.
|
| 474 |
+
Each station's measurements are broken down into surface and bottom readings where applicable.
|
| 475 |
+
""")
|
| 476 |
+
st.dataframe(
|
| 477 |
+
data["multiindex_df"]
|
| 478 |
+
.style.format(precision=2)
|
| 479 |
+
.highlight_null(props="background-color: lightgray"),
|
| 480 |
+
use_container_width=True,
|
| 481 |
+
height=600,
|
| 482 |
+
)
|
| 483 |
+
|
| 484 |
+
st.markdown("---")
|
| 485 |
+
total_stations = len(data["summary_by_station"].index.get_level_values(0).unique())
|
| 486 |
+
st.markdown(f"Total number of stations: **{total_stations}**")
|
| 487 |
+
|
| 488 |
+
elif section == "Trends by Station":
|
| 489 |
+
st.title("Trends by Station")
|
| 490 |
+
analyte_names = [
|
| 491 |
+
"Dissolved Oxygen",
|
| 492 |
+
"Salinity",
|
| 493 |
+
"pH",
|
| 494 |
+
"Depth, Secchi Disk Depth",
|
| 495 |
+
"Turbidity",
|
| 496 |
+
"Fecal Coliform (MPN)",
|
| 497 |
+
"Total Nitrogen",
|
| 498 |
+
"Total Phosphorus",
|
| 499 |
+
]
|
| 500 |
+
st.sidebar.markdown("### Filter Options")
|
| 501 |
+
|
| 502 |
+
selected_station = st.sidebar.selectbox(
|
| 503 |
+
"Station:",
|
| 504 |
+
sorted(data["raw_df"]["Station_Number"].unique()),
|
| 505 |
+
index=sorted(data["raw_df"]["Station_Number"].unique()).index("3.20"),
|
| 506 |
+
)
|
| 507 |
+
selected_position = st.sidebar.segmented_control(
|
| 508 |
+
"Sample Position:",
|
| 509 |
+
("All", "Surface", "Bottom"),
|
| 510 |
+
default="All",
|
| 511 |
+
selection_mode="single",
|
| 512 |
+
)
|
| 513 |
+
selected_position = selected_position or "All"
|
| 514 |
+
filtered_df = data["raw_df"].query("Station_Number == @selected_station")
|
| 515 |
+
if selected_position != "All":
|
| 516 |
+
filtered_df = filtered_df.query("Sample_Position == @selected_position")
|
| 517 |
+
|
| 518 |
+
csv_buffer = io.StringIO()
|
| 519 |
+
filtered_df.to_csv(csv_buffer, index=False)
|
| 520 |
+
st.sidebar.download_button(
|
| 521 |
+
label="Download Filtered Data (CSV)",
|
| 522 |
+
data=csv_buffer.getvalue(),
|
| 523 |
+
file_name=f"station_{selected_station}_{selected_position.lower()}_data.csv",
|
| 524 |
+
mime="text/csv",
|
| 525 |
+
)
|
| 526 |
+
|
| 527 |
+
with st.sidebar.expander("Preview Filtered Data"):
|
| 528 |
+
st.markdown(f"**{len(filtered_df):,}** records")
|
| 529 |
+
display_columns = [
|
| 530 |
+
"Activity_Start_Date_Time",
|
| 531 |
+
"Sample_Position",
|
| 532 |
+
"Org_Analyte_Name",
|
| 533 |
+
"Org_Result_Value",
|
| 534 |
+
"Org_Result_Unit",
|
| 535 |
+
]
|
| 536 |
+
preview_df = filtered_df[["Station_Number"] + display_columns].copy()
|
| 537 |
+
preview_df.set_index("Station_Number", inplace=True)
|
| 538 |
+
st.dataframe(
|
| 539 |
+
preview_df.style.format(precision=2),
|
| 540 |
+
use_container_width=True,
|
| 541 |
+
height=300,
|
| 542 |
+
)
|
| 543 |
+
|
| 544 |
+
if not filtered_df.empty:
|
| 545 |
+
fig = plot_analyte_trends(filtered_df, analyte_names, selected_position)
|
| 546 |
+
st.pyplot(fig)
|
| 547 |
+
else:
|
| 548 |
+
st.warning(
|
| 549 |
+
"No data available for the selected station and position combination."
|
| 550 |
+
)
|
| 551 |
+
|
| 552 |
+
elif section == "Sector Trends":
|
| 553 |
+
st.title("Sector Trends")
|
| 554 |
+
ENABLE_ALTAIR = st.sidebar.toggle("Interactive Plots", value=ENABLE_ALTAIR)
|
| 555 |
+
default_analytes = [
|
| 556 |
+
"Dissolved Oxygen",
|
| 557 |
+
"Salinity",
|
| 558 |
+
"Depth, Secchi Disk Depth",
|
| 559 |
+
"Total Nitrogen",
|
| 560 |
+
"Total Phosphorus",
|
| 561 |
+
]
|
| 562 |
+
all_analytes = default_analytes + [
|
| 563 |
+
x
|
| 564 |
+
for x in sorted(data["raw_df"]["Org_Analyte_Name"].unique())
|
| 565 |
+
if x not in default_analytes
|
| 566 |
+
]
|
| 567 |
+
|
| 568 |
+
selected_analytes = st.sidebar.multiselect(
|
| 569 |
+
"Select Analytes:",
|
| 570 |
+
options=all_analytes,
|
| 571 |
+
default=default_analytes,
|
| 572 |
+
key="sector_analyte_select",
|
| 573 |
+
help="Choose one or more analytes to plot.",
|
| 574 |
+
)
|
| 575 |
+
if selected_analytes and not data["raw_df"].empty:
|
| 576 |
+
if ENABLE_ALTAIR:
|
| 577 |
+
charts = altair_plot_sector_trends(data["raw_df"], selected_analytes)
|
| 578 |
+
st.altair_chart(charts, use_container_width=True) # type: ignore
|
| 579 |
+
else:
|
| 580 |
+
fig = plot_sector_trends(data["raw_df"], selected_analytes, base_height=3.5)
|
| 581 |
+
st.pyplot(fig)
|
| 582 |
+
elif not selected_analytes:
|
| 583 |
+
st.warning("No analytes selected.")
|
| 584 |
+
else:
|
| 585 |
+
st.warning("No data available for the selected analytes.")
|
| 586 |
+
|
| 587 |
+
elif section == "Parameter Correlations":
|
| 588 |
+
st.title("Parameter Correlations")
|
| 589 |
+
subset_by = "Sector"
|
| 590 |
+
st.sidebar.markdown("### Filter Options")
|
| 591 |
+
position_filter = st.sidebar.selectbox(
|
| 592 |
+
"Sample Position:", ["All", "Surface", "Bottom"], index=0
|
| 593 |
+
)
|
| 594 |
+
with st.spinner("Loading data for correlation plots..."):
|
| 595 |
+
analyte_names = [
|
| 596 |
+
"Dissolved Oxygen",
|
| 597 |
+
"Salinity",
|
| 598 |
+
"pH",
|
| 599 |
+
"Depth, Secchi Disk Depth",
|
| 600 |
+
"Turbidity",
|
| 601 |
+
"Fecal Coliform (MPN)",
|
| 602 |
+
"Total Nitrogen",
|
| 603 |
+
"Total Phosphorus",
|
| 604 |
+
]
|
| 605 |
+
raw_df = data["raw_df"]
|
| 606 |
+
raw_df["Year"] = raw_df["Activity_Start_Date_Time"].dt.year
|
| 607 |
+
years = ["All"] + sorted(raw_df["Year"].unique().tolist(), reverse=True)
|
| 608 |
+
year_filter = st.sidebar.selectbox("Year:", years, index=0)
|
| 609 |
+
plot_df = raw_df.copy()
|
| 610 |
+
if position_filter != "All":
|
| 611 |
+
plot_df = plot_df[plot_df["Sample_Position"] == position_filter]
|
| 612 |
+
if year_filter != "All":
|
| 613 |
+
plot_df = plot_df[plot_df["Year"] == year_filter]
|
| 614 |
+
plot_df_download = plot_df.copy()
|
| 615 |
+
csv_buffer = io.StringIO()
|
| 616 |
+
plot_df_download.to_csv(csv_buffer, index=False)
|
| 617 |
+
st.sidebar.download_button(
|
| 618 |
+
label="Download Filtered Data (CSV)",
|
| 619 |
+
data=csv_buffer.getvalue(),
|
| 620 |
+
file_name=f"correlation_data_{subset_by}_{position_filter}_{year_filter}.csv",
|
| 621 |
+
mime="text/csv",
|
| 622 |
+
)
|
| 623 |
+
st.sidebar.markdown("### Group By")
|
| 624 |
+
subset_by = st.sidebar.selectbox(
|
| 625 |
+
"Group correlations by:", ["Sector", "Waterbody_Class"], index=0
|
| 626 |
+
)
|
| 627 |
+
unique_subsets = sorted(plot_df[subset_by].unique())
|
| 628 |
+
|
| 629 |
+
selected_groups = st.sidebar.multiselect(
|
| 630 |
+
"Select groups to display:",
|
| 631 |
+
options=unique_subsets,
|
| 632 |
+
default=unique_subsets,
|
| 633 |
+
key="group_selector",
|
| 634 |
+
)
|
| 635 |
+
# Add ordering control
|
| 636 |
+
order_by = st.sidebar.radio(
|
| 637 |
+
"Order groups by:", ["Number of Records", "Alphabetical"], key="group_order"
|
| 638 |
+
)
|
| 639 |
+
|
| 640 |
+
### FIX THIS
|
| 641 |
+
# Add download button for grouped correlation data
|
| 642 |
+
if selected_groups:
|
| 643 |
+
grouped_data = []
|
| 644 |
+
for group in selected_groups:
|
| 645 |
+
subset_df = plot_df[plot_df[subset_by] == group]
|
| 646 |
+
if not subset_df.empty:
|
| 647 |
+
# Filter for just the analytes we want to correlate
|
| 648 |
+
analyte_df = subset_df[
|
| 649 |
+
subset_df["Org_Analyte_Name"].isin(analyte_names)
|
| 650 |
+
].copy()
|
| 651 |
+
analyte_df["Group"] = group
|
| 652 |
+
grouped_data.append(analyte_df)
|
| 653 |
+
|
| 654 |
+
if grouped_data:
|
| 655 |
+
combined_data = pd.concat(grouped_data)
|
| 656 |
+
csv_buffer = io.StringIO()
|
| 657 |
+
combined_data.to_csv(csv_buffer, index=False)
|
| 658 |
+
st.sidebar.download_button(
|
| 659 |
+
label="Download Grouped Correlation Data (CSV)",
|
| 660 |
+
data=csv_buffer.getvalue(),
|
| 661 |
+
file_name=f"grouped_correlation_data_{subset_by}_{position_filter}_{year_filter}.csv",
|
| 662 |
+
mime="text/csv",
|
| 663 |
+
)
|
| 664 |
+
|
| 665 |
+
# Order the selected groups
|
| 666 |
+
if order_by == "Number of Records":
|
| 667 |
+
group_counts = {
|
| 668 |
+
group: len(plot_df[plot_df[subset_by] == group])
|
| 669 |
+
for group in selected_groups
|
| 670 |
+
}
|
| 671 |
+
selected_groups = sorted(
|
| 672 |
+
selected_groups, key=lambda x: group_counts[x], reverse=True
|
| 673 |
+
)
|
| 674 |
+
else:
|
| 675 |
+
selected_groups = sorted(selected_groups)
|
| 676 |
+
|
| 677 |
+
# Loop with filtered groups
|
| 678 |
+
cols = st.columns(2)
|
| 679 |
+
for idx, subset in enumerate(selected_groups):
|
| 680 |
+
subset_df = plot_df[plot_df[subset_by] == subset]
|
| 681 |
+
if not subset_df.empty:
|
| 682 |
+
fig = generate_correlation_plot(
|
| 683 |
+
subset_df, analyte_names, subset_by, subset, position_filter
|
| 684 |
+
)
|
| 685 |
+
cols[idx % 2].pyplot(fig)
|
| 686 |
+
plt.close()
|
| 687 |
+
with cols[idx % 2].expander(f"View {subset} Data"):
|
| 688 |
+
st.markdown(f"**{len(subset_df):,}** records")
|
| 689 |
+
display_columns = [
|
| 690 |
+
"Activity_Start_Date_Time",
|
| 691 |
+
"Station_Number",
|
| 692 |
+
"Sample_Position",
|
| 693 |
+
"Org_Analyte_Name",
|
| 694 |
+
"Org_Result_Value",
|
| 695 |
+
"Org_Result_Unit",
|
| 696 |
+
]
|
| 697 |
+
st.dataframe(
|
| 698 |
+
subset_df[display_columns].style.format(precision=2),
|
| 699 |
+
use_container_width=True,
|
| 700 |
+
height=300,
|
| 701 |
+
)
|
| 702 |
+
csv_buffer = io.StringIO()
|
| 703 |
+
subset_df.to_csv(csv_buffer, index=False)
|
| 704 |
+
st.download_button(
|
| 705 |
+
label=f"Download {subset} Data (CSV)",
|
| 706 |
+
data=csv_buffer.getvalue(),
|
| 707 |
+
file_name=f"correlation_data_{subset}_{position_filter}_{year_filter}.csv",
|
| 708 |
+
mime="text/csv",
|
| 709 |
+
)
|
| 710 |
+
|
| 711 |
+
elif section == "DO/Temp Relationship":
|
| 712 |
+
ENABLE_ALTAIR = st.sidebar.toggle("Interactive Plot", value=ENABLE_ALTAIR)
|
| 713 |
+
st.title("DO/Temp Relationship")
|
| 714 |
+
st.markdown(
|
| 715 |
+
"This plot shows the relationship between dissolved oxygen and water temperature for all data."
|
| 716 |
+
)
|
| 717 |
+
if ENABLE_ALTAIR:
|
| 718 |
+
fig = altair_plot_do_temp_relationship(data["raw_df"])
|
| 719 |
+
st.altair_chart(fig, use_container_width=True) # type: ignore
|
| 720 |
+
else:
|
| 721 |
+
fig = plot_do_temp_relationship(data["raw_df"])
|
| 722 |
+
st.pyplot(fig)
|
| 723 |
+
|
| 724 |
+
elif section == "Calendar Heatmaps":
|
| 725 |
+
st.title("Calendar Heatmaps")
|
| 726 |
+
st.info(
|
| 727 |
+
"💡 You can customize the colormaps using the 'Plot Settings' expander in the sidebar."
|
| 728 |
+
)
|
| 729 |
+
raw_df = data["raw_df"]
|
| 730 |
+
raw_df["Date"] = pd.to_datetime(raw_df["Activity_Start_Date_Time"]).dt.date
|
| 731 |
+
|
| 732 |
+
default_analytes = [
|
| 733 |
+
"Temperature, Water",
|
| 734 |
+
"Dissolved Oxygen",
|
| 735 |
+
"Salinity",
|
| 736 |
+
"pH",
|
| 737 |
+
"Turbidity",
|
| 738 |
+
"Depth, Secchi Disk Depth",
|
| 739 |
+
"Fecal Coliform (MPN)",
|
| 740 |
+
"Total Nitrogen",
|
| 741 |
+
"Total Phosphorus",
|
| 742 |
+
"Chlorophyll-uncorrected",
|
| 743 |
+
]
|
| 744 |
+
|
| 745 |
+
# Get all unique analytes and ensure defaults are at the start of the list
|
| 746 |
+
all_analytes = default_analytes + [
|
| 747 |
+
x
|
| 748 |
+
for x in sorted(raw_df["Org_Analyte_Name"].unique())
|
| 749 |
+
if x not in default_analytes
|
| 750 |
+
]
|
| 751 |
+
selected_analytes = st.sidebar.multiselect(
|
| 752 |
+
"Select Analytes:",
|
| 753 |
+
options=all_analytes,
|
| 754 |
+
default=default_analytes,
|
| 755 |
+
key="calendar_analyte_select",
|
| 756 |
+
help="Choose one or more analytes to display in the heatmap.",
|
| 757 |
+
)
|
| 758 |
+
|
| 759 |
+
# Filter Options
|
| 760 |
+
st.sidebar.markdown("### Filter Options")
|
| 761 |
+
sector_filter = st.sidebar.selectbox(
|
| 762 |
+
"Sector:",
|
| 763 |
+
["All"] + sorted(raw_df["Sector"].unique().tolist()),
|
| 764 |
+
index=0,
|
| 765 |
+
key="calendar_sector_select",
|
| 766 |
+
)
|
| 767 |
+
position_filter = st.sidebar.selectbox(
|
| 768 |
+
"Position:",
|
| 769 |
+
["All", "Surface", "Bottom"],
|
| 770 |
+
index=0,
|
| 771 |
+
key="calendar_position_select",
|
| 772 |
+
)
|
| 773 |
+
|
| 774 |
+
def format_colormap_option(option):
    """Return the display label for a colormap option.

    The label is the option itself followed by a bracketed family tag, e.g.
    ``"viridis [Sequential]"``. Options that belong to no listed family
    (such as ``"Default"``) are returned unchanged.

    Args:
        option: Colormap name as it appears in the "Color Scheme" options.

    Returns:
        The option with its family suffix appended, or the bare option.
    """
    # Suffix -> colormaps carrying that suffix. "GnBu" is a multi-hue scheme
    # and is grouped under "Sequential (Multi-hue)" in the option list, so it
    # is tagged the same way here instead of as plain "Sequential".
    families = {
        " [Sequential]": (
            "viridis",
            "plasma",
            "inferno",
            "magma",
            "Blues",
            "Blues_r",
        ),
        " [Sequential (Multi-hue)]": (
            "GnBu",
            "YlOrRd",
            "YlGnBu",
            "RdPu",
        ),
        " [Diverging]": (
            "RdYlBu",
            "RdBu",
            "coolwarm",
        ),
    }
    for suffix, members in families.items():
        if option in members:
            return option + suffix
    return option
|
| 799 |
+
|
| 800 |
+
colormap_help_text = """
|
| 801 |
+
Any selection here will override the default color scheme for all of the displayed
|
| 802 |
+
heatmaps. Selecting Default will revert to the analyte-specific default color schemes.
|
| 803 |
+
|
| 804 |
+
**The default color schemes are:**
|
| 805 |
+
|
| 806 |
+
`Fecal Coliform (MPN)` : `viridis` _(blue-green-yellow)_
|
| 807 |
+
`Temperature, Water` : `coolwarm` _(red-white-blue)_
|
| 808 |
+
`Dissolved Oxygen` : `RdYlBu` _(red-yellow-blue)_
|
| 809 |
+
`Total Nitrogen/Phosphorus` : `GnBu` _(green-blue)_
|
| 810 |
+
`Depth, Secchi Disk Depth` : `Blues_r` _(reversed blues)_
|
| 811 |
+
`All other analytes` : `Blues` _(blue)_
|
| 812 |
+
"""
|
| 813 |
+
with st.sidebar.expander("Plot Settings", expanded=False):
|
| 814 |
+
colormap = st.radio(
|
| 815 |
+
"Color Scheme",
|
| 816 |
+
options=[
|
| 817 |
+
"Default",
|
| 818 |
+
# Sequential (Perceptually Uniform)
|
| 819 |
+
"viridis",
|
| 820 |
+
"plasma",
|
| 821 |
+
"inferno",
|
| 822 |
+
"magma",
|
| 823 |
+
# Sequential (Single-hue)
|
| 824 |
+
"Blues",
|
| 825 |
+
"Blues_r",
|
| 826 |
+
# Sequential (Multi-hue)
|
| 827 |
+
"GnBu",
|
| 828 |
+
"YlOrRd",
|
| 829 |
+
"YlGnBu",
|
| 830 |
+
"RdPu",
|
| 831 |
+
# Diverging
|
| 832 |
+
"RdYlBu",
|
| 833 |
+
"RdBu",
|
| 834 |
+
"coolwarm",
|
| 835 |
+
],
|
| 836 |
+
index=0,
|
| 837 |
+
help=colormap_help_text,
|
| 838 |
+
format_func=format_colormap_option,
|
| 839 |
+
)
|
| 840 |
+
|
| 841 |
+
if colormap == "Default":
|
| 842 |
+
colormap = None
|
| 843 |
+
|
| 844 |
+
# Filter data
|
| 845 |
+
plot_df = raw_df.copy()
|
| 846 |
+
if sector_filter != "All":
|
| 847 |
+
plot_df = plot_df[plot_df["Sector"] == sector_filter]
|
| 848 |
+
if position_filter != "All":
|
| 849 |
+
plot_df = plot_df[plot_df["Sample_Position"] == position_filter]
|
| 850 |
+
if not plot_df.empty:
|
| 851 |
+
for analyte in selected_analytes:
|
| 852 |
+
fig = plot_calendar_heatmap(plot_df, analyte, colormap)
|
| 853 |
+
st.pyplot(fig)
|
| 854 |
+
else:
|
| 855 |
+
st.warning("No data available for the selected filters.")
|
| 856 |
+
|
| 857 |
+
elif section == "Seasonal Trends":
|
| 858 |
+
st.title("Seasonal Trends")
|
| 859 |
+
raw_df = data["raw_df"]
|
| 860 |
+
years = sorted(pd.to_datetime(raw_df["Activity_Start_Date_Time"]).dt.year.unique())
|
| 861 |
+
col1, col2 = st.columns(2)
|
| 862 |
+
with col1:
|
| 863 |
+
analyte = st.selectbox(
|
| 864 |
+
"Select Analyte:", ["Salinity"], index=0, key="seasonal_analyte_select"
|
| 865 |
+
)
|
| 866 |
+
with col2:
|
| 867 |
+
selected_year = st.selectbox(
|
| 868 |
+
"Select Year:",
|
| 869 |
+
sorted(years, reverse=True),
|
| 870 |
+
index=0,
|
| 871 |
+
key="seasonal_year_select",
|
| 872 |
+
)
|
| 873 |
+
if not raw_df.empty:
|
| 874 |
+
seasonal_data = load_seasonal_data(raw_df, analyte)
|
| 875 |
+
fig = generate_seasonal_plot(
|
| 876 |
+
seasonal_data,
|
| 877 |
+
str(selected_year),
|
| 878 |
+
shapefile_path="data/SAB/SAB.shp",
|
| 879 |
+
)
|
| 880 |
+
st.pyplot(fig)
|
| 881 |
+
else:
|
| 882 |
+
st.warning("No data available for seasonal analysis.")
|
| 883 |
+
|
| 884 |
+
elif section == "Nutrient Ratios":
|
| 885 |
+
ENABLE_ALTAIR = st.sidebar.toggle("Interactive Plots", value=ENABLE_ALTAIR)
|
| 886 |
+
st.title("Nutrient Ratios")
|
| 887 |
+
raw_df = data["raw_df"]
|
| 888 |
+
if not raw_df.empty:
|
| 889 |
+
if ENABLE_ALTAIR:
|
| 890 |
+
vconcat = altair_plot_np_ratios(raw_df)
|
| 891 |
+
st.altair_chart(vconcat, use_container_width=True) # type: ignore
|
| 892 |
+
else:
|
| 893 |
+
fig = plot_np_ratios(raw_df)
|
| 894 |
+
st.pyplot(fig)
|
| 895 |
+
else:
|
| 896 |
+
st.warning("No data available for nutrient ratio analysis.")
|
| 897 |
+
|
| 898 |
+
elif section == "Raw Data":
|
| 899 |
+
st.title("Raw Data")
|
| 900 |
+
raw_df = data["raw_df"]
|
| 901 |
+
raw_download_format = st.radio(
|
| 902 |
+
"Select download format:",
|
| 903 |
+
["CSV", "Excel"],
|
| 904 |
+
key="raw_download",
|
| 905 |
+
horizontal=True,
|
| 906 |
+
)
|
| 907 |
+
|
| 908 |
+
if raw_download_format == "Excel":
|
| 909 |
+
excel_buffer = io.BytesIO()
|
| 910 |
+
with pd.ExcelWriter(excel_buffer, engine="xlsxwriter") as writer:
|
| 911 |
+
raw_df.to_excel(writer, sheet_name="Raw Water Quality Data", index=False)
|
| 912 |
+
raw_excel = excel_buffer.getvalue()
|
| 913 |
+
download_data = (raw_excel, "xlsx", "application/vnd.ms-excel")
|
| 914 |
+
else:
|
| 915 |
+
download_data = data["downloads"]["raw"]["CSV"]
|
| 916 |
+
|
| 917 |
+
st.download_button(
|
| 918 |
+
label=f"Download Raw Data ({raw_download_format})",
|
| 919 |
+
data=download_data[0],
|
| 920 |
+
file_name=f"water_quality_raw_2019-01-01_-_2024-10-31.{download_data[1]}",
|
| 921 |
+
mime=download_data[2],
|
| 922 |
+
)
|
| 923 |
+
st.markdown(f"""
|
| 924 |
+
Preview of the first 1,000 of {raw_df.shape[0]:,} records in the dataset.
|
| 925 |
+
""")
|
| 926 |
+
st.dataframe(
|
| 927 |
+
raw_df.head(1000).style.format(precision=2),
|
| 928 |
+
use_container_width=True,
|
| 929 |
+
height=600,
|
| 930 |
+
)
|
| 931 |
+
|
| 932 |
+
elif section == "Analytics":
|
| 933 |
+
st.title("Analytics")
|
| 934 |
+
|
| 935 |
+
log_file = Path("analytics.json")
|
| 936 |
+
if log_file.exists():
|
| 937 |
+
with open(log_file, "r") as f:
|
| 938 |
+
analytics_data = json.load(f)
|
| 939 |
+
|
| 940 |
+
col1, col2 = st.columns(2)
|
| 941 |
+
|
| 942 |
+
with col1:
|
| 943 |
+
visits_df = pd.DataFrame(analytics_data["visits"])
|
| 944 |
+
visits_df["timestamp"] = pd.to_datetime(visits_df["timestamp"])
|
| 945 |
+
|
| 946 |
+
daily_visits_df = (
|
| 947 |
+
visits_df.groupby("date")["visitor_id"]
|
| 948 |
+
.agg(["nunique", "count"])
|
| 949 |
+
.reset_index()
|
| 950 |
+
.rename(columns={"nunique": "Unique Visitors", "count": "Total Views"})
|
| 951 |
+
)
|
| 952 |
+
daily_visits_df["date"] = pd.to_datetime(daily_visits_df["date"])
|
| 953 |
+
daily_visits_df = daily_visits_df.sort_values("date")
|
| 954 |
+
|
| 955 |
+
total_unique_visitors = visits_df["visitor_id"].nunique()
|
| 956 |
+
total_views = len(visits_df)
|
| 957 |
+
avg_views_per_visitor = total_views / total_unique_visitors
|
| 958 |
+
|
| 959 |
+
st.subheader("Visitor Metrics")
|
| 960 |
+
metrics_col1, metrics_col2, metrics_col3 = st.columns(3)
|
| 961 |
+
metrics_col1.metric("Total Unique Visitors", total_unique_visitors)
|
| 962 |
+
metrics_col2.metric("Total Page Views", total_views)
|
| 963 |
+
metrics_col3.metric("Avg Views per Visitor", f"{avg_views_per_visitor:.1f}")
|
| 964 |
+
|
| 965 |
+
st.subheader("Daily Statistics")
|
| 966 |
+
st.dataframe(
|
| 967 |
+
daily_visits_df.style.format(
|
| 968 |
+
{"Unique Visitors": "{:,.0f}", "Total Views": "{:,.0f}"}
|
| 969 |
+
),
|
| 970 |
+
hide_index=True,
|
| 971 |
+
)
|
| 972 |
+
|
| 973 |
+
with col2:
|
| 974 |
+
section_visits_df = pd.DataFrame(
|
| 975 |
+
{
|
| 976 |
+
"Section": analytics_data["section_counts"].keys(),
|
| 977 |
+
"Views": analytics_data["section_counts"].values(),
|
| 978 |
+
}
|
| 979 |
+
)
|
| 980 |
+
section_visits_df = section_visits_df.sort_values("Views", ascending=True)
|
| 981 |
+
|
| 982 |
+
st.subheader("Total Section Views")
|
| 983 |
+
st.bar_chart(section_visits_df.set_index("Section"))
|
| 984 |
+
|
| 985 |
+
with st.expander("Raw Visit Data"):
|
| 986 |
+
visits_df = pd.DataFrame(analytics_data["visits"])
|
| 987 |
+
visits_df["timestamp"] = pd.to_datetime(visits_df["timestamp"])
|
| 988 |
+
st.dataframe(visits_df)
|
| 989 |
+
else:
|
| 990 |
+
st.warning("No analytics data available.")
|
| 991 |
+
|
| 992 |
+
if ENABLE_TIMING:
|
| 993 |
+
st.markdown("---")
|
| 994 |
+
st.subheader("⚡ Performance Metrics")
|
| 995 |
+
|
| 996 |
+
if hasattr(st.session_state, "timing_stats"):
|
| 997 |
+
st.markdown("#### Summary Statistics")
|
| 998 |
+
# Create a summary dataframe with min, max, mean, and count
|
| 999 |
+
timing_summary = []
|
| 1000 |
+
for func_name, durations in st.session_state.timing_stats.items():
|
| 1001 |
+
timing_summary.append(
|
| 1002 |
+
{
|
| 1003 |
+
"Function": func_name,
|
| 1004 |
+
"Min (seconds)": min(durations),
|
| 1005 |
+
"Max (seconds)": max(durations),
|
| 1006 |
+
"Mean (seconds)": sum(durations) / len(durations),
|
| 1007 |
+
"Calls": len(durations),
|
| 1008 |
+
}
|
| 1009 |
+
)
|
| 1010 |
+
|
| 1011 |
+
timing_df = pd.DataFrame(timing_summary).sort_values(
|
| 1012 |
+
"Mean (seconds)", ascending=False
|
| 1013 |
+
)
|
| 1014 |
+
|
| 1015 |
+
st.dataframe(
|
| 1016 |
+
timing_df.style.format(
|
| 1017 |
+
{
|
| 1018 |
+
"Min (seconds)": "{:.2f}",
|
| 1019 |
+
"Max (seconds)": "{:.2f}",
|
| 1020 |
+
"Mean (seconds)": "{:.2f}",
|
| 1021 |
+
"Calls": "{:,.0f}",
|
| 1022 |
+
}
|
| 1023 |
+
),
|
| 1024 |
+
use_container_width=True,
|
| 1025 |
+
)
|
| 1026 |
+
|
| 1027 |
+
st.markdown("#### Detailed Function Calls")
|
| 1028 |
+
if st.session_state.timing_logs:
|
| 1029 |
+
logs_df = pd.DataFrame(st.session_state.timing_logs)
|
| 1030 |
+
logs_df["timestamp"] = pd.to_datetime(logs_df["timestamp"])
|
| 1031 |
+
|
| 1032 |
+
# Format parameters column if it exists
|
| 1033 |
+
if "parameters" in logs_df.columns:
|
| 1034 |
+
# Option 1: Create wrapped text with newlines
|
| 1035 |
+
logs_df["parameters"] = logs_df["parameters"].apply(
|
| 1036 |
+
lambda x: (
|
| 1037 |
+
"\n".join(
|
| 1038 |
+
textwrap.wrap(
|
| 1039 |
+
"\n".join(f"{k}: {v}" for k, v in x.items()),
|
| 1040 |
+
width=50,
|
| 1041 |
+
break_long_words=False,
|
| 1042 |
+
replace_whitespace=False,
|
| 1043 |
+
)
|
| 1044 |
+
)
|
| 1045 |
+
if isinstance(x, dict)
|
| 1046 |
+
else str(x)
|
| 1047 |
+
)
|
| 1048 |
+
)
|
| 1049 |
+
|
| 1050 |
+
logs_df = logs_df.sort_values("timestamp", ascending=False)
|
| 1051 |
+
|
| 1052 |
+
st.dataframe(
|
| 1053 |
+
logs_df.style.format(
|
| 1054 |
+
{
|
| 1055 |
+
"duration": "{:.2f} seconds",
|
| 1056 |
+
"timestamp": lambda x: x.strftime("%H:%M:%S.%f")[:-3],
|
| 1057 |
+
}
|
| 1058 |
+
),
|
| 1059 |
+
use_container_width=True,
|
| 1060 |
+
height=400,
|
| 1061 |
+
column_config={
|
| 1062 |
+
"parameters": st.column_config.TextColumn(
|
| 1063 |
+
"parameters",
|
| 1064 |
+
width="large",
|
| 1065 |
+
help="Function parameters and their values",
|
| 1066 |
+
)
|
| 1067 |
+
},
|
| 1068 |
+
)
|
| 1069 |
+
else:
|
| 1070 |
+
st.info("No timing statistics available yet. Try refreshing the page.")
|
data/SAB/SAB.cpg
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3ad3031f5503a4404af825262ee8232cc04d4ea6683d42c5dd0a2f2a27ac9824
|
| 3 |
+
size 5
|
data/SAB/SAB.prj
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0360a15fcf4a096367d80e8c723d6dde12e82e4b05b906398443ddfc6a17b6cb
|
| 3 |
+
size 454
|
data/SAB/SAB.qmd
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE qgis PUBLIC 'http://mrcc.com/qgis.dtd' 'SYSTEM'>
|
| 2 |
+
<qgis version="3.40.0-Bratislava">
|
| 3 |
+
<identifier></identifier>
|
| 4 |
+
<parentidentifier></parentidentifier>
|
| 5 |
+
<language></language>
|
| 6 |
+
<type>dataset</type>
|
| 7 |
+
<title></title>
|
| 8 |
+
<abstract></abstract>
|
| 9 |
+
<links/>
|
| 10 |
+
<dates/>
|
| 11 |
+
<fees></fees>
|
| 12 |
+
<encoding></encoding>
|
| 13 |
+
<crs>
|
| 14 |
+
<spatialrefsys nativeFormat="Wkt">
|
| 15 |
+
<wkt>PROJCRS["NAD83(2011) / Florida GDL Albers",BASEGEOGCRS["NAD83(2011)",DATUM["NAD83 (National Spatial Reference System 2011)",ELLIPSOID["GRS 1980",6378137,298.257222101,LENGTHUNIT["metre",1]]],PRIMEM["Greenwich",0,ANGLEUNIT["degree",0.0174532925199433]],ID["EPSG",6318]],CONVERSION["Florida GDL Albers (meters)",METHOD["Albers Equal Area",ID["EPSG",9822]],PARAMETER["Latitude of false origin",24,ANGLEUNIT["degree",0.0174532925199433],ID["EPSG",8821]],PARAMETER["Longitude of false origin",-84,ANGLEUNIT["degree",0.0174532925199433],ID["EPSG",8822]],PARAMETER["Latitude of 1st standard parallel",24,ANGLEUNIT["degree",0.0174532925199433],ID["EPSG",8823]],PARAMETER["Latitude of 2nd standard parallel",31.5,ANGLEUNIT["degree",0.0174532925199433],ID["EPSG",8824]],PARAMETER["Easting at false origin",400000,LENGTHUNIT["metre",1],ID["EPSG",8826]],PARAMETER["Northing at false origin",0,LENGTHUNIT["metre",1],ID["EPSG",8827]]],CS[Cartesian,2],AXIS["easting (X)",east,ORDER[1],LENGTHUNIT["metre",1]],AXIS["northing (Y)",north,ORDER[2],LENGTHUNIT["metre",1]],USAGE[SCOPE["State-wide spatial data management."],AREA["United States (USA) - Florida."],BBOX[24.41,-87.63,31.01,-79.97]],ID["EPSG",6439]]</wkt>
|
| 16 |
+
<proj4>+proj=aea +lat_0=24 +lon_0=-84 +lat_1=24 +lat_2=31.5 +x_0=400000 +y_0=0 +ellps=GRS80 +units=m +no_defs</proj4>
|
| 17 |
+
<srsid>28506</srsid>
|
| 18 |
+
<srid>6439</srid>
|
| 19 |
+
<authid>EPSG:6439</authid>
|
| 20 |
+
<description>NAD83(2011) / Florida GDL Albers</description>
|
| 21 |
+
<projectionacronym>aea</projectionacronym>
|
| 22 |
+
<ellipsoidacronym>EPSG:7019</ellipsoidacronym>
|
| 23 |
+
<geographicflag>false</geographicflag>
|
| 24 |
+
</spatialrefsys>
|
| 25 |
+
</crs>
|
| 26 |
+
<extent/>
|
| 27 |
+
</qgis>
|
main.py
ADDED
|
@@ -0,0 +1,232 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
def get_raw_data(file_path: str) -> pd.DataFrame:
    """
    Read raw data from a CSV or Parquet file.

    Args:
        file_path: Location of the data file. A plain string or a path-like
            object (e.g. ``pathlib.Path``) is accepted. A name ending in
            ``.parquet`` is read with ``pd.read_parquet``; anything else is
            parsed as CSV.

    Returns:
        pd.DataFrame: For Parquet input, the file contents as read. For CSV
        input, ``Station_Number`` is kept as a string, the listed descriptive
        columns are loaded as categoricals, ``Org_Result_Value`` is numeric
        (unparseable entries such as "Not Reported" become NaN) and
        ``Activity_Start_Date_Time`` is converted with ``pd.to_datetime``.
    """
    # Normalise to str so path-like inputs support the suffix check below.
    path = str(file_path)
    if path.endswith(".parquet"):
        return pd.read_parquet(path)

    categorical_columns = [
        "Monitoring_Location_ID",
        "Activity_Depth_Unit",
        "Sample_Position",
        "Time_Zone",
        "Activity_Type",
        "Waterbody_Class",
        "WBID",
        "Name",
        "Sector",
        "Total_Depth_Unit",
        "Org_Analyte_Name",
    ]

    dtype_dict = {
        # Read as text so values such as "3.20" keep their trailing zero.
        "Station_Number": str,
        **{col: "category" for col in categorical_columns},
    }

    return pd.read_csv(path, dtype=dtype_dict).assign(
        Org_Result_Value=lambda df: pd.to_numeric(
            df["Org_Result_Value"].replace("Not Reported", pd.NA), errors="coerce"
        ),
        Activity_Start_Date_Time=lambda df: pd.to_datetime(
            df["Activity_Start_Date_Time"]
        ),
    )
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def get_stations_data() -> pd.DataFrame:
    """
    Load the stations table from ``data/Stations-Locations.csv``.

    Returns:
        pd.DataFrame: The CSV contents as parsed by ``pd.read_csv`` with its
        default options.
    """
    stations = pd.read_csv("data/Stations-Locations.csv")
    return stations
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def add_lat_long(raw_df: pd.DataFrame, stations_df: pd.DataFrame) -> pd.DataFrame:
    """
    Add latitude and longitude to raw data based on station number.

    ``Station_Number`` is converted to float and matched against the
    ``Number`` column of ``stations_df`` with a left join, so rows without a
    matching station are kept and receive missing coordinates.

    Args:
        raw_df: Measurements with a ``Station_Number`` column whose values
            can be converted to float.
        stations_df: Station table with ``Number``, ``Latitude`` and
            ``Longitude`` columns.

    Returns:
        pd.DataFrame: A new frame with ``Latitude`` and ``Longitude``
        appended. ``raw_df`` itself is left untouched.
    """
    # Build the temporary join key on a copy: assigning it into ``raw_df``
    # would leave a stray "Number" column on the caller's DataFrame.
    keyed = raw_df.assign(Number=raw_df["Station_Number"].astype(float))
    merged = keyed.merge(
        stations_df[["Number", "Latitude", "Longitude"]],
        on="Number",
        how="left",
    )
    return merged.drop("Number", axis=1)
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def get_analyte_data_with_lat_long(df: pd.DataFrame, analyte: str) -> pd.DataFrame:
    """
    Extract and transform data for a specific analyte, adding geographical coordinates.

    This function processes raw water quality data by:
    1. Adding latitude/longitude coordinates from stations data
    2. Filtering for a specific analyte
    3. Removing rows with missing values
    4. Aggregating duplicate measurements using mean values

    Args:
        df (pd.DataFrame): Raw water quality data. Besides the columns used
            as the pivot index below, it must contain:
            - Station_Number
            - Org_Analyte_Name
            - Org_Result_Value
        analyte (str): Name of the analyte to filter for (e.g., "Temperature, Water").
            The name is compared literally, so it may contain quotes or other
            punctuation.

    Returns:
        pd.DataFrame: Processed dataframe with columns:
            - Activity_Start_Date_Time
            - Station_Number
            - Sector
            - WBID
            - Sample_Position
            - Activity_Depth
            - Latitude
            - Longitude
            - {analyte}: Mean result value for the specified analyte

    Note:
        Rows sharing every index column above are averaged into one row.
    """
    return (
        df.pipe(add_lat_long, get_stations_data())
        # Boolean mask instead of an f-string query: interpolating the name
        # into query syntax fails when the analyte contains a quote character.
        .loc[lambda frame: frame["Org_Analyte_Name"] == analyte]
        .dropna(subset=["Org_Result_Value"])
        .pivot_table(
            index=[
                "Activity_Start_Date_Time",
                "Station_Number",
                "Sector",
                "WBID",
                "Sample_Position",
                "Activity_Depth",
                "Latitude",
                "Longitude",
            ],
            values="Org_Result_Value",
            aggfunc="mean",
            observed=True,
        )
        .reset_index()
        .rename(columns={"Org_Result_Value": analyte})
    )
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
def create_station_stats(
|
| 118 |
+
pivoted: pd.DataFrame, station: str | float | int
|
| 119 |
+
) -> pd.DataFrame:
|
| 120 |
+
"""
|
| 121 |
+
Create statistics for a specific station from pivoted data.
|
| 122 |
+
|
| 123 |
+
Args:
|
| 124 |
+
pivoted: Pivoted DataFrame containing water quality measurements
|
| 125 |
+
station: Station identifier
|
| 126 |
+
|
| 127 |
+
Returns:
|
| 128 |
+
DataFrame with statistics for various water quality parameters
|
| 129 |
+
"""
|
| 130 |
+
PARAMETERS = {
|
| 131 |
+
"Secchi Depth (feet)": ("Depth, Secchi Disk Depth", ["Surface"]),
|
| 132 |
+
"Temperature (°C)": ("Temperature, Water", ["Surface", "Bottom"]),
|
| 133 |
+
"Dissolved Oxygen (mg/L)": ("Dissolved Oxygen", ["Surface", "Bottom"]),
|
| 134 |
+
"Turbidity (NTU)": ("Turbidity", ["Surface", "Bottom"]),
|
| 135 |
+
"Salinity (ppt)": ("Salinity", ["Surface", "Bottom"]),
|
| 136 |
+
"pH": ("pH", ["Surface", "Bottom"]),
|
| 137 |
+
}
|
| 138 |
+
STATS = {"Average": "mean", "Maximum": "max", "Minimum": "min", "n=": "count"}
|
| 139 |
+
data = {"Station": station, "Statistic": list(STATS.keys())}
|
| 140 |
+
for param_name, (param_code, positions) in PARAMETERS.items():
|
| 141 |
+
for position in positions:
|
| 142 |
+
col_name = f"{param_name} {position}" if len(positions) > 1 else param_name
|
| 143 |
+
data[col_name] = [
|
| 144 |
+
pivoted[stat][position][station, param_code] for stat in STATS.values()
|
| 145 |
+
]
|
| 146 |
+
return pd.DataFrame(data)
|
| 147 |
+
|
| 148 |
+
|
| 149 |
+
def create_overall_summary(df: pd.DataFrame) -> pd.DataFrame:
    """
    Summarise result values per analyte across the whole dataset.

    Args:
        df: Frame with ``Org_Analyte_Name`` and ``Org_Result_Value`` columns.

    Returns:
        pd.DataFrame: Rows "Mean", "Maximum", "Minimum" and "Count" (values
        rounded to 2 decimals) and one column for each of the six reported
        analytes, in a fixed display order.

    Raises:
        KeyError: If any of the six reported analytes is absent from ``df``.
    """
    stat_labels = {
        "count": "Count",
        "mean": "Mean",
        "max": "Maximum",
        "min": "Minimum",
    }
    display_names = {
        "Depth, Secchi Disk Depth": "Secchi Depth (feet)",
        "Dissolved Oxygen": "Dissolved Oxygen (mg/L)",
        "Salinity": "Salinity (ppt)",
        "Turbidity": "Turbidity (NTU)",
        "Temperature, Water": "Temperature (°C)",
    }
    column_order = [
        "Secchi Depth (feet)",
        "Temperature (°C)",
        "Dissolved Oxygen (mg/L)",
        "Turbidity (NTU)",
        "Salinity (ppt)",
        "pH",
    ]

    results_by_analyte = df.groupby(["Org_Analyte_Name"], observed=False)[
        "Org_Result_Value"
    ]
    per_analyte = results_by_analyte.agg(["mean", "max", "min", "count"]).round(2)
    per_analyte = per_analyte.rename(columns=stat_labels)
    # Drop the axis label so it does not show up as a header after transposing.
    per_analyte.index.name = None

    by_statistic = per_analyte.T.rename(columns=display_names)
    return by_statistic.loc[:, column_order]
|
| 184 |
+
|
| 185 |
+
|
| 186 |
+
def create_summary_by_station_and_position(
    df: pd.DataFrame, exclude_analytes: list[str] | None = None
) -> pd.DataFrame:
    """
    Summarise result values per station, split by sample position.

    Args:
        df (pd.DataFrame): Measurements with ``Station_Number``,
            ``Sample_Position``, ``Org_Analyte_Name`` and
            ``Org_Result_Value`` columns.
        exclude_analytes: Analyte names to leave out of the statistics.
            ``None`` (the default) excludes nothing.

    Returns:
        pd.DataFrame: The per-station tables from ``create_station_stats``
        stacked together and indexed by ("Station", "Statistic").
    """
    exclude_analytes = [] if exclude_analytes is None else exclude_analytes

    kept = df.query("Org_Analyte_Name not in @exclude_analytes")
    grouped = kept.groupby(
        ["Station_Number", "Sample_Position", "Org_Analyte_Name"], observed=False
    )["Org_Result_Value"]
    summary = grouped.agg(["mean", "max", "min", "count"]).round(2)

    # Reshape so each sample position becomes a column level under each statistic.
    pivoted = summary.reset_index().pivot_table(
        index=["Station_Number", "Org_Analyte_Name"],
        columns=["Sample_Position"],
        values=["mean", "max", "min", "count"],
        observed=False,
    )

    station_tables = []
    for station in sorted(df["Station_Number"].unique()):
        station_tables.append(create_station_stats(pivoted, station))
    return pd.concat(station_tables).set_index(["Station", "Statistic"])
|
| 219 |
+
|
| 220 |
+
|
| 221 |
+
def create_multiindex_columns(df: pd.DataFrame) -> pd.DataFrame:
    """
    Return a copy of ``df`` with two-level ("Analyte", "Position") columns.

    Each column name is split at its last space into an analyte part and a
    position part. The "Secchi Depth (feet)" column is not split; it is placed
    under an empty analyte label instead.

    Args:
        df: Frame whose column names end in a space-separated position.

    Returns:
        pd.DataFrame: Copy of ``df`` with MultiIndex columns; ``df`` itself is
        not modified.
    """

    def split_label(label):
        if label == "Secchi Depth (feet)":
            return ("", label)
        pieces = label.rsplit(" ", 1)
        return (pieces[0], pieces[1])

    relabelled = df.copy()
    relabelled.columns = pd.MultiIndex.from_tuples(
        [split_label(name) for name in df.columns],
        names=["Analyte", "Position"],
    )
    return relabelled
|
pyproject.toml
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[project]
|
| 2 |
+
name = "state-of-the-bay"
|
| 3 |
+
version = "0.1.0"
|
| 4 |
+
description = "Add your description here"
|
| 5 |
+
readme = "README.md"
|
| 6 |
+
requires-python = ">=3.12"
|
| 7 |
+
dependencies = [
|
| 8 |
+
"contextily>=1.6.2",
|
| 9 |
+
"folium>=0.18.0",
|
| 10 |
+
"geopandas[all]>=1.0.1",
|
| 11 |
+
"great-tables>=0.13.0",
|
| 12 |
+
"ipykernel>=6.29.5",
|
| 13 |
+
"matplotlib>=3.9.2",
|
| 14 |
+
"nbformat>=5.10.4",
|
| 15 |
+
"osmnx>=1.9.3",
|
| 16 |
+
"pandas>=2.2.3",
|
| 17 |
+
"plotly>=5.24.1",
|
| 18 |
+
"plotnine>=0.14.1",
|
| 19 |
+
"polars>=1.12.0",
|
| 20 |
+
"pygwalker>=0.4.9.13",
|
| 21 |
+
"pytest>=8.3.3",
|
| 22 |
+
"scipy>=1.14.1",
|
| 23 |
+
"seaborn>=0.13.2",
|
| 24 |
+
"streamlit>=1.40.0",
|
| 25 |
+
"watchdog>=5.0.3",
|
| 26 |
+
"xlsxwriter>=3.2.0",
|
| 27 |
+
]
|
| 28 |
+
|
| 29 |
+
[tool.uv]
|
| 30 |
+
dev-dependencies = [
|
| 31 |
+
"ipykernel>=6.29.5",
|
| 32 |
+
]
|
requirements.txt
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
streamlit
|
| 2 |
+
pandas
|
| 3 |
+
xlsxwriter
|
| 4 |
+
numpy
|
| 5 |
+
scipy
|
| 6 |
+
matplotlib
|
| 7 |
+
plotly
|
| 8 |
+
great-tables
|
| 9 |
+
polars
|
| 10 |
+
seaborn
|
| 11 |
+
geopandas[all]
|
| 12 |
+
contextily
|
| 13 |
+
plotly-express
|
| 14 |
+
altair
|
tests/test_main.py
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import pytest
|
| 4 |
+
|
| 5 |
+
from main import create_station_stats
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
@pytest.fixture
def sample_pivoted_data():
    """Build a pivoted frame shaped like the input of create_station_stats."""
    stations = ["1.00", "3.20"]
    analytes = [
        "Depth, Secchi Disk Depth",
        "Temperature, Water",
        "Dissolved Oxygen",
        "Turbidity",
        "Salinity",
        "pH",
    ]
    # Rows: every (station, analyte) pair.
    row_index = pd.MultiIndex.from_product(
        [stations, analytes],
        names=["Station_Number", "Org_Analyte_Name"],
    )
    # Columns: every (aggregate, sample position) pair.
    column_index = pd.MultiIndex.from_product(
        [
            ["count", "max", "mean", "min"],
            ["Bottom", "Surface"],
        ]
    )

    # Every statistic starts at 10.0 ...
    values = np.full((len(row_index), len(column_index)), 10.0)
    frame = pd.DataFrame(values, index=row_index, columns=column_index)

    # ... and the counts are then raised to 100 for both positions.
    for position in ("Bottom", "Surface"):
        frame.loc[:, ("count", position)] = 100

    return frame
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def test_create_station_stats_basic(sample_pivoted_data):
    """The result is a four-row DataFrame with the identifying columns."""
    stats = create_station_stats(sample_pivoted_data, "3.20")

    assert isinstance(stats, pd.DataFrame)
    # One row each for Average, Maximum, Minimum and n=
    assert len(stats) == 4
    for required in ("Station", "Statistic"):
        assert required in stats.columns
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def test_create_station_stats_values(sample_pivoted_data):
    """Values from the pivoted frame land in the matching statistic rows."""
    stats = create_station_stats(sample_pivoted_data, "3.20")

    # Rows are ordered mean, max, min, count for surface Dissolved Oxygen.
    expected = [10.0, 10.0, 10.0, 100]
    assert stats["Dissolved Oxygen (mg/L) Surface"].tolist() == expected
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def test_create_station_stats_columns(sample_pivoted_data):
    """The result has exactly the expected set of columns."""
    stats = create_station_stats(sample_pivoted_data, "3.20")

    # Analytes reported at both positions get one column per position.
    two_position_labels = [
        "Temperature (°C)",
        "Dissolved Oxygen (mg/L)",
        "Turbidity (NTU)",
        "Salinity (ppt)",
        "pH",
    ]
    expected_columns = {"Station", "Statistic", "Secchi Depth (feet)"}
    for label in two_position_labels:
        expected_columns.add(f"{label} Surface")
        expected_columns.add(f"{label} Bottom")

    assert set(stats.columns) == expected_columns
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
def test_create_station_stats_missing_data(sample_pivoted_data):
    """A NaN in the pivoted input is carried through to the result."""
    station = "3.20"
    # Blank out the surface pH mean for this station.
    sample_pivoted_data.loc[(station, "pH"), ("mean", "Surface")] = np.nan

    stats = create_station_stats(sample_pivoted_data, station)

    # Row 0 holds the mean ("Average") statistic.
    assert pd.isna(stats["pH Surface"][0])
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
def test_create_station_stats_statistics(sample_pivoted_data):
    """The Statistic column lists the labels in their fixed order."""
    stats = create_station_stats(sample_pivoted_data, "3.20")

    labels = stats["Statistic"].tolist()
    assert labels == ["Average", "Maximum", "Minimum", "n="]
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
def test_create_station_stats_invalid_station(sample_pivoted_data):
    """A station absent from the pivoted frame raises KeyError."""
    unknown_station = "invalid_station"
    with pytest.raises(KeyError):
        create_station_stats(sample_pivoted_data, unknown_station)
|
uv.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|