diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..d016d98cfc301d7275d15f105df5c628c80df38e --- /dev/null +++ b/Dockerfile @@ -0,0 +1,27 @@ +FROM python:3.12-slim + +WORKDIR /app + +# Install dependencies first (layer caching) +COPY requirements.txt . +COPY backend/requirements.txt backend/ +RUN pip install --no-cache-dir -r requirements.txt -r backend/requirements.txt + +# Non-root user for security +RUN groupadd -r solarwine && useradd -r -g solarwine solarwine + +# Copy application code (no Data/ — mount or fetch at runtime) +COPY src/ src/ +COPY config/ config/ +COPY backend/ backend/ + +ENV PYTHONPATH=/app + +# Switch to non-root +USER solarwine + +# HuggingFace Spaces requires port 7860 +EXPOSE 7860 +HEALTHCHECK --interval=30s --timeout=5s --retries=3 \ + CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:7860/api/health')" || exit 1 +CMD ["uvicorn", "backend.api.main:app", "--host", "0.0.0.0", "--port", "7860"] diff --git a/README.md b/README.md index d05ea40b860de7bc066010e6b814fdfea7ae0530..9f6e73db0de8b648cab7071fcd01989a692c35e5 100644 --- a/README.md +++ b/README.md @@ -1,11 +1,26 @@ --- -title: Api -emoji: 👁 -colorFrom: indigo -colorTo: pink +title: SolarWine API +emoji: 🌿 +colorFrom: green +colorTo: yellow sdk: docker -pinned: false -license: mit +app_port: 7860 +private: true --- -Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference +# SolarWine API + +FastAPI backend for the SolarWine agrivoltaic vineyard control system. 
+ +## Endpoints + +- `GET /api/health` — health check +- `GET /api/weather/current` — current weather (IMS station 43) +- `GET /api/sensors/snapshot` — vine sensor readings (ThingsBoard) +- `GET /api/energy/current` — current power output +- `GET /api/photosynthesis/current` — photosynthesis rate (FvCB/ML) +- `GET /api/control/status` — last control loop tick +- `POST /api/chatbot/message` — AI vineyard advisor +- `GET /api/biology/rules` — biology rules + +Interactive docs at `/docs`. diff --git a/backend/__init__.py b/backend/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/backend/api/__init__.py b/backend/api/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/backend/api/auth.py b/backend/api/auth.py new file mode 100644 index 0000000000000000000000000000000000000000..4fdaeb5329fc833b32360935b9296f9d3fac6dcd --- /dev/null +++ b/backend/api/auth.py @@ -0,0 +1,120 @@ +""" +JWT authentication for the SolarWine API. + +Initially optional — endpoints work without auth. +Enable by setting JWT_SECRET in environment. 
async def require_auth(
    credentials: Optional[HTTPAuthorizationCredentials] = Depends(_security),
) -> dict:
    """Validate the bearer JWT and return its decoded payload.

    When ``JWT_SECRET`` is empty, authentication is disabled and every
    caller is returned the guest user with the ``admin`` role.

    Args:
        credentials: Bearer credentials extracted by ``_security``; ``None``
            when the request carries no Authorization header.

    Returns:
        The decoded JWT payload, or the guest/admin payload when auth is
        disabled.

    Raises:
        HTTPException: 401 when the header is missing or the token is
            expired/invalid; 503 when ``JWT_SECRET`` is configured but PyJWT
            cannot be imported.
    """
    # Auth disabled — allow all
    if not JWT_SECRET:
        return {"sub": "guest", "role": "admin"}

    if not credentials:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Missing authorization header",
            headers={"WWW-Authenticate": "Bearer"},
        )

    jwt = _get_jwt()
    if not jwt:
        # Fail closed: a secret is configured, so auth is meant to be ON.
        # Granting guest/admin here would let any Authorization header through
        # whenever the PyJWT import breaks.
        log.error("JWT_SECRET is set but PyJWT is unavailable — rejecting request")
        raise HTTPException(
            status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
            detail="Authentication backend unavailable",
        )

    try:
        return jwt.decode(
            credentials.credentials,
            JWT_SECRET,
            algorithms=[JWT_ALGORITHM],  # pin the algorithm; never trust the token header
        )
    except jwt.ExpiredSignatureError as exc:
        # Must be caught before InvalidTokenError so the specific
        # "Token expired" detail is reported for expired tokens.
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Token expired",
            headers={"WWW-Authenticate": "Bearer"},
        ) from exc
    except jwt.InvalidTokenError as exc:
        raise HTTPException(
            status_code=status.HTTP_401_UNAUTHORIZED,
            detail="Invalid token",
            headers={"WWW-Authenticate": "Bearer"},
        ) from exc
+""" + +from __future__ import annotations + +from functools import lru_cache + +from src.data.data_providers import DataHub +from src.data.redis_cache import get_redis + + +@lru_cache(maxsize=1) +def get_datahub() -> DataHub: + """Return a singleton DataHub (all services with Redis-backed caches).""" + return DataHub.default() + + +def get_redis_client(): + """Return the Redis client (or None).""" + return get_redis() diff --git a/backend/api/main.py b/backend/api/main.py new file mode 100644 index 0000000000000000000000000000000000000000..c0596aaffdcf592d1ce93cc6ecf894f6cfe55310 --- /dev/null +++ b/backend/api/main.py @@ -0,0 +1,120 @@ +""" +FastAPI application — SolarWine API Gateway. + +Deployed on HuggingFace Spaces (Docker SDK, port 7860). +""" + +from __future__ import annotations + +import logging +import os +import time +from contextlib import asynccontextmanager + +from fastapi import FastAPI, Request, Response +from fastapi.middleware.cors import CORSMiddleware +from slowapi import Limiter, _rate_limit_exceeded_handler +from slowapi.errors import RateLimitExceeded +from slowapi.util import get_remote_address + +from backend.api.routes import health, weather, sensors, energy, photosynthesis, control, chatbot, biology, login + +# --------------------------------------------------------------------------- +# Structured logging +# --------------------------------------------------------------------------- + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [%(levelname)s] %(name)s: %(message)s", +) +log = logging.getLogger("solarwine.api") + +# --------------------------------------------------------------------------- +# Sentry (optional — set SENTRY_DSN env var to enable) +# --------------------------------------------------------------------------- + +_sentry_dsn = os.environ.get("SENTRY_DSN", "") +if _sentry_dsn: + try: + import sentry_sdk + from sentry_sdk.integrations.fastapi import FastApiIntegration + from 
sentry_sdk.integrations.starlette import StarletteIntegration + sentry_sdk.init( + dsn=_sentry_dsn, + integrations=[StarletteIntegration(), FastApiIntegration()], + traces_sample_rate=0.1, + environment=os.environ.get("SENTRY_ENV", "production"), + ) + log.info("Sentry enabled (env=%s)", os.environ.get("SENTRY_ENV", "production")) + except ImportError: + log.warning("SENTRY_DSN set but sentry-sdk not installed — skipping") + +# --------------------------------------------------------------------------- +# Lifespan — one-time startup / shutdown +# --------------------------------------------------------------------------- + +_start_time: float = 0.0 + + +@asynccontextmanager +async def lifespan(app: FastAPI): + global _start_time + _start_time = time.time() + log.info("SolarWine API starting (port 7860)") + yield + log.info("SolarWine API shutting down (uptime=%.0fs)", get_uptime()) + + +def get_uptime() -> float: + return time.time() - _start_time + + +# --------------------------------------------------------------------------- +# App +# --------------------------------------------------------------------------- + +limiter = Limiter(key_func=get_remote_address, default_limits=["60/minute"]) + +app = FastAPI( + title="SolarWine API", + version="0.1.0", + description="Agrivoltaic vineyard control system API", + lifespan=lifespan, +) +app.state.limiter = limiter +app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler) + + +# -- Request logging -------------------------------------------------------- + +@app.middleware("http") +async def log_requests(request: Request, call_next): + start = time.time() + response = await call_next(request) + duration = (time.time() - start) * 1000 + if request.url.path != "/api/health": # skip noisy health checks + log.info("%s %s %d %.0fms", request.method, request.url.path, response.status_code, duration) + return response + +# -- CORS ------------------------------------------------------------------- + 
+allowed_origins = os.environ.get("ALLOWED_ORIGINS", "http://localhost:3000,http://localhost:5173").split(",") +app.add_middleware( + CORSMiddleware, + allow_origins=[o.strip() for o in allowed_origins], + allow_credentials=True, + allow_methods=["GET", "POST", "OPTIONS"], + allow_headers=["Content-Type", "Authorization"], +) + +# -- Routes ----------------------------------------------------------------- + +app.include_router(health.router, prefix="/api", tags=["health"]) +app.include_router(weather.router, prefix="/api/weather", tags=["weather"]) +app.include_router(sensors.router, prefix="/api/sensors", tags=["sensors"]) +app.include_router(energy.router, prefix="/api/energy", tags=["energy"]) +app.include_router(photosynthesis.router, prefix="/api/photosynthesis", tags=["photosynthesis"]) +app.include_router(control.router, prefix="/api/control", tags=["control"]) +app.include_router(chatbot.router, prefix="/api/chatbot", tags=["chatbot"]) +app.include_router(biology.router, prefix="/api/biology", tags=["biology"]) +app.include_router(login.router, prefix="/api/auth", tags=["auth"]) diff --git a/backend/api/routes/__init__.py b/backend/api/routes/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/backend/api/routes/biology.py b/backend/api/routes/biology.py new file mode 100644 index 0000000000000000000000000000000000000000..0bee74147616a6104ac9ca1b0bd7ecf21e8c2724 --- /dev/null +++ b/backend/api/routes/biology.py @@ -0,0 +1,38 @@ +"""Biology endpoints — wraps BiologyService.""" + +from __future__ import annotations + +import logging + +from fastapi import APIRouter, Depends +from fastapi.responses import JSONResponse + +from backend.api.deps import get_datahub +from src.data.data_providers import DataHub + +log = logging.getLogger(__name__) +router = APIRouter() + + +@router.get("/phenology") +async def phenology(hub: DataHub = Depends(get_datahub)): + """Current phenological 
stage (GDD-based).""" + try: + from src.models.phenology import estimate_stage_combined + stage = estimate_stage_combined() + return {"stage": stage.name if hasattr(stage, "name") else str(stage)} + except Exception as exc: + log.error("Phenology estimation failed: %s", exc) + return JSONResponse(status_code=500, content={"error": "Phenology estimation failed"}) + + +@router.get("/rules") +async def biology_rules(hub: DataHub = Depends(get_datahub)): + """List all biology rules.""" + return hub.biology.list_rules() + + +@router.get("/rules/{rule_name}") +async def biology_rule_detail(rule_name: str, hub: DataHub = Depends(get_datahub)): + """Explain a specific biology rule.""" + return hub.biology.explain_rule(rule_name) diff --git a/backend/api/routes/chatbot.py b/backend/api/routes/chatbot.py new file mode 100644 index 0000000000000000000000000000000000000000..612637d7daff534397d21c2bdc75f37db32e4a4b --- /dev/null +++ b/backend/api/routes/chatbot.py @@ -0,0 +1,79 @@ +"""Chatbot endpoints — wraps VineyardChatbot.""" + +from __future__ import annotations + +import logging +import threading + +from pydantic import BaseModel, Field +from fastapi import APIRouter, Depends, Request +from fastapi.responses import JSONResponse +from slowapi import Limiter +from slowapi.util import get_remote_address + +from backend.api.deps import get_datahub +from src.data.data_providers import DataHub + +log = logging.getLogger(__name__) +limiter = Limiter(key_func=get_remote_address) +router = APIRouter() + + +class ChatRequest(BaseModel): + message: str = Field(..., min_length=1, max_length=4000) + session_id: str = "default" + + +class FeedbackRequest(BaseModel): + session_id: str + message_id: str + rating: str = Field(..., pattern=r"^(up|down|flag)$") + comment: str = Field("", max_length=2000) + + +# Thread-safe lazy chatbot init +_chatbot = None +_chatbot_lock = threading.Lock() + + +def _get_chatbot(hub: DataHub): + global _chatbot + if _chatbot is not None: + return _chatbot + 
with _chatbot_lock: + if _chatbot is None: + from src.chatbot.vineyard_chatbot import VineyardChatbot + _chatbot = VineyardChatbot(hub=hub) + return _chatbot + + +@router.post("/message") +@limiter.limit("10/minute") +async def chat_message(request: Request, req: ChatRequest, hub: DataHub = Depends(get_datahub)): + bot = _get_chatbot(hub) + response = bot.chat(req.message) + return { + "message": response.message, + "confidence": getattr(response, "confidence", None), + "sources": getattr(response, "sources", []), + "caveats": getattr(response, "caveats", []), + "rule_violations": getattr(response, "rule_violations", []), + "response_mode": getattr(response, "response_mode", "info"), + } + + +@router.post("/feedback") +@limiter.limit("60/minute") +async def chat_feedback(request: Request, req: FeedbackRequest): + try: + from src.chatbot.feedback import log_feedback + log_feedback( + session_id=req.session_id, + message_id=req.message_id, + rating=req.rating, + comment=req.comment, + ) + return {"status": "ok"} + except Exception as exc: + log.error("Feedback logging failed: %s", exc) + return JSONResponse(status_code=500, content={"error": "Feedback logging failed"}) diff --git a/backend/api/routes/control.py b/backend/api/routes/control.py new file mode 100644 index 0000000000000000000000000000000000000000..7aed8ab381374fb70ee0b9eb8dbf8f5a2f863d99 --- /dev/null +++ b/backend/api/routes/control.py @@ -0,0 +1,77 @@ +"""Control system endpoints — reads state from Redis.""" + +from __future__ import annotations + +import logging + +from fastapi import APIRouter +from fastapi.responses import JSONResponse + +from backend.api.deps import get_datahub, get_redis_client + +log = logging.getLogger(__name__) +router = APIRouter() + + +@router.get("/status") +async def control_status(): + """Last ControlLoop tick result (stored in Redis by the worker).""" + redis = get_redis_client() + if redis: + data = redis.get_json("control:last_tick") + if data: + return data + return 
JSONResponse( + status_code=503, + content={"error": "No tick result available (worker may not have run yet)"}, + ) + + +@router.get("/plan") +async def control_plan(): + """Current day-ahead plan.""" + redis = get_redis_client() + if redis: + data = redis.get_json("control:plan") + if data: + return data + # Fallback: try loading from file + try: + import json + from config.settings import DAILY_PLAN_PATH + with open(DAILY_PLAN_PATH) as f: + return json.load(f) + except FileNotFoundError: + return JSONResponse(status_code=404, content={"error": "No plan available"}) + except Exception as exc: + log.error("Failed to load plan from file: %s", exc) + return JSONResponse(status_code=500, content={"error": "Plan loading failed"}) + + +@router.get("/budget") +async def control_budget(): + """Current energy budget state.""" + redis = get_redis_client() + if redis: + data = redis.get_json("control:budget") + if data: + return data + return JSONResponse( + status_code=503, + content={"error": "No budget data available"}, + ) + + +@router.get("/trackers") +async def control_trackers(): + """Live tracker angles from ThingsBoard.""" + hub = get_datahub() + try: + snapshot = hub.vine_sensors.get_snapshot(light=True) + return {"trackers": snapshot.get("trackers", {}), "source": "ThingsBoard"} + except Exception as exc: + log.error("Tracker fetch failed: %s", exc) + return JSONResponse( + status_code=502, + content={"error": "Tracker fetch failed"}, + ) diff --git a/backend/api/routes/energy.py b/backend/api/routes/energy.py new file mode 100644 index 0000000000000000000000000000000000000000..4c61896c79147c9326f796d27152d8d0b7cdabd2 --- /dev/null +++ b/backend/api/routes/energy.py @@ -0,0 +1,45 @@ +"""Energy endpoints — wraps EnergyService.""" + +from __future__ import annotations + +import re + +from fastapi import APIRouter, Depends, HTTPException, Query + +from backend.api.deps import get_datahub +from src.data.data_providers import DataHub + +router = APIRouter() + +_DATE_RE 
async def _check_thingsboard() -> bool:
    """Report whether the ThingsBoard health endpoint answers with HTTP 200.

    The base URL is read from the ``THINGSBOARD_HOST`` environment variable
    and ``/api/noauth/health`` is requested with a 3-second timeout. The
    blocking HTTP call runs in the default executor so the event loop is not
    blocked.

    Returns:
        ``True`` only when the endpoint responds with status 200. ``False``
        when the variable is unset or the request fails for any reason; this
        coroutine never raises.
    """
    import urllib.request

    tb_host = os.environ.get("THINGSBOARD_HOST", "")
    if not tb_host:
        return False

    url = f"{tb_host.rstrip('/')}/api/noauth/health"

    def _probe() -> bool:
        # The context manager closes the response (and its socket) on every
        # path, so repeated health probes do not leak connections.
        with urllib.request.urlopen(url, timeout=3) as resp:
            return resp.status == 200

    try:
        # get_running_loop() is the supported way to obtain the loop from
        # inside a coroutine; get_event_loop() is deprecated for this use.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, _probe)
    except Exception:
        # Deliberate best effort: a health probe must degrade to "unreachable"
        # rather than turn the whole /health response into a 500.
        return False
@router.post("/login", response_model=LoginResponse)
@limiter.limit("5/minute")
async def login(request: Request, req: LoginRequest):
    """Authenticate the configured admin user and return a JWT.

    Args:
        request: Incoming request; not read in this body (presumably needed
            by the rate-limit decorator — confirm against slowapi).
        req: Submitted username and password.

    Returns:
        A ``LoginResponse`` carrying the signed access token.

    Raises:
        HTTPException: 503 when ``ADMIN_PASSWORD`` is not configured or no
            token could be created; 401 when the credentials do not match.
    """
    if not _ADMIN_PASS:
        raise HTTPException(status_code=503, detail="Auth not configured")

    # Constant-time comparison to prevent timing attacks.
    # compare_digest() accepts str only when it is pure ASCII and raises
    # TypeError otherwise; comparing UTF-8 bytes keeps non-ASCII input on the
    # normal 401 path instead of surfacing as a 500.
    user_ok = secrets.compare_digest(
        req.username.encode("utf-8"), _ADMIN_USER.encode("utf-8")
    )
    pass_ok = secrets.compare_digest(
        req.password.encode("utf-8"), _ADMIN_PASS.encode("utf-8")
    )

    # Both comparisons are always evaluated above, so the response does not
    # reveal which of the two fields was wrong.
    if not (user_ok and pass_ok):
        raise HTTPException(status_code=401, detail="Invalid credentials")

    token = create_token(username=req.username, role="admin")
    if not token:
        raise HTTPException(status_code=503, detail="JWT not configured")

    return LoginResponse(access_token=token)
hub.photosynthesis.get_current(model=model.value) + + +@router.get("/forecast") +async def ps_forecast(hub: DataHub = Depends(get_datahub)): + return hub.photosynthesis.forecast_day_ahead() diff --git a/backend/api/routes/sensors.py b/backend/api/routes/sensors.py new file mode 100644 index 0000000000000000000000000000000000000000..14a34eb6c2d61111d29ce0a7d9990d2298b5983d --- /dev/null +++ b/backend/api/routes/sensors.py @@ -0,0 +1,47 @@ +"""Vine sensor endpoints — wraps VineSensorService.""" + +from __future__ import annotations + +from enum import Enum +from typing import Optional + +from fastapi import APIRouter, Depends, Query + +from backend.api.deps import get_datahub +from src.data.data_providers import DataHub + +router = APIRouter() + + +class DeviceType(str, Enum): + crop = "crop" + air = "air" + soil = "soil" + + +class AreaType(str, Enum): + treatment = "treatment" + reference = "reference" + ambient = "ambient" + + +@router.get("/snapshot") +async def sensors_snapshot( + light: bool = False, + hub: DataHub = Depends(get_datahub), +): + return hub.vine_sensors.get_snapshot(light=light) + + +@router.get("/history") +async def sensors_history( + type: DeviceType = Query(DeviceType.crop, description="Device type"), + area: Optional[AreaType] = Query(None, description="Area filter"), + hours: int = Query(24, ge=1, le=8760, description="Hours of history (1–8760)"), + hub: DataHub = Depends(get_datahub), +): + return hub.vine_sensors.get_history( + device_type=type.value, + area=area.value if area else None, + hours_back=hours, + ) diff --git a/backend/api/routes/weather.py b/backend/api/routes/weather.py new file mode 100644 index 0000000000000000000000000000000000000000..a3f0ead6f347b13b41f54590b41d079d697559f9 --- /dev/null +++ b/backend/api/routes/weather.py @@ -0,0 +1,48 @@ +"""Weather endpoints — wraps WeatherService.""" + +from __future__ import annotations + +import re +from datetime import date, timedelta + +from fastapi import APIRouter, Depends, 
def _validate_date(value: str) -> str:
    """Return *value* unchanged if it is a real date in ``YYYY-MM-DD`` form.

    Args:
        value: Candidate date string taken from the request.

    Returns:
        The same string, once validated.

    Raises:
        HTTPException: 400 when the string is not exactly ``YYYY-MM-DD`` or
            does not name an existing calendar date (e.g. ``2024-13-45``).
    """
    # fullmatch() rejects a trailing newline, which match() with `$` lets
    # through. The shape check stays because date.fromisoformat() alone
    # accepts other ISO spellings on newer Python versions.
    if _DATE_RE.fullmatch(value):
        try:
            date.fromisoformat(value)  # rejects impossible months/days
        except ValueError:
            pass
        else:
            return value
    raise HTTPException(status_code=400, detail=f"Invalid date format: {value!r}. Expected YYYY-MM-DD")
b/backend/workers/control_tick.py new file mode 100644 index 0000000000000000000000000000000000000000..8fe2ebb51fc6d7700dd7913a44209fe6dc593e0d --- /dev/null +++ b/backend/workers/control_tick.py @@ -0,0 +1,124 @@ +""" +ControlLoop single-tick worker. + +Entry point for GitHub Actions cron (every 15 min). +Usage: + python -m backend.workers.control_tick + python -m backend.workers.control_tick --dry-run +""" + +from __future__ import annotations + +import argparse +import json +import logging +import sys +from datetime import datetime, timezone +from pathlib import Path + +# Ensure project root is on sys.path +PROJECT_ROOT = Path(__file__).resolve().parent.parent.parent +if str(PROJECT_ROOT) not in sys.path: + sys.path.insert(0, str(PROJECT_ROOT)) + +# Load .env if present (local dev) +try: + from dotenv import load_dotenv + load_dotenv(PROJECT_ROOT / ".env") +except ImportError: + pass + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [%(levelname)s] %(name)s: %(message)s", +) +log = logging.getLogger("control_tick") + + +def main(): + parser = argparse.ArgumentParser(description="Run one ControlLoop tick") + parser.add_argument("--dry-run", action="store_true", help="Compute decisions without dispatching") + args = parser.parse_args() + + from src.control_loop import ControlLoop + from src.data.redis_cache import get_redis + + log.info("Starting control tick (dry_run=%s)", args.dry_run) + + loop = ControlLoop(dry_run=args.dry_run) + result = loop.tick() + + # Serialise result + result_dict = result.__dict__ if hasattr(result, "__dict__") else {"raw": str(result)} + result_dict["_timestamp"] = datetime.now(timezone.utc).isoformat() + result_dict["_dry_run"] = args.dry_run + + # Store in Redis for the API to read + redis = get_redis() + if redis: + # Convert to JSON-safe dict + safe = json.loads(json.dumps(result_dict, default=str)) + redis.set_json("control:last_tick", safe, ttl=1200) # 20 min TTL + log.info("Tick result saved to Redis") + else: 
def _check_budget_alert(tick: dict, *, now: datetime | None = None) -> None:
    """Log a warning (visible in GitHub Actions) if budget is nearly exhausted.

    The alert fires when more than 80% of today's budget has been spent
    before 14:00 Israel local time. Budget figures are read from the
    ``control:budget`` Redis key; if ``BUDGET_ALERT_WEBHOOK`` is set, the
    alert is also POSTed to that URL.

    Args:
        tick: Serialised tick result; only ``budget_remaining_kwh`` is read.
        now: Timezone-aware instant to evaluate against. Defaults to the
            current time; exposed so the cutoff logic can be exercised
            deterministically.

    Returns:
        None. Failures are logged and never propagated, so a broken alert
        cannot fail the control tick.
    """
    import os
    try:
        from datetime import datetime, timedelta, timezone

        remaining = tick.get("budget_remaining_kwh", None)
        if remaining is None or remaining == 0:
            return  # no budget data or dormant season

        try:
            from zoneinfo import ZoneInfo
            israel_tz = ZoneInfo("Asia/Jerusalem")
        except (ImportError, KeyError):
            # ZoneInfoNotFoundError is a KeyError: no tz database available.
            # Approximate with UTC+3 (Israel daylight time, which covers the
            # May–September growing season).
            israel_tz = timezone(timedelta(hours=3))

        # A fixed +2h offset is an hour off while daylight saving is in
        # effect; converting through the real zone keeps the 14:00 cutoff
        # aligned with local wall-clock time all year.
        instant = now if now is not None else datetime.now(timezone.utc)
        now_israel = instant.astimezone(israel_tz)

        # Only alert before 14:00 IST (still daylight hours left)
        if now_israel.hour >= 14:
            return

        # Get today's total budget from Redis
        from src.data.redis_cache import get_redis
        redis = get_redis()
        if not redis:
            return
        budget_data = redis.get_json("control:budget")
        if not budget_data or "plan" not in budget_data:
            return

        plan = budget_data["plan"]
        # Unspent budget: per-slot allocations plus the remaining daily margin.
        unspent = sum(plan.get("slot_budgets", {}).values()) + plan.get("daily_margin_remaining_kWh", 0)
        spent = plan.get("cumulative_spent", 0)
        if unspent <= 0:
            return

        spent_fraction = spent / (unspent + spent)
        if spent_fraction <= 0.8:
            return

        log.warning(
            "BUDGET ALERT: %.1f%% of daily budget spent before %02d:00 IST "
            "(spent=%.3f kWh, remaining=%.3f kWh)",
            spent_fraction * 100,
            now_israel.hour,
            spent,
            remaining,
        )
        # Future: send webhook/email here
        webhook_url = os.environ.get("BUDGET_ALERT_WEBHOOK")
        if webhook_url:
            import requests
            requests.post(webhook_url, json={
                "text": f"SolarWine Budget Alert: {spent_fraction * 100:.0f}% spent before {now_israel.hour}:00 IST",
                "spent_kwh": round(spent, 3),
                "remaining_kwh": round(remaining, 3),
            }, timeout=5)
    except Exception as exc:
        # Best effort, but not silent: DEBUG is below the INFO level this
        # worker configures, so a failing alert path would go unnoticed.
        log.warning("Budget alert check failed: %s", exc)
def main():
    """Compute the day-ahead plan for today and publish it.

    The plan is written to ``DAILY_PLAN_PATH`` as a JSON backup and, when
    Redis is available, stored under the ``control:plan`` key with a 24 h TTL.
    A failure to write the backup file is logged and does not abort the run.
    """
    from src.day_ahead_planner import DayAheadPlanner
    from src.data.redis_cache import get_redis
    from config.settings import DAILY_PLAN_PATH

    # "Today" must be the vineyard's calendar day. date.today() follows the
    # runner's timezone (UTC on GitHub Actions), which is a day behind Israel
    # for runs started late in the UTC evening.
    try:
        from zoneinfo import ZoneInfo

        target = datetime.now(ZoneInfo("Asia/Jerusalem")).date()
    except (ImportError, LookupError):
        # No tz database available: fall back to the runner's local date.
        target = date.today()
    log.info("Computing day-ahead plan for %s", target)

    planner = DayAheadPlanner()
    plan = planner.plan(target_date=target)

    plan_dict = plan.to_dict() if hasattr(plan, "to_dict") else {"raw": str(plan)}
    plan_dict["_computed_at"] = datetime.now(timezone.utc).isoformat()

    # Save to file (backup)
    try:
        Path(DAILY_PLAN_PATH).parent.mkdir(parents=True, exist_ok=True)
        # Explicit encoding so the backup is identical on every platform.
        with open(DAILY_PLAN_PATH, "w", encoding="utf-8") as f:
            json.dump(plan_dict, f, default=str, indent=2)
        log.info("Plan saved to %s", DAILY_PLAN_PATH)
    except Exception as exc:
        log.error("Failed to save plan file: %s", exc)

    # Save to Redis
    redis = get_redis()
    if redis:
        # Round-trip through JSON so non-serialisable values become strings.
        safe = json.loads(json.dumps(plan_dict, default=str))
        redis.set_json("control:plan", safe, ttl=86400)  # 24h TTL
        log.info("Plan saved to Redis")
    else:
        log.warning("Redis not available — plan not shared")

    log.info("Plan complete: %d slots", len(plan_dict.get("slots", [])))
+GROWING_SEASON_MONTHS = (5, 6, 7, 8, 9) # May through September + +# Site location (Sde Boker, Israel) +SITE_LATITUDE = 30.87 +SITE_LONGITUDE = 34.79 +SITE_ALTITUDE = 475.0 # meters + +# Agrivoltaic panel geometry +PANEL_WIDTH = 1.13 # m (E-W dimension) +PANEL_HEIGHT = 2.05 # m above ground +ROW_SPACING = 3.0 # m between vine row centers +CANOPY_HEIGHT = 1.2 # m (VSP trellis) +CANOPY_WIDTH = 0.6 # m +ROW_AZIMUTH = 315.0 # degrees CW from north (NW–SE row orientation) + +# === TRACKER CONSTRAINTS === +TRACKER_MAX_ANGLE = 60.0 # degrees — mechanical limit of single-axis tracker +TRACKER_GCR = 0.377 # ground coverage ratio (panel_width / row_spacing) + +# === TRACKER ID MAPPING === +# Canonical mapping between integer IDs (DB/fleet) and string names (ThingsBoard) +TRACKER_ID_MAP = { + 501: "Tracker501", + 502: "Tracker502", + 503: "Tracker503", + 509: "Tracker509", +} + +# --------------------------------------------------------------------------- +# SolarWine 2.0 — Control System Parameters +# --------------------------------------------------------------------------- + +# === PV SYSTEM === +SYSTEM_CAPACITY_KW = 48.0 # DC nameplate capacity (from ThingsBoard Digital Twin) +STC_IRRADIANCE_W_M2 = 1000.0 # Standard Test Conditions irradiance for normalisation + +# === ENERGY BUDGET === +# Hard ceiling: fraction of annual PV generation the vines can "spend" on shading. +MAX_ENERGY_REDUCTION_PCT = 5.0 # % of annual generation (user's hard ceiling) +ANNUAL_RESERVE_PCT = 15.0 # emergency reserve — not allocated to any month +WEEKLY_RESERVE_PCT = 20.0 # within-week flexibility buffer +DAILY_MARGIN_PCT = 20.0 # real-time response pool within the day + +# Monthly budget weights — must sum to 1.0 across growing season. +# May budget is very low (extreme heat emergency only); the 3D model will +# naturally produce no effective dose in most May slots because fruit-set +# geometry and low stress do not warrant intervention. 
+MONTHLY_BUDGET_WEIGHTS = { + 5: 0.02, # May — near-zero; extreme emergency only (fruit-set geometry protects naturally) + 6: 0.05, # June — rare; only extreme heat spikes + 7: 0.45, # July — peak heat; primary shading window + 8: 0.40, # August — sustained heat; fruit ripening / sunburn risk + 9: 0.08, # Sept — occasional late heat waves +} + +# === NO-SHADE WINDOWS (hard constraints — shading PROHIBITED) === +# These are enforced by the InterventionGate AND the chatbot guardrails. +NO_SHADE_BEFORE_HOUR = 10 # local solar time — morning light is sacred for carbon fixation +NO_SHADE_MONTHS = [5] # May — full spring exposure for flowering / fruit set +NO_SHADE_GHI_BELOW = 300 # W/m² — overcast, already diffuse; no stress to relieve +NO_SHADE_TLEAF_BELOW = 28.0 # °C — below RuBP→Rubisco transition zone; vine wants light + +# === SHADE-ELIGIBLE CONDITIONS (ALL must be true to allow intervention) === +SHADE_ELIGIBLE_TLEAF_ABOVE = 30.0 # °C — Semillon Rubisco transition (heat bottleneck) +SHADE_ELIGIBLE_CWSI_ABOVE = 0.4 # moderate water stress confirmed by sensors +SHADE_ELIGIBLE_GHI_ABOVE = 400 # W/m² — significant direct radiation load (night/deep-overcast guard) +SHADE_ELIGIBLE_HOURS = (10, 16) # local solar time window (10:00–16:00) + +# Minimum GHI below which the sun is too weak to cause stress (night, dense cloud). +# No offset can help; skip shadow computation entirely. +MIN_MEANINGFUL_GHI = 100 # W/m² + +# === FRUITING ZONE === +FRUITING_ZONE_INDEX = 1 # mid-canopy zone in the 3-zone ShadowModel (0=basal, 1=fruiting, 2=apical) +FRUITING_ZONE_HEIGHT_M = 0.6 # center height of grape cluster zone (m) +BERRY_SUNBURN_TEMP_C = 35.0 # berry surface temperature damage threshold (°C) +FRUITING_ZONE_TARGET_PAR = 400 # µmol/m²/s — quality threshold; above this → sunburn risk + +# === TRADEOFF ENGINE === +# Candidate shading offsets tested in order (minimum-dose search: stop at first effective offset). 
+CANDIDATE_OFFSETS = [0, 3, 5, 8, 10, 15, 20] # degrees off astronomical position +SIMULATION_TIMEOUT_SEC = 5 # max seconds for one offset simulation + +# === SAFETY RAILS === +DIVERGENCE_THRESHOLD = 0.12 # 12% — if |FvCB_A - ML_A| / max > threshold → fallback to FvCB + +# === SEMILLON FvCB — Rubisco transition === +SEMILLON_TRANSITION_TEMP_C = 30.0 # °C — below: RuBP-limited (light bottleneck); above: Rubisco-limited (heat bottleneck) + +# === WEATHER PROTECTION / OPERATIONAL MODES === +WIND_STOW_SPEED_MS = 15.0 # m/s — panels stow flat (0°) above this wind speed +HEAT_SHIELD_TEMP_C = 38.0 # °C — emergency heat shield: maximum shade regardless of budget +HEAT_SHIELD_CWSI = 0.6 # CWSI threshold that activates heat shield + +# === MECHANICAL HARVESTING === +HARVEST_PARK_CLEARANCE_CM = 250 # cm — minimum clearance for harvesting machine +HARVEST_LATERAL_WIDTH_CM = 18 # cm — lateral harvester arm width +HARVESTER_RPM_RANGE = (430, 460) # harvester operating RPM range + +# === HYSTERESIS (command arbiter) === +HYSTERESIS_WINDOW_MIN = 15 # minutes — minimum time between consecutive tilt changes +ANGLE_TOLERANCE_DEG = 2.0 # degrees — changes smaller than this are suppressed + +# === PLAN DIVERGENCE RE-PLANNING === +PLAN_DIVERGENCE_THRESHOLD_KWH = 0.5 # cumulative |planned − actual| energy that triggers re-plan +PLAN_DIVERGENCE_THRESHOLD_SLOTS = 4 # consecutive divergent slots that triggers re-plan +PLAN_REPLAN_COOLDOWN_SLOTS = 8 # minimum slots between re-plans (~2 hours) + +# === ROI / LAND EQUIVALENT RATIO === +TARGET_LER = 1.5 # Land Equivalent Ratio target (energy + crop combined) + +# --------------------------------------------------------------------------- +# Agronomic Value Weighting +# --------------------------------------------------------------------------- + +# Spatial zone weights for crop value calculation. +# The 3-zone ShadowModel: zone 0 = basal/trunk (~0.2m), zone 1 = fruiting (~0.6m), zone 2 = apical (~1.0m). 
+# During veraison, zone 2 (upper canopy) has the highest marginal value for sugar loading. +ZONE_CROP_WEIGHTS = { + "pre_veraison": [0.25, 0.35, 0.40], # [zone0, zone1, zone2] + "veraison": [0.10, 0.30, 0.60], # apical leaves dominate sugar loading + "post_harvest": [0.15, 0.15, 0.70], # reserve building; top canopy matters most +} + +# Temporal (phenological stage) crop value multipliers. +# Applied on top of zone weights; reflects how much each unit of photosynthesis +# contributes to final economic yield at different growth stages. +STAGE_CROP_MULTIPLIER = { + "pre_flowering": 1.2, # setting yield capacity (bunch number, berry set) + "fruit_set": 1.0, # baseline — rapid cell division + "veraison": 1.5, # sugar loading; highest crop value per unit carbon + "post_harvest": 0.5, # reserve building only; energy production prioritized +} + +# Growing Degree Day thresholds for Semillon at Sde Boker (base temperature 10°C). +PHENOLOGY_GDD_THRESHOLDS = { + "budburst": 0, # GDD accumulation starts ~March + "flowering": 350, # ~May + "fruit_set": 500, # ~early June + "veraison": 1200, # ~mid July + "harvest": 1800, # ~late August / early September +} + +# --------------------------------------------------------------------------- +# Day-Ahead DP Planner +# --------------------------------------------------------------------------- + +DP_SLOTS_PER_DAY = 96 # 15-min intervals × 24 h +DP_SLOT_DURATION_MIN = 15 # minutes per slot +DP_MOVEMENT_COST = 0.5 # penalty per degree of tilt change (kWh-equivalent) + # biases optimizer toward smooth trajectories + +# Flat energy price (ILS/kWh) used when real-time tariff is unavailable. +# Replace with time-of-use tariff schedule for production. +DP_FLAT_ENERGY_PRICE_ILS_KWH = 0.35 + +# Base crop value (ILS / µmol CO₂ m⁻² s⁻¹ per 15-min slot) used in the +# DP utility function U_t(θ) = Price_energy · E_t(θ) + Price_crop · A_t(θ). +# Calibrate from vineyard revenue per kg grape × expected yield per A unit. 
+DP_BASE_CROP_VALUE = 0.10 + +# --------------------------------------------------------------------------- +# Simulation Log Storage +# --------------------------------------------------------------------------- + +SIMULATION_LOG_DIR = DATA_DIR / "simulation_logs" +SIMULATION_LOG_PATH = SIMULATION_LOG_DIR / "control_loop.parquet" +DAILY_PLAN_PATH = DATA_DIR / "daily_plan.json" diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..25164c5ace7237dbb243dd96cfad51f4a7da83f4 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,18 @@ +# Photosynthesis Prediction Model - dependencies +# Install: pip install -r requirements.txt + +pandas==2.3.3 +numpy==2.4.2 +scikit-learn==1.8.0 +matplotlib==3.10.8 +seaborn==0.13.2 +requests==2.32.5 +python-dotenv==1.2.1 +streamlit==1.54.0 +plotly==6.5.2 +xgboost>=2.0 +pvlib>=0.10.0 +astral>=3.2 +chronos-forecasting>=2.0 +torch>=2.0 +google-genai>=1.0 diff --git a/src/__init__.py b/src/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..841660d8bd3df7bdfb5aba7f75c5ce183900f4a8 --- /dev/null +++ b/src/__init__.py @@ -0,0 +1,52 @@ +# SolarWine src package — re-exports for backward compatibility +# Modules live in subpackages: data, models, forecasting, shading, advisor, chatbot, genai + +import sys + +# Map old flat names to new subpackage locations +_REDIRECTS = { + # data + "ims_client": "src.data.ims_client", + "sensor_data_loader": "src.data.sensor_data_loader", + "data_schema": "src.data.data_schema", + "thingsboard_client": "src.data.thingsboard_client", + "data_providers": "src.data.data_providers", + # models + "farquhar_model": "src.models.farquhar_model", + "canopy_photosynthesis": "src.models.canopy_photosynthesis", + "phenology": "src.models.phenology", + # forecasting + "predictor": "src.forecasting.predictor", + "ts_predictor": "src.forecasting.ts_predictor", + "chronos_forecaster": "src.forecasting.chronos_forecaster", + "preprocessor": 
def __getattr__(name: str):
    """Resolve a legacy flat module name to its new subpackage module.

    Called only when normal attribute lookup on the package fails. Names
    listed in ``_REDIRECTS`` are imported from their new location, registered
    in ``sys.modules`` under the old dotted name and returned; any other name
    raises ``AttributeError``.
    """
    target = _REDIRECTS.get(name)
    if target is None:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")

    import importlib

    mod = importlib.import_module(target)
    # Register the alias so later imports of the old dotted name reuse it.
    sys.modules[f"{__name__}.{name}"] = mod
    return mod


def __dir__():
    """List the package's real globals together with the legacy redirect names."""
    return sorted(set(globals()) | set(_REDIRECTS))
+""" + +from __future__ import annotations + +import json +from dataclasses import dataclass, field, asdict +from typing import Optional + +import numpy as np +import pandas as pd + +from src.genai_utils import extract_json_object, get_genai_client, get_google_api_key + + +# --------------------------------------------------------------------------- +# Data structures +# --------------------------------------------------------------------------- + +@dataclass +class HourlyStressEntry: + hour: int + limiting_state: str # "rubp" | "rubisco" | "transition" + stress_severity: str # "none" | "low" | "moderate" | "high" | "extreme" + shading_recommended: bool + + +@dataclass +class StressProfile: + rubisco_limited_hours: int + peak_stress_hour: int + peak_stress_severity: str + hourly_detail: list[HourlyStressEntry] + summary: str + + +@dataclass +class BudgetRecommendation: + daily_budget_fraction: float # 0–1 of remaining weekly budget + time_block_pct: dict[str, float] # e.g. {"10-11": 5, "11-14": 60, ...} + rationale: str + + +@dataclass +class ModelRoutingPreference: + morning: str # "fvcb" or "ml" + midday: str + afternoon: str + rationale: str + + +@dataclass +class ChronosSanityCheck: + plausible: bool + flags: list[str] + overall_assessment: str + + +@dataclass +class AdvisorReport: + date: str + phenological_stage: str + stress_profile: StressProfile + budget_recommendation: BudgetRecommendation + model_routing: ModelRoutingPreference + chronos_sanity: Optional[ChronosSanityCheck] + confidence_notes: str + raw_llm_response: str = "" + + +# --------------------------------------------------------------------------- +# System prompt — encodes vine biology rules +# --------------------------------------------------------------------------- + +SYSTEM_PROMPT = """\ +You are an agrivoltaic advisor for a Semillon grapevine vineyard in the Negev \ +desert (Sde Boker, Israel). 
You analyze day-ahead weather forecasts and produce \ +structured stress assessments for the tracker control system. + +CONTROL OBJECTIVE: +- Primary goal: maximise annual PV energy production. +- Secondary goal: protect vines from heat, water stress, and sunburn using a \ +limited shading budget (see energy budget rule). +- When in doubt and there is no clear sign of dangerous stress, prefer keeping \ +panels in their energy-maximising position. + +BIOLOGICAL GUIDELINES (strong constraints; balance them with the energy objective): + +1. TEMPERATURE TRANSITION: Below 30°C, Semillon photosynthesis is RuBP-limited \ +(light is the bottleneck — shading HURTS). Above 30°C, it becomes Rubisco-limited \ +(heat is the bottleneck — shading MAY help). The transition is gradual (28–32°C). + +2. NO SHADE BEFORE 10:00: Morning light is critical for carbon fixation. Avoid \ +recommending shading before 10:00 unless there is an extreme heat or safety event. + +3. MAY SENSITIVITY: May is the flowering/fruit-set period. Yield protection has \ +priority: avoid shading in May under normal conditions because even small losses \ +can reduce cluster number and berry set. Only recommend shade in May as a last \ +resort in extreme heat to prevent serious damage (e.g. severe sunburn or lethal stress). + +4. CWSI THRESHOLD: Crop Water Stress Index > 0.4 indicates real water stress. \ +Below 0.4, the vine is coping adequately. + +5. BERRY SUNBURN: Direct exposure at air temperature > 35°C risks berry sunburn, \ +especially on the southwest-facing side of clusters in the afternoon. + +6. ENERGY BUDGET: Annual energy sacrifice ceiling is 5%. Suggested monthly caps: \ +May=0%, Jun=15%, Jul=30%, Aug=30%, Sep=20%, Oct=5%. Treat these as soft caps: \ +stay below them unless there is an exceptional agronomic reason. + +7. MODEL ROUTING: Use FvCB (Farquhar model) for standard conditions (T < 30°C, \ +VPD < 2.5 kPa, adequate water). 
Use ML ensemble for stress conditions (T > 30°C, \ +high VPD, water stress, or any non-linear regime). + +8. PHENOLOGICAL MULTIPLIER: Stress during veraison (berry ripening) is 1.5× more \ +damaging than during vegetative growth. Protect veraison at higher cost. + +SEVERITY SCALE (anchored to air temperature): +- none: T < 28°C +- low: 28-30°C +- moderate: 30-33°C +- high: 33-37°C +- extreme: T > 37°C + +OUTPUT FORMAT — Return ONLY a JSON object (no markdown fences, no explanation) \ +with this exact schema: + +{ + "stress_profile": { + "rubisco_limited_hours": , + "peak_stress_hour": , + "peak_stress_severity": "", + "hourly_detail": [ + {"hour": , "limiting_state": "", \ +"stress_severity": "", "shading_recommended": } + ], + "summary": "<2-3 sentence natural language summary>" + }, + "budget_recommendation": { + "daily_budget_fraction": , + "time_block_pct": {"10-11": , "11-14": , "14-16": , \ +"16+": }, + "rationale": "<1-2 sentences>" + }, + "model_routing": { + "morning": "", + "midday": "", + "afternoon": "", + "rationale": "<1 sentence>" + }, + "chronos_sanity": { + "plausible": , + "flags": ["", ...], + "overall_assessment": "<1 sentence>" + }, + "confidence_notes": "" +} + +Include hourly_detail entries only for hours 6-20 (daytime). \ +If no Chronos forecast is provided, set chronos_sanity to null. +""" + + +# --------------------------------------------------------------------------- +# Helper: robust JSON extraction from LLM response +# --------------------------------------------------------------------------- + +def _extract_json(text: str) -> dict: + """Thin wrapper around the shared genai_utils implementation.""" + return extract_json_object(text) + + +# --------------------------------------------------------------------------- +# Main class +# --------------------------------------------------------------------------- + +class DayAheadAdvisor: + """ + Gemini-powered day-ahead stress advisory for agrivoltaic tracker control. 
+ + Usage + ----- + advisor = DayAheadAdvisor() + report = advisor.advise( + date="2025-07-15", + weather_forecast=df_ims, + phenological_stage="veraison", + remaining_weekly_budget_kWh=12.5, + remaining_monthly_budget_kWh=45.0, + ) + """ + + def __init__( + self, + model_name: str = "gemini-2.5-flash", + api_key: Optional[str] = None, + verbose: bool = True, + ): + self.model_name = model_name + self._api_key = api_key + self._client = None + self.verbose = verbose + # Cache advisory per date+stage (same day = same forecast) + self._report_cache: dict[str, AdvisorReport] = {} + + # ------------------------------------------------------------------ + # Internal helpers + # ------------------------------------------------------------------ + + @property + def api_key(self) -> str: + return get_google_api_key(self._api_key) + + @property + def client(self): + if self._client is None: + self._client = get_genai_client(self._api_key) + return self._client + + def _call_gemini(self, user_prompt: str) -> str: + """Send a prompt to Gemini and return the raw text response.""" + response = self.client.models.generate_content( + model=self.model_name, + contents=user_prompt, + config={"system_instruction": SYSTEM_PROMPT}, + ) + return response.text + + def _log(self, msg: str) -> None: + if self.verbose: + print(f"[DayAheadAdvisor] {msg}") + + # ------------------------------------------------------------------ + # Forecast formatting + # ------------------------------------------------------------------ + + def _format_weather_forecast(self, weather_df: pd.DataFrame) -> str: + """Aggregate 15-min IMS data to hourly and format as text for Gemini.""" + df = weather_df.copy() + + # Ensure datetime index + if not isinstance(df.index, pd.DatetimeIndex): + for col in ["timestamp_utc", "time", "datetime", "timestamp"]: + if col in df.columns: + df.index = pd.to_datetime(df[col], utc=True) + break + + # Map common column names + col_map = {} + for c in df.columns: + cl = c.lower() 
+ if "temp" in cl and "dew" not in cl: + col_map["temperature_c"] = c + elif "ghi" in cl or "radiation" in cl or "irradiance" in cl: + col_map["ghi_w_m2"] = c + elif "rh" in cl or "humid" in cl: + col_map["rh_percent"] = c + elif "wind" in cl and "speed" in cl: + col_map["wind_speed_ms"] = c + elif "vpd" in cl: + col_map["vpd_kpa"] = c + + # Resample to hourly + hourly = df.resample("1h").mean(numeric_only=True) + + lines = ["HOURLY WEATHER FORECAST:"] + lines.append(f"{'Hour':>4} {'T(°C)':>7} {'GHI':>7} {'RH(%)':>7} {'Wind':>7}") + lines.append("-" * 45) + + temp_col = col_map.get("temperature_c") + ghi_col = col_map.get("ghi_w_m2") + rh_col = col_map.get("rh_percent") + wind_col = col_map.get("wind_speed_ms") + + for idx, row in hourly.iterrows(): + hour = idx.hour if hasattr(idx, "hour") else "?" + t = f"{row[temp_col]:.1f}" if temp_col and temp_col in row.index else "N/A" + g = f"{row[ghi_col]:.0f}" if ghi_col and ghi_col in row.index else "N/A" + r = f"{row[rh_col]:.0f}" if rh_col and rh_col in row.index else "N/A" + w = f"{row[wind_col]:.1f}" if wind_col and wind_col in row.index else "N/A" + lines.append(f"{hour:>4} {t:>7} {g:>7} {r:>7} {w:>7}") + + # Summary stats + if temp_col and temp_col in hourly.columns: + temps = hourly[temp_col].dropna() + if not temps.empty: + lines.append(f"\nSummary: Tmax={temps.max():.1f}°C, " + f"Tmin={temps.min():.1f}°C, " + f"Hours above 30°C: {int((temps > 30).sum())}, " + f"Hours above 35°C: {int((temps > 35).sum())}") + + return "\n".join(lines) + + def _format_chronos_forecast(self, chronos_df: pd.DataFrame) -> str: + """Format Chronos A forecast as text for Gemini.""" + df = chronos_df.copy() + + if not isinstance(df.index, pd.DatetimeIndex): + for col in ["timestamp_utc", "time", "datetime", "timestamp"]: + if col in df.columns: + df.index = pd.to_datetime(df[col], utc=True) + break + + # Resample to hourly + hourly = df.resample("1h").agg({ + c: "median" for c in df.select_dtypes(include=[np.number]).columns + }) + + # 
Look for A / prediction columns + a_col = None + for c in df.columns: + cl = c.lower() + if cl in ("a", "a_n", "predicted_a", "forecast", "median"): + a_col = c + break + if a_col is None and len(df.select_dtypes(include=[np.number]).columns) > 0: + a_col = df.select_dtypes(include=[np.number]).columns[0] + + if a_col is None: + return "CHRONOS FORECAST: No numeric prediction column found." + + lines = ["CHRONOS A FORECAST (hourly median):"] + for idx, row in hourly.iterrows(): + hour = idx.hour if hasattr(idx, "hour") else "?" + val = row[a_col] if a_col in row.index else float("nan") + lines.append(f" Hour {hour:2d}: A = {val:.2f} µmol m⁻² s⁻¹") + + a_vals = hourly[a_col].dropna() + if not a_vals.empty: + lines.append(f"\nPeak A: {a_vals.max():.2f} at hour " + f"{hourly[a_col].idxmax().hour if hasattr(hourly[a_col].idxmax(), 'hour') else '?'}") + + return "\n".join(lines) + + # ------------------------------------------------------------------ + # Default (fallback) report + # ------------------------------------------------------------------ + + def _default_report(self, date: str, stage: str) -> AdvisorReport: + """ + Conservative fallback report when Gemini is unavailable. + + Assumes moderate midday stress, standard budget distribution, + FvCB morning + ML midday/afternoon. 
+ """ + self._log("Using conservative fallback report (API unavailable).") + + hourly = [] + for h in range(6, 21): + if h < 10: + entry = HourlyStressEntry(h, "rubp", "none", False) + elif h < 12: + entry = HourlyStressEntry(h, "transition", "low", False) + elif h < 16: + entry = HourlyStressEntry(h, "rubisco", "moderate", True) + else: + entry = HourlyStressEntry(h, "transition", "low", False) + hourly.append(entry) + + return AdvisorReport( + date=date, + phenological_stage=stage, + stress_profile=StressProfile( + rubisco_limited_hours=4, + peak_stress_hour=14, + peak_stress_severity="moderate", + hourly_detail=hourly, + summary=( + "Fallback estimate: moderate midday stress assumed (12:00-16:00). " + "Conservative shading recommended during peak hours. " + "Actual conditions may differ — advisory generated without API access." + ), + ), + budget_recommendation=BudgetRecommendation( + daily_budget_fraction=0.15, + time_block_pct={"10-11": 5, "11-14": 60, "14-16": 30, "16+": 5}, + rationale="Standard budget distribution (fallback). " + "Concentrates 60% of daily budget in the 11-14 peak stress window.", + ), + model_routing=ModelRoutingPreference( + morning="fvcb", + midday="ml", + afternoon="ml", + rationale="FvCB for cool morning (T < 30°C), ML for midday/afternoon stress (fallback).", + ), + chronos_sanity=None, + confidence_notes="Fallback report — Gemini API was unavailable. 
" + "Using biologically conservative defaults.", + ) + + # ------------------------------------------------------------------ + # Parse Gemini JSON response → AdvisorReport + # ------------------------------------------------------------------ + + def _parse_report( + self, date: str, stage: str, parsed: dict, raw_response: str + ) -> AdvisorReport: + """Convert parsed JSON dict to AdvisorReport with safe defaults.""" + + # --- Stress profile --- + sp = parsed.get("stress_profile", {}) + hourly_raw = sp.get("hourly_detail", []) + hourly_entries = [] + for h in hourly_raw: + hourly_entries.append(HourlyStressEntry( + hour=h.get("hour", 0), + limiting_state=h.get("limiting_state", "rubp"), + stress_severity=h.get("stress_severity", "none"), + shading_recommended=h.get("shading_recommended", False), + )) + + stress_profile = StressProfile( + rubisco_limited_hours=sp.get("rubisco_limited_hours", 0), + peak_stress_hour=sp.get("peak_stress_hour", 12), + peak_stress_severity=sp.get("peak_stress_severity", "none"), + hourly_detail=hourly_entries, + summary=sp.get("summary", "No summary provided."), + ) + + # --- Budget recommendation --- + br = parsed.get("budget_recommendation", {}) + budget_rec = BudgetRecommendation( + daily_budget_fraction=br.get("daily_budget_fraction", 0.15), + time_block_pct=br.get("time_block_pct", {"10-11": 5, "11-14": 60, "14-16": 30, "16+": 5}), + rationale=br.get("rationale", "No rationale provided."), + ) + + # --- Model routing --- + mr = parsed.get("model_routing", {}) + model_routing = ModelRoutingPreference( + morning=mr.get("morning", "fvcb"), + midday=mr.get("midday", "ml"), + afternoon=mr.get("afternoon", "ml"), + rationale=mr.get("rationale", "No rationale provided."), + ) + + # --- Chronos sanity check (optional) --- + cs = parsed.get("chronos_sanity") + chronos_sanity = None + if cs is not None: + chronos_sanity = ChronosSanityCheck( + plausible=cs.get("plausible", True), + flags=cs.get("flags", []), + 
overall_assessment=cs.get("overall_assessment", "No assessment."), + ) + + return AdvisorReport( + date=date, + phenological_stage=stage, + stress_profile=stress_profile, + budget_recommendation=budget_rec, + model_routing=model_routing, + chronos_sanity=chronos_sanity, + confidence_notes=parsed.get("confidence_notes", ""), + raw_llm_response=raw_response, + ) + + # ------------------------------------------------------------------ + # Main advisory method + # ------------------------------------------------------------------ + + def advise( + self, + date: str, + weather_forecast: pd.DataFrame, + phenological_stage: str = "vegetative", + remaining_weekly_budget_kWh: float = 20.0, + remaining_monthly_budget_kWh: float = 80.0, + chronos_forecast: Optional[pd.DataFrame] = None, + gdd_cumulative: Optional[float] = None, + vine_snapshot: Optional[object] = None, + ) -> AdvisorReport: + """ + Analyze day-ahead weather forecast and produce structured advisory. + + Parameters + ---------- + date : target date string (e.g. 
"2025-07-15") + weather_forecast : DataFrame of IMS weather data (15-min or hourly) + phenological_stage : current vine stage (vegetative/flowering/veraison/harvest) + remaining_weekly_budget_kWh : remaining shading budget for the week + remaining_monthly_budget_kWh : remaining shading budget for the month + chronos_forecast : optional Chronos A prediction DataFrame + gdd_cumulative : optional cumulative growing degree days + vine_snapshot : optional VineSnapshot from ThingsBoardClient.get_vine_snapshot(); + seeds the advisory with current on-site sensor state (soil moisture, + fruiting-zone PAR, treatment vs reference comparison) + + Returns + ------- + AdvisorReport with stress profile, budget, routing, and sanity check + """ + self._log(f"Generating advisory for {date} (stage: {phenological_stage})") + + # Return cached report if same date+stage already advised + cache_key = f"{date}|{phenological_stage}" + if cache_key in self._report_cache: + self._log("Returning cached advisory for this date+stage.") + return self._report_cache[cache_key] + + # Build user prompt + weather_text = self._format_weather_forecast(weather_forecast) + + prompt_parts = [ + f"DATE: {date}", + f"PHENOLOGICAL STAGE: {phenological_stage}", + f"REMAINING WEEKLY BUDGET: {remaining_weekly_budget_kWh:.1f} kWh", + f"REMAINING MONTHLY BUDGET: {remaining_monthly_budget_kWh:.1f} kWh", + ] + if gdd_cumulative is not None: + prompt_parts.append(f"CUMULATIVE GDD: {gdd_cumulative:.0f}") + + if vine_snapshot is not None: + prompt_parts.append("") + try: + prompt_parts.append(vine_snapshot.to_advisor_text()) + except Exception: + pass + + prompt_parts.append("") + prompt_parts.append(weather_text) + + if chronos_forecast is not None: + prompt_parts.append("") + prompt_parts.append(self._format_chronos_forecast(chronos_forecast)) + else: + prompt_parts.append("\nNo Chronos forecast available — set chronos_sanity to null.") + + user_prompt = "\n".join(prompt_parts) + + # Call Gemini + try: + raw = 
self._call_gemini(user_prompt) + parsed = _extract_json(raw) + report = self._parse_report(date, phenological_stage, parsed, raw) + self._report_cache[cache_key] = report + self._log("Advisory generated successfully via Gemini.") + return report + except Exception as exc: + self._log(f"Gemini API error: {exc}") + return self._default_report(date, phenological_stage) + + # ------------------------------------------------------------------ + # Serialization + # ------------------------------------------------------------------ + + @staticmethod + def report_to_dict(report: AdvisorReport) -> dict: + """Convert AdvisorReport to a plain dict (JSON-serializable).""" + return asdict(report) + + @staticmethod + def report_to_json(report: AdvisorReport, indent: int = 2) -> str: + """Convert AdvisorReport to a JSON string.""" + return json.dumps(asdict(report), indent=indent, default=str) + + +# --------------------------------------------------------------------------- +# CLI entry point +# --------------------------------------------------------------------------- + +if __name__ == "__main__": + from pathlib import Path + + IMS_CSV = Path(__file__).resolve().parent.parent / "Data" / "ims" / "ims_merged_15min.csv" + + if not IMS_CSV.exists(): + print("No IMS cache data found. 
Cannot run advisory demo.") + print(f"Looked in: {IMS_CSV}") + raise SystemExit(1) + + print(f"Loading IMS data from: {IMS_CSV.name}") + df = pd.read_csv(IMS_CSV, parse_dates=True) + + # Try to parse datetime + for col in ["timestamp_utc", "datetime", "time", "timestamp"]: + if col in df.columns: + df.index = pd.to_datetime(df[col]) + break + + # Use last day of data + if isinstance(df.index, pd.DatetimeIndex): + last_date = df.index.date[-1] + day_data = df[df.index.date == last_date] + date_str = str(last_date) + else: + day_data = df.tail(96) # ~24h of 15-min data + date_str = "unknown" + + print(f"Date: {date_str}, rows: {len(day_data)}") + + advisor = DayAheadAdvisor(verbose=True) + report = advisor.advise( + date=date_str, + weather_forecast=day_data, + phenological_stage="veraison", + remaining_weekly_budget_kWh=15.0, + remaining_monthly_budget_kWh=50.0, + ) + + print("\n" + "=" * 60) + print("DAY-AHEAD STRESS ADVISORY") + print("=" * 60) + print(f"Date: {report.date}") + print(f"Stage: {report.phenological_stage}") + print(f"\nStress Summary: {report.stress_profile.summary}") + print(f"Rubisco-limited hours: {report.stress_profile.rubisco_limited_hours}") + print(f"Peak stress: {report.stress_profile.peak_stress_severity} " + f"at hour {report.stress_profile.peak_stress_hour}") + print(f"\nBudget: {report.budget_recommendation.daily_budget_fraction:.0%} " + f"of weekly budget") + print(f"Time blocks: {report.budget_recommendation.time_block_pct}") + print(f"Rationale: {report.budget_recommendation.rationale}") + print(f"\nModel routing: morning={report.model_routing.morning}, " + f"midday={report.model_routing.midday}, " + f"afternoon={report.model_routing.afternoon}") + if report.chronos_sanity: + print(f"\nChronos sanity: plausible={report.chronos_sanity.plausible}") + print(f" Flags: {report.chronos_sanity.flags}") + print(f"\nConfidence: {report.confidence_notes}") + print("\n--- Full JSON ---") + print(DayAheadAdvisor.report_to_json(report)) diff --git 
a/src/advisor/safety_rails.py b/src/advisor/safety_rails.py new file mode 100644 index 0000000000000000000000000000000000000000..3edc860e51dd242ad8f87f395670ee3048dfe2ce --- /dev/null +++ b/src/advisor/safety_rails.py @@ -0,0 +1,179 @@ +""" +SafetyRails: FvCB vs ML divergence guard for the SolarWine 2.0 control loop. + +Position in the control loop (Phase 3, Step 7): + After TradeoffEngine selects a minimum dose, SafetyRails validates that + the FvCB and ML photosynthesis predictions are sufficiently consistent. + + If the two models disagree by more than DIVERGENCE_THRESHOLD (12%), the + system cannot confidently predict that shading will help, so it falls back + to full astronomical tracking (zero energy sacrifice, zero risk). + +Rationale +--------- +The FvCB mechanistic model and ML ensemble are calibrated on different +assumptions: + - FvCB is reliable in standard conditions (T < 30°C, moderate VPD). + - ML handles non-linear stress regimes better. + +When both agree → high confidence → proceed with intervention. +When they disagree significantly → sensor fault, regime change, or edge +case not covered by calibration. The safe default is no intervention. 
@dataclass
class SafetyCheckResult:
    """Outcome of a single FvCB vs ML divergence check."""

    passed: bool
    fvcb_a: float
    ml_a: float
    # |fvcb_a - ml_a| / max(|fvcb_a|, |ml_a|, 1e-6) × 100; NaN when an input
    # was non-finite and no divergence could be computed.
    divergence_pct: float
    fallback_needed: bool  # True when the caller must stay at θ_astro
    reason: str  # human-readable explanation

    def __str__(self) -> str:
        status = "PASS" if self.passed else "FAIL → fallback to θ_astro"
        # NOTE: this prints the module-level default threshold; a custom
        # threshold given to SafetyRails(threshold=...) is not carried here.
        return (
            f"SafetyRails [{status}] "
            f"FvCB={self.fvcb_a:.2f} ML={self.ml_a:.2f} "
            f"divergence={self.divergence_pct:.1f}% "
            f"(threshold={DIVERGENCE_THRESHOLD * 100:.0f}%)"
        )


class SafetyRails:
    """
    Validates that FvCB and ML model outputs are consistent before any
    shading command is issued.

    Usage
    -----
        rails = SafetyRails()
        result = rails.check(fvcb_a=14.3, ml_a=14.8)
        if result.fallback_needed:
            # stay at θ_astro, log result
    """

    def __init__(self, threshold: Optional[float] = None) -> None:
        """
        Parameters
        ----------
        threshold : divergence fraction (0–1) that triggers fallback.
                    Defaults to DIVERGENCE_THRESHOLD from settings.
        """
        self.threshold = threshold if threshold is not None else DIVERGENCE_THRESHOLD

    def check(
        self,
        fvcb_a: float,
        ml_a: float,
        context: Optional[str] = None,
    ) -> SafetyCheckResult:
        """
        Compare FvCB and ML photosynthesis outputs.

        Parameters
        ----------
        fvcb_a  : net A from FarquharModel (µmol CO₂ m⁻² s⁻¹)
        ml_a    : net A from ML ensemble (µmol CO₂ m⁻² s⁻¹)
        context : optional string for logging (e.g. "2025-07-15 13:00");
                  accepted for API compatibility, not used in the result.

        Returns
        -------
        SafetyCheckResult
            ``fallback_needed`` is True when the relative divergence exceeds
            the threshold, when both models predict carbon loss, or when
            either input is NaN/inf.
        """
        import math  # function-scope: the module preamble does not import math

        # A NaN/inf prediction compares False against every threshold, so
        # without this guard a faulty model output would be reported as
        # "models agree". Treat it as the unsafe case instead.
        if not (math.isfinite(fvcb_a) and math.isfinite(ml_a)):
            return SafetyCheckResult(
                passed=False,
                fvcb_a=fvcb_a,
                ml_a=ml_a,
                divergence_pct=float("nan"),
                fallback_needed=True,
                reason=(
                    f"Non-finite model output (FvCB={fvcb_a}, ML={ml_a}) — "
                    "cannot check divergence. "
                    "Falling back to full astronomical tracking."
                ),
            )

        # The 1e-6 floor avoids division by zero when both models return 0.
        denominator = max(abs(fvcb_a), abs(ml_a), 1e-6)
        divergence = abs(fvcb_a - ml_a) / denominator
        divergence_pct = divergence * 100.0

        fallback_needed = divergence > self.threshold

        if fallback_needed:
            reason = (
                f"Models diverge by {divergence_pct:.1f}% "
                f"(FvCB={fvcb_a:.2f}, ML={ml_a:.2f}) — "
                f"exceeds {self.threshold * 100:.0f}% threshold. "
                f"Falling back to full astronomical tracking."
            )
        elif fvcb_a < 0 and ml_a < 0:
            # Models agree, but both report net carbon loss.
            reason = "Both models predict carbon loss (dark/night); no shading beneficial."
            fallback_needed = True
        else:
            reason = (
                f"Models agree within {self.threshold * 100:.0f}% threshold "
                f"(FvCB={fvcb_a:.2f}, ML={ml_a:.2f}, "
                f"divergence={divergence_pct:.1f}%). Proceeding."
            )

        return SafetyCheckResult(
            passed=not fallback_needed,
            fvcb_a=fvcb_a,
            ml_a=ml_a,
            divergence_pct=round(divergence_pct, 2),
            fallback_needed=fallback_needed,
            reason=reason,
        )

    def check_from_log(self, fvcb_a: Optional[float], ml_a: Optional[float]) -> SafetyCheckResult:
        """
        Variant that handles None inputs gracefully (e.g. ML model not loaded).

        If exactly one value is None, the check passes with a warning — the
        calling code should use whichever model is available. If both are
        None there is nothing to proceed with, so fallback is requested.
        """
        if fvcb_a is None and ml_a is None:
            # Previously this path raised TypeError while formatting None.
            return SafetyCheckResult(
                passed=False,
                fvcb_a=0.0,
                ml_a=0.0,
                divergence_pct=0.0,
                fallback_needed=True,
                reason=(
                    "No model output available. "
                    "Falling back to full astronomical tracking."
                ),
            )
        if fvcb_a is None or ml_a is None:
            available = fvcb_a if fvcb_a is not None else ml_a
            return SafetyCheckResult(
                passed=True,
                fvcb_a=fvcb_a or 0.0,
                ml_a=ml_a or 0.0,
                divergence_pct=0.0,
                fallback_needed=False,
                reason=(
                    f"Only one model available (value={available:.2f}). "
                    "Cannot check divergence; proceeding with available model."
                ),
            )
        return self.check(fvcb_a, ml_a)
class BaselinePredictor:
    """Hybrid FvCB + ML photosynthesis prediction for day-ahead planning.

    Parameters
    ----------
    fvcb_model : FarquharModel, optional
        Lazy-initialised if not provided.
    ml_predictor : PhotosynthesisPredictor, optional
        Trained ML model. If None, FvCB-only mode is used. FvCB-only output
        is also used when the ML model cannot produce a full-day profile.
    routing_agent : RoutingAgent, optional
        Stored on the instance. Per-slot routing in this class calls
        ``RoutingAgent._rule_based_route`` directly (no API calls).
    """

    _SLOTS_PER_DAY = 96  # 24 h at 15-min resolution
    _MIN_DAYLIGHT_GHI = 50.0  # W/m²; slots below this are treated as night

    def __init__(
        self,
        fvcb_model=None,
        ml_predictor=None,
        routing_agent=None,
    ):
        self._fvcb = fvcb_model
        self._ml = ml_predictor
        self._router = routing_agent

    @property
    def fvcb(self):
        """Return the FvCB model, importing and constructing it on first use."""
        if self._fvcb is None:
            from src.models.farquhar_model import FarquharModel
            self._fvcb = FarquharModel()
        return self._fvcb

    # ------------------------------------------------------------------
    # Main API
    # ------------------------------------------------------------------

    def predict_day(
        self,
        forecast_temps: List[float],
        forecast_ghi: List[float],
        co2_ppm: float = 400.0,
        rh_pct: float = 40.0,
    ) -> List[float]:
        """Predict photosynthesis rate A for each 15-min slot.

        Parameters
        ----------
        forecast_temps : list of 96 floats
            Forecast air temperature (°C) per slot.
        forecast_ghi : list of 96 floats
            Forecast GHI (W/m²) per slot.
        co2_ppm : float
            Atmospheric CO₂ concentration (default 400 ppm).
        rh_pct : float
            Relative humidity (%) for VPD estimation (default 40%).

        Returns
        -------
        list of 96 floats
            Predicted net photosynthesis A (µmol CO₂ m⁻² s⁻¹) per slot.
            0.0 for nighttime slots.

        Raises
        ------
        ValueError
            If either forecast does not contain exactly 96 values.
        """
        n_slots = self._SLOTS_PER_DAY
        # Explicit raise instead of ``assert`` so the check survives ``-O``.
        if len(forecast_temps) != n_slots or len(forecast_ghi) != n_slots:
            raise ValueError(
                f"predict_day expects {n_slots} values per forecast; got "
                f"{len(forecast_temps)} temperatures and {len(forecast_ghi)} GHI values"
            )

        fvcb_predictions = self._predict_fvcb(
            forecast_temps, forecast_ghi, co2_ppm, rh_pct,
        )

        # No ML model configured: FvCB-only mode.
        if self._ml is None:
            return fvcb_predictions

        ml_predictions = self._predict_ml(forecast_temps, forecast_ghi)
        if ml_predictions is None:
            # Routing against a placeholder ML profile would inject zeros
            # into daytime slots, so degrade to FvCB-only instead.
            logger.warning("ML baseline unavailable; using FvCB-only profile.")
            return fvcb_predictions

        return self._route_predictions(
            forecast_temps, forecast_ghi,
            fvcb_predictions, ml_predictions,
        )

    # ------------------------------------------------------------------
    # FvCB predictions
    # ------------------------------------------------------------------

    def _predict_fvcb(
        self,
        temps: List[float],
        ghis: List[float],
        co2_ppm: float,
        rh_pct: float,
    ) -> List[float]:
        """Run FvCB for each slot. Returns one A value per slot (clamped ≥ 0)."""
        predictions: List[float] = []
        for slot, (temp, ghi) in enumerate(zip(temps, ghis)):
            # Nighttime or negligible light
            if ghi < self._MIN_DAYLIGHT_GHI:
                predictions.append(0.0)
                continue

            # Estimate PAR from GHI (roughly 2× conversion for photosynthetically active)
            par = ghi * 2.0
            # Estimate Tleaf from Tair (proxy: +2°C under sun)
            tleaf = temp + 2.0
            vpd = self._estimate_vpd(temp, rh_pct)

            try:
                result = self.fvcb.calc_photosynthesis_semillon(
                    PAR=par,
                    Tleaf=tleaf,
                    CO2=co2_ppm,
                    VPD=vpd,
                    Tair=temp,
                )
                # The model may return a bare value or a tuple whose first
                # element is A.
                a_net = result[0] if isinstance(result, tuple) else result
                predictions.append(max(0.0, float(a_net)))
            except Exception as exc:
                # Best-effort per slot: one failing slot must not abort the day.
                logger.debug("FvCB failed at slot %d: %s", slot, exc)
                predictions.append(0.0)

        return predictions

    @staticmethod
    def _estimate_vpd(tair_c: float, rh_pct: float) -> float:
        """Estimate VPD (kPa) from air temperature and relative humidity."""
        # Tetens formula for saturated vapor pressure
        es = 0.6108 * math.exp(17.27 * tair_c / (tair_c + 237.3))
        ea = es * rh_pct / 100.0
        return max(0.0, es - ea)

    # ------------------------------------------------------------------
    # ML predictions
    # ------------------------------------------------------------------

    def _predict_ml(
        self,
        temps: List[float],
        ghis: List[float],
    ) -> Optional[List[float]]:
        """Run the lowest-MAE ML model for every slot.

        Returns one A value per slot (clamped ≥ 0), or None when no usable
        prediction could be produced (no model, missing features, wrong
        output length, or any prediction error).
        """
        if self._ml is None:
            return None

        n_slots = self._SLOTS_PER_DAY
        try:
            import pandas as pd

            # Build feature DataFrame from what the forecast provides.
            df = pd.DataFrame({
                "air_temperature_c": temps,
                "ghi_w_m2": ghis,
                "hour": [slot // 4 for slot in range(n_slots)],
                "minute": [(slot % 4) * 15 for slot in range(n_slots)],
            })

            # Pick the model with the lowest finite MAE (first one wins ties).
            best_model = None
            best_mae = float("inf")
            for name, result in self._ml.results.items():
                mae = result.get("mae", float("inf"))
                if mae < best_mae:
                    best_mae = mae
                    best_model = name

            if not best_model or best_model not in self._ml.models:
                return None
            model = self._ml.models[best_model]

            expected = getattr(model, "feature_names_in_", None)
            if expected is None:
                features = df
            else:
                expected = list(expected)
                missing = [c for c in expected if c not in df.columns]
                if missing:
                    logger.warning(
                        "ML model %s needs features not in the forecast: %s",
                        best_model, missing,
                    )
                    return None
                # Select in the order the model was fitted with; estimators
                # that record feature names validate that order.
                features = df[expected]

            preds = [max(0.0, float(p)) for p in model.predict(features)]
            if len(preds) != n_slots:
                logger.warning(
                    "ML model %s returned %d predictions, expected %d",
                    best_model, len(preds), n_slots,
                )
                return None
            return preds

        except Exception as exc:
            logger.warning("ML prediction failed: %s", exc)
            return None

    # ------------------------------------------------------------------
    # Routing
    # ------------------------------------------------------------------

    def _route_predictions(
        self,
        temps: List[float],
        ghis: List[float],
        fvcb_preds: List[float],
        ml_preds: List[float],
    ) -> List[float]:
        """Pick FvCB or ML per slot using routing logic."""
        from src.chatbot.routing_agent import RoutingAgent

        predictions: List[float] = []
        for slot, (temp, ghi, a_fvcb, a_ml) in enumerate(
            zip(temps, ghis, fvcb_preds, ml_preds)
        ):
            # Keep the documented contract (0.0 at night) regardless of
            # which model the router would have picked.
            if ghi < self._MIN_DAYLIGHT_GHI:
                predictions.append(0.0)
                continue

            telemetry = {
                "temp_c": temp,
                "ghi_w_m2": ghi,
                "hour": slot // 4,
            }

            # Use rule-based routing only (no API calls for batch prediction)
            choice = RoutingAgent._rule_based_route(telemetry)
            if choice is None:
                # Transition zone: weight FvCB 60% / ML 40% as compromise
                a = 0.6 * a_fvcb + 0.4 * a_ml
            elif choice == "ml":
                a = a_ml
            else:
                a = a_fvcb

            predictions.append(a)

        return predictions
def log_feedback(
    query: str,
    response: str,
    feedback: str,
    confidence: str = "",
    sources: Optional[list[str]] = None,
    tool_calls: Optional[list[dict]] = None,
    rule_violations: Optional[list[dict]] = None,
    response_mode: str = "",
    comment: str = "",
    *,
    path: Optional[Path] = None,
) -> None:
    """Append a feedback entry to the JSONL file.

    Parameters
    ----------
    query : str
        The user's original question.
    response : str
        The chatbot's response text (only the first 500 characters are stored).
    feedback : str
        One of: "thumbs_up", "thumbs_down", "flag_incorrect".
    confidence, sources, tool_calls, rule_violations, response_mode :
        Metadata from the ChatResponse. Only ``name`` and ``args`` of each
        tool call are kept.
    comment : str
        Optional free-text comment from the user.
    path : Path, optional
        Target file; defaults to FEEDBACK_FILE.

    Write failures are logged and swallowed: feedback logging is
    best-effort and must not break the chat flow.
    """
    target = FEEDBACK_FILE if path is None else Path(path)
    entry = {
        "timestamp": datetime.now(tz=timezone.utc).isoformat(),
        "query": query,
        "response": response[:500],  # truncate for storage
        "feedback": feedback,
        "confidence": confidence,
        "sources": sources or [],
        "tool_calls": [
            {"name": tc.get("name", ""), "args": tc.get("args", {})}
            for tc in (tool_calls or [])
        ],
        "rule_violations": rule_violations or [],
        "response_mode": response_mode,
        "comment": comment,
    }

    try:
        target.parent.mkdir(parents=True, exist_ok=True)
        with open(target, "a", encoding="utf-8") as f:
            f.write(json.dumps(entry, default=str) + "\n")
        logger.info("Feedback logged: %s for query: %s", feedback, query[:50])
    except Exception as exc:
        logger.warning("Failed to log feedback: %s", exc)


def load_feedback(limit: int = 100, *, path: Optional[Path] = None) -> list[dict]:
    """Load the most recent feedback entries, oldest first.

    Parameters
    ----------
    limit : int
        Maximum number of entries to return. Values <= 0 return an empty
        list (a plain ``entries[-0:]`` slice would return everything).
    path : Path, optional
        Source file; defaults to FEEDBACK_FILE.

    Returns
    -------
    list of dict
        Up to ``limit`` entries from the end of the file. Blank lines are
        ignored; malformed lines are skipped individually so one corrupt
        record does not hide the entries written after it.
    """
    source = FEEDBACK_FILE if path is None else Path(path)
    if limit <= 0 or not source.exists():
        return []

    entries = []
    skipped = 0
    try:
        with open(source, encoding="utf-8") as f:
            for raw in f:
                raw = raw.strip()
                if not raw:
                    continue
                try:
                    entries.append(json.loads(raw))
                except json.JSONDecodeError:
                    skipped += 1
    except (OSError, UnicodeError) as exc:
        logger.warning("Failed to load feedback: %s", exc)

    if skipped:
        logger.warning("Skipped %d malformed feedback line(s) in %s", skipped, source)

    return entries[-limit:]


def feedback_summary(*, path: Optional[Path] = None) -> dict:
    """Return a summary of feedback stats.

    Counts are computed over at most the 10000 most recent entries.
    Returns ``{"total": 0}`` when there is no feedback.
    """
    entries = load_feedback(limit=10000, path=path)
    if not entries:
        return {"total": 0}

    return {
        "total": len(entries),
        "thumbs_up": sum(1 for e in entries if e.get("feedback") == "thumbs_up"),
        "thumbs_down": sum(1 for e in entries if e.get("feedback") == "thumbs_down"),
        "flagged": sum(1 for e in entries if e.get("feedback") == "flag_incorrect"),
    }
# ---------------------------------------------------------------------------
# 1. Query classifier — decides whether a tool call is mandatory
# ---------------------------------------------------------------------------

# Keywords that indicate user is asking about real-time / site-specific data
_DATA_KEYWORDS = [
    # Weather / environment
    r"\btemperature\b", r"\btemp\b", r"\bhow hot\b", r"\bhow cold\b",
    r"\bweather\b", r"\bforecast\b", r"\brain\b", r"\bwind\b",
    r"\bhumidity\b", r"\bghi\b", r"\bradiation\b", r"\birradiance\b",
    # Sensors
    r"\bsensor\b", r"\bsoil\b", r"\bmoisture\b", r"\bleaf temp\b",
    r"\bpar\b", r"\bndvi\b", r"\bcwsi\b", r"\bvpd\b",
    # Photosynthesis / predictions
    r"\bphotosynthesis\b", r"\bassimilation\b", r"\bpredict\b",
    r"\bA rate\b", r"\bcarbon\b",
    # Energy
    r"\benergy\b", r"\bkwh\b", r"\bpower\b", r"\bgeneration\b",
    r"\binverter\b",
    # Irrigation — stem match: "irrigat" is never a whole word, so a
    # trailing \b could not match "irrigate"/"irrigation"/"irrigating".
    r"\birrigat\w*", r"\bwater\b",
    # Shading — action-oriented
    r"\bshade\b", r"\bshading\b", r"\btilt\b", r"\bangle\b", r"\bpanel\b",
    # Temporal / current state
    r"\bright now\b", r"\bcurrent\b", r"\btoday\b", r"\btomorrow\b",
    r"\byesterday\b", r"\bthis week\b", r"\blast \d+ (hour|day|minute)",
    # Direct data ask
    r"\bshow me\b", r"\bwhat is\b", r"\bwhat are\b", r"\bhow much\b",
    r"\bcheck\b", r"\bstatus\b", r"\bstate\b",
]

# Compile once
_DATA_PATTERNS = [re.compile(p, re.IGNORECASE) for p in _DATA_KEYWORDS]

# Keywords for pure knowledge / biology rule questions (no tool needed)
_KNOWLEDGE_KEYWORDS = [
    r"\bwhy\b.*\brule\b", r"\bexplain\b.*\brule\b",
    r"\bwhat is rubisco\b", r"\bwhat is fvcb\b", r"\bwhat is farquhar\b",
    r"\btell me about\b.*\bbiology\b", r"\bhow does photosynthesis work\b",
    r"\bwhat does .* mean\b",
]

_KNOWLEDGE_PATTERNS = [re.compile(p, re.IGNORECASE) for p in _KNOWLEDGE_KEYWORDS]

# Message starts with a greeting / acknowledgement word.
_GREETING_PATTERN = re.compile(r"^(hi|hello|hey|thanks|thank you|ok|bye)\b", re.I)

# Generic question phrases that do not, on their own, imply a data request.
_GENERIC_ASK_PHRASES = frozenset(
    {"what is", "what are", "show me", "how much", "check", "status", "state"}
)


@dataclass
class QueryClass:
    """Result of query classification."""
    requires_data: bool  # True = tool call is mandatory
    category: str  # "data", "knowledge", "greeting", "ambiguous"
    matched_keywords: list[str] = field(default_factory=list)


def classify_query(user_message: str) -> QueryClass:
    """Classify whether a user query requires tool-grounded data.

    Precedence: knowledge patterns, then domain data keywords, then the
    greeting shortcut, then "ambiguous". The greeting shortcut is applied
    only when no domain data keyword matched, so a message such as
    "hi, what is the temperature right now?" still requires a tool call.
    """
    msg = user_message.strip()
    looks_like_greeting = len(msg) < 5 or _GREETING_PATTERN.match(msg) is not None

    # Knowledge patterns are the most specific, so they win first.
    if any(pat.search(msg) for pat in _KNOWLEDGE_PATTERNS):
        return QueryClass(requires_data=False, category="knowledge")

    matched = [m.group() for m in (pat.search(msg) for pat in _DATA_PATTERNS) if m]

    # A generic question phrase ("what is", "show me") alone is not enough;
    # at least one domain-specific keyword must be present.
    domain_matches = [m for m in matched if m.lower() not in _GENERIC_ASK_PHRASES]
    if domain_matches:
        return QueryClass(requires_data=True, category="data", matched_keywords=matched)

    # Very short / greeting
    if looks_like_greeting:
        return QueryClass(requires_data=False, category="greeting")

    # Default: ambiguous — allow LLM to decide
    return QueryClass(requires_data=False, category="ambiguous")


# ---------------------------------------------------------------------------
# 2. Response validator — deterministic rule checks
# ---------------------------------------------------------------------------

@dataclass
class RuleViolation:
    """A detected rule violation in a chatbot response."""
    rule_name: str
    severity: str  # "block" or "warn"
    message: str
    correction: str  # What to tell the user instead


def validate_response(
    response_text: str,
    action: Optional[str] = None,
    context: Optional[dict] = None,
) -> list[RuleViolation]:
    """
    Check a chatbot response for rule violations.

    Parameters
    ----------
    response_text : str
        The chatbot's response text.
    action : str or None
        Extracted action ("shade", "irrigate", "no_action", etc.).
        Accepted for API compatibility; the checks below read only the
        response text and ``context``.
    context : dict or None
        Current conditions: hour, month, temp_c, stage_id, etc.
        Missing keys disable the rules that depend on them.

    Returns
    -------
    List of RuleViolation objects. Empty list = all good.
    """
    violations: list[RuleViolation] = []
    ctx = context or {}
    text_lower = response_text.lower()

    hour = ctx.get("hour")
    month = ctx.get("month")
    temp_c = ctx.get("temp_c")
    stage_id = ctx.get("stage_id")

    # Detect if the response recommends shading
    _recommends_shade = _text_recommends_shading(text_lower)

    # Rule: No shading before NO_SHADE_BEFORE_HOUR
    if _recommends_shade and hour is not None and hour < NO_SHADE_BEFORE_HOUR:
        violations.append(RuleViolation(
            rule_name="no_shade_before_10",
            severity="block",
            message=f"Response recommends shading before {NO_SHADE_BEFORE_HOUR}:00.",
            correction=(
                "Morning light is critical for carbon fixation. "
                f"Shading should not be recommended before {NO_SHADE_BEFORE_HOUR}:00 regardless "
                "of temperature. Panels should remain at full tracking."
            ),
        ))

    # Rule: No shading in restricted months (unless extreme)
    if _recommends_shade and month in NO_SHADE_MONTHS:
        # Check if the response mentions extreme conditions
        _mentions_extreme = any(w in text_lower for w in [
            "extreme", "lethal", "emergency", "severe sunburn", "last resort",
        ])
        if not _mentions_extreme:
            violations.append(RuleViolation(
                rule_name="no_shade_in_may",
                severity="block",
                message="Response recommends shading in May without citing extreme conditions.",
                correction=(
                    "May is the flowering/fruit-set period. Shading should be "
                    "avoided in May unless there is extreme heat causing lethal "
                    "stress. Panels should remain at full tracking."
                ),
            ))

    # Rule: Below transition temp shading hurts (RuBP-limited)
    if _recommends_shade and temp_c is not None and temp_c < NO_SHADE_TLEAF_BELOW:
        violations.append(RuleViolation(
            rule_name="temperature_transition",
            severity="warn",
            # Report the configured threshold rather than a hard-coded value
            # so the message cannot drift from the rule that fired.
            message=(
                f"Response recommends shading at {temp_c:.0f}°C "
                f"(below {NO_SHADE_TLEAF_BELOW:.0f}°C transition zone)."
            ),
            correction=(
                f"At {temp_c:.0f}°C, photosynthesis is RuBP-limited — "
                f"the vine needs light, not shade. Shading would reduce "
                f"photosynthesis. Keep panels at full tracking."
            ),
        ))

    # Rule: Dormant season — shading is irrelevant, not harmful
    if stage_id in ("winter_dormancy",) and _recommends_shade:
        violations.append(RuleViolation(
            rule_name="no_leaves_no_shade_problem",
            severity="warn",
            message="Response discusses shading during dormancy.",
            correction=(
                "The vine is dormant with no leaves. Shading is irrelevant "
                "(not harmful, just pointless). Panels should track for "
                "maximum energy."
            ),
        ))

    # Rule: "No shading" answers must explain why
    _recommends_no_shade = _text_recommends_no_shading(text_lower)
    if _recommends_no_shade:
        _has_reason = any(reason in text_lower for reason in [
            "light-limited", "rubp", "need light", "needs light",
            "full sun", "below 30", "below 28",
            "dormant", "no leaves", "no canopy",
            "night", "dark", "no radiation", "ghi", "no sun",
            "carbon fixation", "morning light",
            "not photosynthesi", "not active",
        ])
        if not _has_reason:
            violations.append(RuleViolation(
                rule_name="no_shading_must_explain",
                severity="warn",
                message="Response says 'no shading' without explaining why.",
                correction=(
                    "When recommending no shading, always explain the reason: "
                    "is the vine light-limited (T < 30°C), dormant (no leaves), "
                    "or is there no radiation? The farmer needs to understand why."
                ),
            ))

    return violations


# Shared keyword lists for shading detection heuristics
_POSITIVE_SHADE_PHRASES = [
    "recommend shading", "should shade", "activate shading",
    "tilt the panel", "move the panel", "adjust the panel",
    "shade the vine", "shade your vine", "shading would help",
    "shading is recommended", "suggest shading", "consider shading",
    "apply shading", "deploy shading", "enable shading",
    "recommend anti-tracking", "switch to anti-tracking",
]

_NEGATIVE_SHADE_PHRASES = [
    "should not shade", "don't shade", "no shading",
    "avoid shading", "shading is not", "not recommend shading",
    "do not shade", "keep panels tracking", "full tracking",
    "shading would reduce", "shading would hurt",
    "shading is irrelevant", "shading is unnecessary",
    "i would not recommend shading", "i don't recommend shading",
    "no shading needed", "shading is not needed",
    "no need to shade", "no need for shading",
]


def _text_recommends_shading(text_lower: str) -> bool:
    """Heuristic: does the response recommend activating shade?

    Expects already lower-cased text. Any negative phrase overrides a
    positive one.
    """
    has_positive = any(p in text_lower for p in _POSITIVE_SHADE_PHRASES)
    has_negative = any(p in text_lower for p in _NEGATIVE_SHADE_PHRASES)
    # If both present, the negative usually wins (e.g. "some might suggest shading, but I don't recommend it")
    return has_positive and not has_negative


def _text_recommends_no_shading(text_lower: str) -> bool:
    """Heuristic: does the response explicitly recommend NOT shading?"""
    return any(p in text_lower for p in _NEGATIVE_SHADE_PHRASES)


# ---------------------------------------------------------------------------
# 3. Confidence estimation
# ---------------------------------------------------------------------------

def estimate_confidence(
    tool_called: bool,
    tool_succeeded: bool,
    data_age_minutes: Optional[float],
    tool_name: Optional[str] = None,
) -> str:
    """
    Estimate response confidence based on data grounding.

    Returns one of: "high", "medium", "low", "insufficient_data".
    ``tool_name`` is accepted but does not influence the result.
    """
    # No tool called at all
    if not tool_called:
        return "low"  # answering from system prompt / training data only

    # Tool was called but failed
    if not tool_succeeded:
        return "insufficient_data"

    # Tool succeeded — check data freshness
    if data_age_minutes is None:
        # Computed result (FvCB, shading sim) — no age concept
        return "high"

    if data_age_minutes <= 30:
        return "high"
    elif data_age_minutes <= 120:
        return "medium"
    else:
        return "low"


# ---------------------------------------------------------------------------
# 4. Source tagging helper
# ---------------------------------------------------------------------------

# Map tool names to human-readable data sources
_TOOL_SOURCES = {
    "get_current_weather": "IMS Station 43 (Sde Boker)",
    "get_weather_history": "IMS Station 43 (Sde Boker)",
    "get_vine_state": "ThingsBoard sensors (on-site)",
    "get_sensor_history": "ThingsBoard sensors (on-site)",
    "calc_photosynthesis": "Farquhar FvCB model (computed)",
    "predict_photosynthesis_ml": "ML ensemble (computed)",
    "get_ps_forecast": "FvCB day-ahead forecast (computed)",
    "simulate_shading": "Shadow model simulation (computed)",
    "compare_tilt_angles": "Shadow model simulation (computed)",
    "get_daily_schedule": "Shadow model schedule (computed)",
    "get_energy_generation": "IMS + analytical model (estimated)",
    "get_energy_history": "IMS + analytical model (estimated)",
    "predict_energy": "IMS + analytical model (estimated)",
    "run_day_ahead_advisory": "Gemini day-ahead advisor",
    "explain_biology_rule": "Built-in biology rules",
    "get_photosynthesis_3d": "3D scene (computed)",
}


def get_source_label(tool_name: str) -> str:
    """Return a human-readable source label for a tool.

    Unknown tool names are returned unchanged.
    """
    return _TOOL_SOURCES.get(tool_name, tool_name)


def tag_tool_result(tool_name: str, tool_result: dict) -> dict:
    """
    Add source metadata to a tool result before sending to Gemini.

    The tagged result helps Gemini cite sources in its response. The input
    dict is not modified; a shallow copy is returned with ``_source`` and
    ``_tool`` added, plus ``_data_age_minutes`` when ``age_minutes`` is
    present and ``_freshness_warning`` when that age is numeric and above
    60 minutes.
    """
    tagged = dict(tool_result)
    tagged["_source"] = get_source_label(tool_name)
    tagged["_tool"] = tool_name

    # Extract data age if present
    age = tool_result.get("age_minutes")
    if age is not None:
        tagged["_data_age_minutes"] = age
        try:
            # A non-numeric age (e.g. "unknown") must not abort tagging.
            if age > 60:
                tagged["_freshness_warning"] = (
                    f"This data is {age:.0f} minutes old. "
                    "Warn the user that conditions may have changed."
                )
        except (TypeError, ValueError):
            pass

    return tagged
" + "Values above 3000 are sensor artefacts.", + }, + "Air1_leafTemperature_ref": { + "description": "Leaf (canopy) temperature", + "unit": "°C", + "physical_range": [-5, 55], + "notes": "Grape leaf temperature in the Negev can reach ~45°C on extreme days, " + "but values above 55°C are physiologically impossible for a living leaf.", + }, + "Air1_airTemperature_ref": { + "description": "Air temperature near canopy", + "unit": "°C", + "physical_range": [0, 50], + "notes": "Sde Boker record high is ~47°C. Values above 50°C or below 0°C " + "during the growing season (May–Sep) are sensor faults.", + }, + "Air1_VPD_ref": { + "description": "Vapour Pressure Deficit", + "unit": "kPa", + "physical_range": [0, 7], + "notes": "Desert VPD rarely exceeds 6–7 kPa even in extreme heat. " + "Negative values and values above 8 kPa are sensor errors.", + }, + "Air1_airHumidity_ref": { + "description": "Relative Humidity", + "unit": "%", + "physical_range": [0, 100], + "notes": "Must be in [0, 100]. Values outside this range are invalid.", + }, + "Air1_CO2_ref": { + "description": "CO₂ concentration (raw sensor, corrected ×0.7 by SensorDataLoader)", + "unit": "ppm (raw)", + "physical_range": [400, 4000], + "notes": "Raw sensor reads ~30% too high (corrected ×0.7 in the data pipeline). " + "Raw values above 4000 ppm or below 400 ppm are sensor artefacts. " + "Post-correction (~280–2800 ppm) values above 2000 ppm indicate sensor drift.", + }, +} + +_SYSTEM_PROMPT_CLEANING = ( + "You are a precision-agriculture sensor data quality engineer. " + "You are given descriptive statistics for sensor columns from a vineyard " + "in the Negev desert, Israel (Sde Boker region, Semillon grapevine, May–September). " + "Your task: for each column, propose anomaly filter thresholds to flag " + "or remove invalid readings. 
class LLMDataEngineer:
    """
    Gemini-assisted sensor data cleaning and feature engineering.

    Gemini is asked for per-column anomaly thresholds and for the Stress
    Risk Score weights; pandas then applies them.  Each Gemini call has a
    local fallback, and every Gemini-supplied number is validated before
    use, so null / malformed fields fall back to defaults instead of
    raising.

    Usage
    -----
    engineer = LLMDataEngineer()
    df_clean, thresholds, features_meta = engineer.run_pipeline(df)
    """

    # Cleaning strategies accepted by apply_cleaning().
    _VALID_STRATEGIES = ("clip", "nan", "drop")

    # Statistical settings used when a threshold entry omits or nulls them.
    _DEFAULT_ZSCORE_THRESHOLD = 3.5
    _DEFAULT_IQR_MULTIPLIER = 3.0

    # Stress Risk Score defaults, merged into whatever Gemini returns.
    _DEFAULT_FEATURE_SPEC = {
        "formula_description": "Normalised weighted sum of VPD and CWSI stress signals",
        "vpd_weight": 0.6,
        "cwsi_weight": 0.4,
        "vpd_clip_max": 6.0,
        "cwsi_clip_max": 1.0,
        "rationale": (
            "VPD dominates stomatal response (weight 0.6); "
            "CWSI captures cumulative water status (weight 0.4)."
        ),
    }

    def __init__(
        self,
        model_name: str = "gemini-2.5-flash",
        api_key: Optional[str] = None,
        verbose: bool = True,
    ):
        self.model_name = model_name
        self._api_key = api_key
        self._client = None  # created lazily by the `client` property
        self.verbose = verbose
        # Caches keyed by content hash — avoids repeated Gemini calls
        self._threshold_cache: dict[str, dict] = {}
        self._feature_spec_cache: dict[str, dict] = {}

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @property
    def api_key(self) -> str:
        """API key resolved through get_google_api_key()."""
        return get_google_api_key(self._api_key)

    @property
    def client(self):
        """Gemini client, created on first access and then reused."""
        if self._client is None:
            self._client = get_genai_client(self._api_key)
        return self._client

    def _call_gemini(self, system_prompt: str, user_prompt: str) -> str:
        """Send a prompt to Gemini and return the raw text response."""
        response = self.client.models.generate_content(
            model=self.model_name,
            contents=user_prompt,
            config={"system_instruction": system_prompt},
        )
        return response.text

    @staticmethod
    def _hash_key(*parts: str) -> str:
        """Create a short hash from string parts for cache keying."""
        # MD5 is only a cache fingerprint here; flagging it as non-security
        # keeps it usable on FIPS-restricted Python builds.
        digest = hashlib.md5("|".join(parts).encode(), usedforsecurity=False)
        return digest.hexdigest()[:12]

    @staticmethod
    def _as_number(value, default=None):
        """Return *value* if it is a usable number, else *default*.

        Real numbers pass through unchanged; numeric strings are converted
        with float().  None, booleans, NaN and anything unparsable yield
        *default*.  Used on every number that originates from Gemini JSON.
        """
        if value is None or isinstance(value, bool):
            return default
        if not isinstance(value, (int, float, np.integer, np.floating)):
            try:
                value = float(value)
            except (TypeError, ValueError):
                return default
        # NaN is the only value that differs from itself.
        return default if value != value else value

    @classmethod
    def _positive_number(cls, value, default: float) -> float:
        """Like _as_number(), but also rejects values <= 0 (used as divisors)."""
        number = cls._as_number(value, default)
        return float(number) if number > 0 else float(default)

    def _log(self, msg: str) -> None:
        """Print *msg* with a class prefix when verbose mode is on."""
        if self.verbose:
            print(f"[LLMDataEngineer] {msg}")

    # ------------------------------------------------------------------
    # Step 1: Anomaly detection — ask Gemini for filter thresholds
    # ------------------------------------------------------------------

    def analyze_anomalies(
        self,
        df: pd.DataFrame,
        columns: Optional[list[str]] = None,
    ) -> dict:
        """
        Send descriptive statistics to Gemini and receive per-column
        anomaly filter thresholds.

        Parameters
        ----------
        df : DataFrame with sensor measurements
        columns : subset of columns to analyze; defaults to SENSOR_CONTEXT keys

        Returns
        -------
        dict mapping column_name → {lower_bound, upper_bound,
                                     zscore_threshold, iqr_multiplier, rationale}

        Raises
        ------
        ValueError if none of the requested columns is a numeric column of df.

        Notes
        -----
        Results are cached by a hash of the generated prompt.  When the
        Gemini call or JSON parsing fails, the physical-range fallback is
        returned (and cached under the same key).
        """
        candidates = columns or list(SENSOR_CONTEXT.keys())
        # describe() silently drops non-numeric columns, which would make the
        # per-column stats lookup below fail — filter them out up front.
        target_cols = [
            c for c in candidates
            if c in df.columns
            and pd.api.types.is_numeric_dtype(df[c])
            and not pd.api.types.is_bool_dtype(df[c])
        ]
        if not target_cols:
            raise ValueError("No recognized sensor columns found in DataFrame.")

        stats = df[target_cols].describe(percentiles=[0.01, 0.05, 0.25, 0.5, 0.75, 0.95, 0.99])

        # Build prompt with stats + domain context
        lines = [
            "Analyze the following sensor columns from a vineyard dataset.",
            "For each column, the physical context and expected range are provided.",
            "",
        ]
        for col in target_cols:
            ctx = SENSOR_CONTEXT.get(col, {})
            lines.append(f"Column: {col}")
            if ctx:
                lines.append(f" Description : {ctx['description']} ({ctx['unit']})")
                lines.append(f" Expected range : {ctx['physical_range']}")
                lines.append(f" Domain notes : {ctx['notes']}")
            lines.append(" Observed statistics:")
            for stat_name, val in stats[col].items():
                lines.append(f" {stat_name:10s}: {val:.4f}")
            lines.append("")

        user_prompt = "\n".join(lines)

        # Check cache (same stats → same thresholds)
        cache_key = self._hash_key(user_prompt)
        if cache_key in self._threshold_cache:
            self._log("Using cached anomaly thresholds (same data fingerprint).")
            return self._threshold_cache[cache_key]

        self._log("Querying Gemini for anomaly thresholds …")

        try:
            raw = self._call_gemini(_SYSTEM_PROMPT_CLEANING, user_prompt)
            thresholds = _extract_json(raw)
            if not isinstance(thresholds, dict):
                raise ValueError("Gemini response is not a JSON object")
        except Exception as exc:
            # Best-effort by design: any API/parsing failure degrades to the
            # physical-range fallback instead of aborting the pipeline.
            self._log(f"Gemini API error: {exc}. Using statistical fallback.")
            thresholds = self._fallback_thresholds(df, target_cols)

        self._threshold_cache[cache_key] = thresholds
        self._log(f"Received thresholds for {len(thresholds)} columns.")
        return thresholds

    @staticmethod
    def _fallback_thresholds(df: pd.DataFrame, cols: list[str]) -> dict:
        """Conservative statistical fallback used when API is unavailable.

        Bounds come from SENSOR_CONTEXT's ``physical_range`` (None for
        columns without an entry); ``df`` itself is not inspected.
        """
        result = {}
        for col in cols:
            ctx = SENSOR_CONTEXT.get(col, {})
            phys = ctx.get("physical_range", [None, None])
            result[col] = {
                "lower_bound": phys[0],
                "upper_bound": phys[1],
                "zscore_threshold": 3.5,
                "iqr_multiplier": 3.0,
                "rationale": "Statistical fallback (Gemini unavailable).",
            }
        return result

    # ------------------------------------------------------------------
    # Step 2: Apply cleaning
    # ------------------------------------------------------------------

    def apply_cleaning(
        self,
        df: pd.DataFrame,
        thresholds: dict,
        strategy: str = "clip",
    ) -> pd.DataFrame:
        """
        Apply Gemini-generated thresholds to clean the sensor DataFrame.

        A value is flagged as anomalous when it is outside
        [lower_bound, upper_bound], or when it is an outlier by BOTH the
        z-score and the IQR test.

        Parameters
        ----------
        df : raw sensor DataFrame
        thresholds : dict from analyze_anomalies()
        strategy : 'clip' — clamp values to [lower_bound, upper_bound]
                   'drop' — drop rows where any column is out of bounds
                   'nan' — replace out-of-bounds values with NaN

        Returns
        -------
        Cleaned DataFrame (copy).

        Raises
        ------
        ValueError if ``strategy`` is not one of the three names above
        (checked up front, even when the data contains no anomalies).
        """
        if strategy not in self._VALID_STRATEGIES:
            raise ValueError(f"Unknown strategy '{strategy}'. Use 'clip', 'nan', or 'drop'.")

        result = df.copy()
        report_lines = ["Anomaly cleaning report:"]

        for col, thresh in thresholds.items():
            # Skip columns absent from df and malformed (non-dict) entries.
            if col not in result.columns or not isinstance(thresh, dict):
                continue
            series = result[col]
            lower = self._as_number(thresh.get("lower_bound"))
            upper = self._as_number(thresh.get("upper_bound"))
            nothing_flagged = pd.Series(False, index=series.index)

            # Count violations before cleaning
            mask_low = (series < lower) if lower is not None else nothing_flagged
            mask_high = (series > upper) if upper is not None else nothing_flagged

            # Z-score based detection (secondary flag); the epsilon avoids a
            # zero division on constant columns.
            z_thresh = self._as_number(
                thresh.get("zscore_threshold"), self._DEFAULT_ZSCORE_THRESHOLD
            )
            z_scores = (series - series.mean()) / (series.std() + 1e-9)
            mask_zscore = z_scores.abs() > z_thresh

            # IQR-based detection (tertiary flag)
            iqr_mult = self._as_number(
                thresh.get("iqr_multiplier"), self._DEFAULT_IQR_MULTIPLIER
            )
            q1, q3 = series.quantile(0.25), series.quantile(0.75)
            iqr = q3 - q1
            mask_iqr = (series < q1 - iqr_mult * iqr) | (series > q3 + iqr_mult * iqr)

            # Union of all anomaly flags
            mask_anomaly = mask_low | mask_high | (mask_zscore & mask_iqr)
            n_anomalies = int(mask_anomaly.sum())
            if n_anomalies == 0:
                continue

            report_lines.append(
                f" {col}: {n_anomalies} anomalies ({n_anomalies / len(series) * 100:.2f}%)"
            )

            if strategy == "clip":
                # Only the hard bounds are enforced here; statistical
                # outliers inside the bounds are reported but left as-is.
                result[col] = series.clip(
                    lower=lower if lower is not None else -np.inf,
                    upper=upper if upper is not None else np.inf,
                )
            elif strategy == "nan":
                result.loc[mask_anomaly, col] = np.nan
            else:  # "drop" — later columns are evaluated on the reduced frame
                result = result.loc[~mask_anomaly].copy()

        self._log("\n".join(report_lines))
        return result

    # ------------------------------------------------------------------
    # Step 3: Feature engineering
    # ------------------------------------------------------------------

    def get_feature_spec(
        self,
        available_cols: list[str],
    ) -> dict:
        """
        Ask Gemini to confirm the Stress Risk Score formula given available columns.

        Returns a feature spec dict with vpd_weight, cwsi_weight, etc.
        Falls back to a biologically motivated default if API is unavailable.
        Keys that Gemini omits or returns as null are filled from the
        defaults, so the returned dict is always complete.
        """
        has_cwsi = any("cwsi" in str(c).lower() for c in available_cols)

        # Cache key: just depends on whether CWSI is available
        cache_key = f"cwsi={has_cwsi}"
        if cache_key in self._feature_spec_cache:
            self._log("Using cached feature spec.")
            return self._feature_spec_cache[cache_key]

        user_prompt = (
            f"Available sensor columns: {available_cols}.\n"
            f"CWSI column available: {has_cwsi}.\n"
            "Propose weights and clip bounds for a Stress Risk Score that linearly "
            "combines normalised VPD and (if available) normalised CWSI. "
            "The score should be in [0, 1] and reflect acute heat/drought stress "
            "for Semillon grapevine in a desert agrivoltaic system."
        )
        self._log("Querying Gemini for Stress Risk Score formula …")
        try:
            raw = self._call_gemini(_SYSTEM_PROMPT_FEATURES, user_prompt)
            spec = _extract_json(raw).get("stress_risk_score", {})
        except Exception as exc:
            self._log(f"Gemini API error: {exc}. Using default feature spec.")
            spec = {}
        if not isinstance(spec, dict):
            spec = {}

        # Merge with defaults so the dict is always complete.  An explicit
        # null counts as missing (setdefault would have kept the None).
        for key, default in self._DEFAULT_FEATURE_SPEC.items():
            if spec.get(key) is None:
                spec[key] = default

        self._feature_spec_cache[cache_key] = spec
        return spec

    def engineer_features(
        self,
        df: pd.DataFrame,
        timestamp_col: str = "time",
        cwsi_col: Optional[str] = None,
        vpd_col: str = "Air1_VPD_ref",
        feature_spec: Optional[dict] = None,
    ) -> pd.DataFrame:
        """
        Add engineered features to the sensor DataFrame.

        New columns added
        -----------------
        hour_sin, hour_cos – cyclical encoding of hour-of-day
        doy_sin, doy_cos – cyclical encoding of day-of-year
        stress_risk_score – weighted VPD (+ CWSI) stress index in [0, 1]

        Parameters
        ----------
        df : sensor DataFrame (original unmodified)
        timestamp_col : name of the datetime column (or index if not a column)
        cwsi_col : optional CWSI column name; if None, stress score uses VPD only
        vpd_col : VPD column name
        feature_spec : pre-fetched spec from get_feature_spec(); fetched if None

        Returns
        -------
        DataFrame copy with additional feature columns.

        Notes
        -----
        Missing VPD/CWSI readings contribute 0 to the score.  Null,
        non-numeric or non-positive spec values are replaced by the class
        defaults so normalisation can never divide by zero.
        """
        result = df.copy()

        # --- Cyclical time features (via shared utility) ---
        ts_col = timestamp_col if timestamp_col in result.columns else None
        use_index = ts_col is None and isinstance(result.index, pd.DatetimeIndex)
        if ts_col is not None or use_index:
            result = add_cyclical_time_features(
                result,
                timestamp_col=ts_col,
                index_is_timestamp=use_index,
            )
            self._log("Added cyclical time features: hour_sin, hour_cos, doy_sin, doy_cos")
        else:
            self._log("Warning: no timestamp found; skipping cyclical features.")

        # --- Stress Risk Score ---
        if vpd_col not in result.columns:
            self._log(f"Warning: VPD column '{vpd_col}' not found; skipping stress_risk_score.")
            return result

        if feature_spec is None:
            feature_spec = self.get_feature_spec(list(result.columns))

        defaults = self._DEFAULT_FEATURE_SPEC
        vpd_w = float(self._as_number(feature_spec.get("vpd_weight"), defaults["vpd_weight"]))
        cwsi_w = float(self._as_number(feature_spec.get("cwsi_weight"), defaults["cwsi_weight"]))
        vpd_max = self._positive_number(feature_spec.get("vpd_clip_max"), defaults["vpd_clip_max"])
        cwsi_max = self._positive_number(feature_spec.get("cwsi_clip_max"), defaults["cwsi_clip_max"])

        vpd_norm = (result[vpd_col].clip(0, vpd_max) / vpd_max).fillna(0.0)

        if cwsi_col and cwsi_col in result.columns:
            cwsi_norm = (result[cwsi_col].clip(0, cwsi_max) / cwsi_max).fillna(0.0)
            effective_cwsi_w = cwsi_w
            effective_vpd_w = vpd_w
        else:
            # No CWSI — redistribute weight entirely to VPD
            cwsi_norm = pd.Series(0.0, index=result.index)
            effective_cwsi_w = 0.0
            effective_vpd_w = 1.0

        score = (effective_vpd_w * vpd_norm + effective_cwsi_w * cwsi_norm).clip(0, 1)
        result["stress_risk_score"] = score.round(4)

        self._log(
            f"Added stress_risk_score (vpd_weight={effective_vpd_w:.2f}, "
            f"cwsi_weight={effective_cwsi_w:.2f})"
        )
        return result

    # ------------------------------------------------------------------
    # Full pipeline
    # ------------------------------------------------------------------

    def run_pipeline(
        self,
        df: pd.DataFrame,
        cleaning_strategy: str = "clip",
        timestamp_col: str = "time",
        cwsi_col: Optional[str] = None,
        vpd_col: str = "Air1_VPD_ref",
    ) -> tuple[pd.DataFrame, dict, dict]:
        """
        Execute the full LLM data engineering pipeline.

        Steps
        -----
        1. Gemini analyzes column stats → anomaly thresholds
        2. Apply cleaning (clip / nan / drop)
        3. Gemini confirms feature spec → engineer features

        Returns
        -------
        (df_engineered, thresholds, feature_spec)
        """
        self._log("=== LLM Data Engineering Pipeline ===")

        # Step 1: anomaly thresholds
        thresholds = self.analyze_anomalies(df)

        # Step 2: clean
        df_clean = self.apply_cleaning(df, thresholds, strategy=cleaning_strategy)

        # Step 3: feature spec + engineering
        feature_spec = self.get_feature_spec(list(df_clean.columns))
        df_engineered = self.engineer_features(
            df_clean,
            timestamp_col=timestamp_col,
            cwsi_col=cwsi_col,
            vpd_col=vpd_col,
            feature_spec=feature_spec,
        )

        new_cols = [c for c in df_engineered.columns if c not in df.columns]
        self._log(f"Pipeline complete. New columns: {new_cols}")
        return df_engineered, thresholds, feature_spec
class RoutingAgent:
    """Model router for FvCB vs ML ensemble selection.

    Uses deterministic rules first (covers >90% of cases without any API call).
    Falls back to Gemini only for ambiguous transition-zone conditions.
    Any Gemini failure resolves to 'fvcb'.
    """

    # Thresholds for rule-based routing (avoids API calls)
    _TEMP_CLEAR_FVCB = 28.0  # clearly FvCB territory
    _TEMP_CLEAR_ML = 32.0  # clearly ML territory
    _VPD_CLEAR_ML = 2.5  # high VPD → ML
    _CWSI_CLEAR_ML = 0.4  # water stress → ML

    def __init__(
        self,
        model_name: str = "gemini-2.5-flash",
        api_key: Optional[str] = None,
    ):
        self.model_name = model_name
        self._api_key = api_key
        self._client = None  # created lazily by the `client` property

    @property
    def api_key(self) -> str:
        """API key resolved through get_google_api_key()."""
        return get_google_api_key(self._api_key)

    @property
    def client(self):
        """Lazy-init the Gemini client."""
        if self._client is None:
            self._client = get_genai_client(self._api_key)
        return self._client

    # ------------------------------------------------------------------
    # Rule-based fast path (no API call)
    # ------------------------------------------------------------------

    @classmethod
    def _rule_based_route(cls, telemetry: dict) -> Optional[str]:
        """Return 'fvcb' or 'ml' if rules are decisive, else None.

        Reads ``temp_c``, ``vpd`` and ``cwsi``; missing values never
        trigger a rule.  Without a temperature reading the result is None
        unless VPD or CWSI alone signals stress.
        """
        temp = telemetry.get("temp_c")
        vpd = telemetry.get("vpd")
        cwsi = telemetry.get("cwsi")

        # High stress signals → ML (no ambiguity)
        if temp is not None and temp >= cls._TEMP_CLEAR_ML:
            return "ml"
        if vpd is not None and vpd >= cls._VPD_CLEAR_ML:
            return "ml"
        if cwsi is not None and cwsi >= cls._CWSI_CLEAR_ML:
            return "ml"

        # Clearly cool/calm → FvCB.  The explicit "<" re-checks keep NaN
        # readings out of the fast path (NaN fails both >= and <).
        if temp is not None and temp < cls._TEMP_CLEAR_FVCB:
            if vpd is None or vpd < cls._VPD_CLEAR_ML:
                if cwsi is None or cwsi < cls._CWSI_CLEAR_ML:
                    return "fvcb"

        return None  # transition zone — need LLM

    # ------------------------------------------------------------------
    # Gemini routing (only for ambiguous cases)
    # ------------------------------------------------------------------

    @staticmethod
    def _format_telemetry(telemetry: dict) -> str:
        """Format telemetry dict into a readable prompt string.

        Only the known telemetry keys are emitted, in a fixed order; keys
        absent from ``telemetry`` are skipped.
        """
        lines = ["Current telemetry:"]
        field_labels = {
            "temp_c": "Air temperature",
            "ghi_w_m2": "GHI (irradiance)",
            "cwsi": "CWSI (crop water stress)",
            "vpd": "VPD (vapor pressure deficit)",
            "wind_speed_ms": "Wind speed",
            "hour": "Hour of day",
        }
        for key, label in field_labels.items():
            if key in telemetry:
                val = telemetry[key]
                lines.append(f" {label}: {val}")
        return "\n".join(lines)

    @staticmethod
    def _parse_response(text: str) -> str:
        """Extract model choice from Gemini response.

        Returns 'ml' when the text mentions MODEL_B (case-insensitive) and
        'fvcb' otherwise, including for an empty or missing response.
        """
        text_upper = (text or "").strip().upper()
        if "MODEL_B" in text_upper:
            return "ml"
        return "fvcb"

    @staticmethod
    def _parse_batch_response(text: str, indices: list[int]) -> dict:
        """Map each reading index to 'fvcb' or 'ml' from a batched reply.

        An index is routed to 'ml' only when the reply contains
        '<index>: MODEL_B' for exactly that index.  The whole number is
        matched, so reading 1 is not confused with readings 11 or 21.
        Indices without a MODEL_B answer default to 'fvcb'.
        """
        import re

        ml_indices = {
            int(num)
            for num in re.findall(r"(\d+)\s*:\s*MODEL_B", (text or "").upper())
        }
        return {idx: ("ml" if idx in ml_indices else "fvcb") for idx in indices}

    def route(self, telemetry: dict) -> str:
        """Route a single telemetry reading to fvcb or ml.

        Uses deterministic rules first; only calls Gemini for ambiguous cases.

        Parameters
        ----------
        telemetry : dict with keys like temp_c, ghi_w_m2, cwsi, vpd,
                    wind_speed_ms, hour

        Returns
        -------
        'fvcb' or 'ml'
        """
        # Fast path: rule-based (no API call)
        rule_result = self._rule_based_route(telemetry)
        if rule_result is not None:
            return rule_result

        # Slow path: Gemini for transition-zone ambiguity
        prompt = self._format_telemetry(telemetry)
        try:
            response = self.client.models.generate_content(
                model=self.model_name,
                contents=prompt,
                config={"system_instruction": SYSTEM_PROMPT},
            )
            return self._parse_response(response.text)
        except Exception as e:
            # Deliberate best-effort: routing must never fail the control
            # loop, so any API problem resolves to the mechanistic model.
            print(f"RoutingAgent: API error ({e}), falling back to fvcb")
            return "fvcb"

    def route_batch(self, telemetry_rows: list[dict]) -> list[str]:
        """Route a batch of telemetry readings.

        Uses rule-based routing where possible; batches remaining ambiguous
        rows into a single Gemini call.

        Returns
        -------
        list of 'fvcb' / 'ml', one per input row, in input order.
        """
        results: list = [None] * len(telemetry_rows)
        ambiguous_indices = []

        # First pass: rule-based
        for i, row in enumerate(telemetry_rows):
            rule_result = self._rule_based_route(row)
            if rule_result is not None:
                results[i] = rule_result
            else:
                ambiguous_indices.append(i)

        if not ambiguous_indices:
            return results

        # Second pass: single batched Gemini call for ambiguous rows
        # NOTE(review): SYSTEM_PROMPT asks for a single bare answer; the
        # per-line format requested here relies on the user prompt winning.
        lines = [
            "Route each of the following telemetry readings to MODEL_A or MODEL_B.",
            "Reply with one line per reading: '<index>: MODEL_A' or '<index>: MODEL_B'.",
            "",
        ]
        for idx in ambiguous_indices:
            lines.append(f"Reading {idx}: {self._format_telemetry(telemetry_rows[idx])}")
            lines.append("")

        try:
            response = self.client.models.generate_content(
                model=self.model_name,
                contents="\n".join(lines),
                config={"system_instruction": SYSTEM_PROMPT},
            )
            verdicts = self._parse_batch_response(response.text, ambiguous_indices)
        except Exception as e:
            print(f"RoutingAgent: batch API error ({e}), falling back to fvcb")
            verdicts = dict.fromkeys(ambiguous_indices, "fvcb")

        for idx, choice in verdicts.items():
            results[idx] = choice

        return results
@dataclass
class ChatResponse:
    """Structured response from the chatbot with grounding metadata.

    Only ``message`` is required; every other field has a default, and
    list/dict fields get a fresh empty container per instance.

    Fields
    ------
    message         : the reply text (presumably what is shown to the user)
    tool_calls      : list of dicts describing tool calls — exact schema is
                      not visible here; TODO confirm against the producer
    data            : dict payload accompanying the reply — contents not
                      visible here; TODO confirm
    confidence      : one of "high", "medium", "low", "insufficient_data";
                      defaults to "low"
    sources         : source labels backing the reply
    caveats         : caveat strings attached to the reply
    rule_violations : list of dicts, presumably from post-response rule
                      validation — TODO confirm schema
    response_mode   : "info" (factual) or "advisory" (recommendation);
                      defaults to "info"
    """
    message: str
    tool_calls: list[dict] = field(default_factory=list)
    data: dict = field(default_factory=dict)
    # --- Grounding metadata (v2) ---
    confidence: str = "low"  # high / medium / low / insufficient_data
    sources: list[str] = field(default_factory=list)
    caveats: list[str] = field(default_factory=list)
    rule_violations: list[dict] = field(default_factory=list)
    # --- Dual-channel advisory (v3) ---
    response_mode: str = "info"  # "info" (factual) or "advisory" (recommendation)
" + "Weather data is from IMS station 43 (Sde Boker, Negev). Timezone is always " + "Asia/Jerusalem (Israel Standard Time / Israel Daylight Time). All timestamps " + "from tools (get_current_weather, get_vine_state, etc.) are in Israel local time. " + "When the user asks about 'right now' or 'current' conditions, interpret the " + "time in the tool result as Israel local time (e.g. 15:16 = afternoon in Yeruham)." + ), + "temperature_transition": ( + "Below 30\u00b0C, Semillon photosynthesis is RuBP-limited (light is the " + "bottleneck \u2014 shading HURTS). Above 30\u00b0C, it becomes Rubisco-limited " + "(heat is the bottleneck \u2014 shading MAY help). The transition is gradual " + "(28\u201332\u00b0C)." + ), + "no_shade_before_10": ( + "Morning light is critical for carbon fixation. Never shade before " + "10:00 regardless of temperature." + ), + "no_shade_in_may": ( + "May is the flowering/fruit-set period. Yield protection has priority: " + "avoid shading in May under normal conditions because even small losses " + "can reduce cluster number and berry set. Only introduce shade in May " + "as a last resort in extreme heat to prevent serious damage (e.g. " + "severe sunburn or lethal stress)." + ), + "cwsi_threshold": ( + "Crop Water Stress Index > 0.4 indicates real water stress. Below 0.4, " + "the vine is coping adequately." + ), + "berry_sunburn": ( + "Direct exposure at air temperature > 35\u00b0C risks berry sunburn, " + "especially on the southwest-facing side of clusters in the afternoon." + ), + "energy_budget": ( + "Primary objective is to maximise annual PV energy. The vines have a " + "limited \"protection budget\": up to 5% annual energy sacrifice for " + "shading that clearly protects vine health or yield. Suggested monthly " + "caps: May=0%, Jun=15%, Jul=30%, Aug=30%, Sep=20%, Oct=5%. Stay below " + "these caps unless there is an exceptional agronomic reason." 
+ ), + "model_routing": ( + "Use FvCB (Farquhar model) for standard conditions (T < 30\u00b0C, " + "VPD < 2.5 kPa, adequate water). Use ML ensemble for stress conditions " + "(T > 30\u00b0C, high VPD, water stress, or any non-linear regime)." + ), + "phenological_multiplier": ( + "Stress during veraison (berry ripening) is 1.5x more damaging than " + "during vegetative growth. Protect veraison at higher cost." + ), + "irrigation_management": ( + "Aim to keep soil moisture in a comfortable band for Semillon: avoid " + "both chronic dryness and chronic saturation. During vegetative growth " + "allow gentle dry-down between irrigations; during flowering and " + "veraison, avoid strong swings. Use CWSI and VPD together: if CWSI " + "stays > 0.4 and VPD is high for several hours, consider an irrigation " + "event unless the soil is already wet." + ), + "fertiliser_management": ( + "Prioritise balanced nutrition over aggressive fertiliser use. Apply " + "most nitrogen early in the season (budburst to pre-flowering), reduce " + "near veraison to avoid excessive vigour and delayed ripening. Use " + "leaf tissue tests and visual cues; avoid fertilising stressed vines " + "during acute heat or drought events." + ), + "photosynthesis_3d": ( + "The 3D viewer shows the vine canopy, solar tracker panel and sun position, " + "with each zone coloured by photosynthesis rate (green = rate). Connect a " + "Google API key to use the Vineyard Advisor and generate the interactive " + "3D scene from the chat (e.g. \"Show me the 3D vine and photosynthesis\")." + ), + "no_leaves_no_shade_problem": ( + "When there are no leaves (dormant season, before budburst, or canopy not " + "yet developed), there is no problem with shading \u2014 the vine is not " + "photosynthesising, so shading does not harm it. 
Do not frame the answer as " + "\"you should not shade\" as if shading would be bad; instead say that " + "shading is irrelevant right now (no leaves to protect), and panel position " + "can favour energy. In the Negev, dormancy is roughly October\u2013March; budburst " + "is typically March\u2013April." + ), + "no_shading_must_explain": ( + "When recommending that the farmer should NOT shade (or that shading is not " + "needed), always give a specific reason tied to photosynthesis or need. " + "Examples: (1) No leaves / dormant \u2014 no photosynthesis to protect, so shading " + "is irrelevant. (2) Full sun is beneficial \u2014 vine is light-limited (T < 30\u00b0C), " + "so shading would reduce photosynthesis; keep panels tracking. (3) No " + "radiation (night or GHI = 0) \u2014 nothing to manage; no shading decision needed. " + "Never say only \"you should not shade\" without explaining the underlying " + "reason (no need for PS protection, or need for full light for PS, etc.)." + ), +} + + +# --------------------------------------------------------------------------- +# System prompt +# --------------------------------------------------------------------------- + +_SYSTEM_PROMPT_TEMPLATE = """\ +You are a friendly vineyard advisor for the SolarWine agrivoltaic system. \ +Site: Yeruham, Israel (Seymour plot, Negev). Weather: IMS station 43 (Sde Boker). \ +Timezone: Asia/Jerusalem — all tool timestamps are Israel local time; interpret \ +"now" and "current" using that timezone (e.g. 15:16 = afternoon in Yeruham). \ +You help the farmer decide when and how much to shade their Semillon grapevines \ +(VSP trellis, 1.2 m canopy) under single-axis solar trackers (1.13 m panel at \ +2.05 m height, 3.0 m row spacing). + +CONTROL OBJECTIVE: +- Primary goal: maximise annual PV energy production. +- Secondary goal: protect vines from heat, water stress, and sunburn using a \ +limited shading budget (see energy_budget rule). 
+- When in doubt and there is no clear sign of dangerous stress, prefer \ +keeping panels in their energy-maximising position. + +CALENDAR & STAGE HANDLING: +- Do NOT guess the current calendar month. If the user does not supply a \ +date and you do not have a phenology tool result, talk in terms of stages \ +(budburst, flowering, veraison, etc.) rather than asserting a specific month. + +COMMUNICATION STYLE: +- Use plain language; explain jargon when you first use it +- Be concise but thorough +- Always explain WHY a recommendation makes sense biologically +- When uncertain, say so and suggest what data would help + +BIOLOGICAL GUIDELINES (strong constraints; balance them with the energy objective): + +{biology_rules} + +TOOLS AVAILABLE: +You can call tools by including a JSON block in your response with this format: +{{"tool_call": {{"name": "", "args": {{}}}}}} + +Available tools: + +WEATHER & ENVIRONMENT: +- get_current_weather: No args. Returns latest IMS weather readings plus \ +current_time_israel, current_date_israel, current_datetime_israel (the real \ +"now" in Yeruham). Use these for "right now" answers; timestamp_local is \ +when the weather was recorded (may be stale — check age_minutes). +- get_weather_history: Args: start_date (str YYYY-MM-DD), end_date (str \ +YYYY-MM-DD). Returns hourly IMS weather summary for a date range. + +VINE SENSORS (ThingsBoard): +- get_vine_state: No args. Returns the latest on-site sensor readings from \ +ThingsBoard (soil moisture, leaf temperature, fruiting-zone PAR, irrigation \ +status, panel surface temps) comparing TREATMENT area (rows 501-502, under \ +panels) vs REFERENCE area (rows 503-504, open sky). Use when the user asks \ +about current vine conditions, stress levels, soil moisture, or irrigation. +- get_sensor_history: Args: device_type (str: air/crop/soil), area (str: \ +treatment/reference/ambient), hours_back (int, default 24). Returns hourly \ +averages from ThingsBoard time-series data. 
+ +PHOTOSYNTHESIS: +- calc_photosynthesis: Args: PAR (float), Tleaf (float), CO2 (float), \ +VPD (float), Tair (float). Returns net assimilation A and limiting factor \ +using the mechanistic Farquhar (FvCB) model. +- predict_photosynthesis_ml: Args: features (dict, optional). Returns ML \ +ensemble prediction of A. If features not provided, auto-fills from latest \ +IMS cache. Use when conditions are stressful (T>30C, high VPD). +- get_ps_forecast: Args: date (str YYYY-MM-DD, optional). Returns 24-hour \ +predicted A profile (hourly) using time-series forecasting. + +SHADING & TRACKING: +- simulate_shading: Args: angle_offset (float, degrees), hour (int 0-23), \ +date (str YYYY-MM-DD, optional). Returns A comparison shaded vs unshaded. +- compare_tilt_angles: Args: angles (list of ints, optional). Returns A \ +and energy at different tilt offsets. +- get_daily_schedule: Args: stress_threshold (float, optional), \ +shade_angle (int, optional). Returns hourly shading schedule. + +ENERGY: +- get_energy_generation: No args. Returns latest energy generation data \ +from ThingsBoard (today kWh, current power W). +- get_energy_history: Args: hours_back (int, default 24). Returns energy \ +generation time-series. +- predict_energy: Args: date (str YYYY-MM-DD, optional). Returns predicted \ +daily energy generation (kWh) based on IMS GHI forecast and panel geometry. + +ADVISORY: +- run_day_ahead_advisory: Args: date (str YYYY-MM-DD, optional). Returns \ +full stress advisory from the DayAheadAdvisor. + +VISUALIZATION: +- get_photosynthesis_3d: Args: hour (int 0-23, optional), date (str YYYY-MM-DD, \ +optional). Returns a 3D interactive scene showing the vine, solar tracker, sun, \ +and which parts of the canopy are doing how much photosynthesis (green = rate). \ +Use when the user asks to see a 3D view, visualize photosynthesis, or show vine \ +and tracker together. + +BIOLOGY: +- explain_biology_rule: Args: rule_name (str). Returns detailed explanation. 
\ +Valid names: {rule_names}. + +RESPONSE RULES: +- CRITICAL: When the user asks about current conditions, specific numbers, \ +predictions, sensor readings, or any site-specific data, you MUST call a \ +tool. NEVER answer data questions from your training knowledge — always \ +use a tool to get real data. +- When quoting numbers from tool results, cite the data source and timestamp. \ +Example: "According to IMS Station 43 (recorded 14:30), the temperature is 28°C." +- If tool data is older than 60 minutes, warn: "Note: this data is X minutes old." +- After receiving tool results, explain them in plain language. +- When the answer is "no shading" or "shading not needed", always state the \ +specific reason (no leaves / dormant; light-limited so full sun helps PS; or \ +no radiation). See no_shading_must_explain and no_leaves_no_shade_problem. +- If the user suggests something that violates a biology rule, refuse clearly \ +and explain which rule and why. +- If a tool returns an error or some data is missing, say clearly what data \ +is unavailable. Do NOT invent or estimate values — say "I don't have current \ +data for X" and explain what you can still answer from biology rules. +- If no API key is available, you can still answer biology questions from \ +your built-in knowledge. +- NEVER invent sensor readings, temperatures, or measurements. If you don't \ +have data, say so. +""" + + +# --------------------------------------------------------------------------- +# Build system prompt from BIOLOGY_RULES to avoid drift +# --------------------------------------------------------------------------- + +def _build_system_prompt() -> str: + """Build the system prompt, embedding biology rules from the shared dict.""" + rules_text = "\n\n".join( + f"{i}. 
def retrieve_relevant_rules(query: str, max_rules: int = 5) -> list[str]:
    """Pick the biology rules worth embedding in the prompt for ``query``.

    Each rule in ``_RULE_KEYWORDS`` is scored by how many of its keywords
    occur as plain substrings of the lower-cased query. Every rule in
    ``_PINNED_RULES`` is always selected; matching rules are then added in
    descending score order (ties keep ``_RULE_KEYWORDS`` order) until
    ``max_rules`` names have been chosen. Pinned rules are never dropped, so
    the result is longer than ``max_rules`` when ``max_rules`` is smaller
    than the pinned set.

    Returns the chosen names in ``BIOLOGY_RULES`` order; a chosen name that
    is not a key of ``BIOLOGY_RULES`` is left out.
    """
    needle = query.lower()
    hit_counts = {
        rule: sum(1 for keyword in keywords if keyword in needle)
        for rule, keywords in _RULE_KEYWORDS.items()
    }

    # Best match first; sorted() is stable even with reverse=True, so rules
    # with equal scores keep their _RULE_KEYWORDS order.
    ranked = sorted(
        (rule for rule, hits in hit_counts.items() if hits > 0),
        key=hit_counts.__getitem__,
        reverse=True,
    )

    # Core constraints are part of every prompt regardless of the query.
    chosen = set(_PINNED_RULES)
    for rule in ranked:
        if len(chosen) >= max_rules:
            break
        chosen.add(rule)

    return [rule for rule in BIOLOGY_RULES if rule in chosen]
+ The chatbot itself only handles: + - Gemini communication (two-pass tool-calling flow) + - Tool dispatch (thin delegation to hub services) + - Guardrails (query classification, response validation, confidence) + - Offline fallback (keyword-match to biology rules) + + Usage + ----- + bot = VineyardChatbot() # default hub + bot = VineyardChatbot(hub=custom_hub) # injected hub + response = bot.chat("Should I shade right now?", history=[]) + """ + + # Maximum retries when LLM fails to call a required tool + _MAX_TOOL_RETRIES = 1 + + def __init__( + self, + hub: Optional[DataHub] = None, + model_name: str = "gemini-2.5-flash", + api_key: Optional[str] = None, + verbose: bool = False, + ): + self.hub = hub or DataHub.default(verbose=verbose) + self.model_name = model_name + self._api_key = api_key + self._client = None + self.verbose = verbose + + # ------------------------------------------------------------------ + # Gemini client (lazy) + # ------------------------------------------------------------------ + + @property + def api_key(self) -> str: + return get_google_api_key(self._api_key) + + @property + def client(self): + if self._client is None: + self._client = get_genai_client(self._api_key) + return self._client + + @property + def has_api_key(self) -> bool: + try: + get_google_api_key(self._api_key) + return True + except (ValueError, Exception): + return False + + def _log(self, msg: str) -> None: + if self.verbose: + print(f"[VineyardChatbot] {msg}") + + # ------------------------------------------------------------------ + # Tool dispatch — thin delegation to hub services + # ------------------------------------------------------------------ + + def _dispatch_tool(self, tool_name: str, args: dict) -> dict: + """Route a tool call to the correct hub service method.""" + self._log(f"Dispatching tool: {tool_name}({args})") + + # --- Weather --- + if tool_name == "get_current_weather": + return self.hub.weather.get_current() + elif tool_name == 
"get_weather_history": + return self.hub.weather.get_history( + start_date=str(args.get("start_date", "")), + end_date=str(args.get("end_date", "")), + ) + + # --- Vine sensors --- + elif tool_name == "get_vine_state": + return self.hub.vine_sensors.get_snapshot() + elif tool_name == "get_sensor_history": + return self.hub.vine_sensors.get_history( + device_type=str(args.get("device_type", "crop")), + area=str(args.get("area", "treatment")), + hours_back=int(args.get("hours_back", 24)), + ) + + # --- Photosynthesis --- + elif tool_name == "calc_photosynthesis": + return self.hub.photosynthesis.predict_fvcb( + PAR=float(args.get("PAR", 1500)), + Tleaf=float(args.get("Tleaf", 30)), + CO2=float(args.get("CO2", 400)), + VPD=float(args.get("VPD", 2.0)), + Tair=float(args.get("Tair", 30)), + ) + elif tool_name == "predict_photosynthesis_ml": + return self.hub.photosynthesis.predict_ml( + features=args.get("features"), + ) + elif tool_name == "get_ps_forecast": + return self.hub.photosynthesis.forecast_day_ahead( + target_date=args.get("date"), + ) + + # --- Shading / tracking --- + elif tool_name == "simulate_shading": + return self.hub.photosynthesis.simulate_shading( + angle_offset=float(args.get("angle_offset", 20)), + hour=int(args.get("hour", 13)), + date_str=args.get("date"), + ) + elif tool_name == "compare_tilt_angles": + angles = args.get("angles") + if angles and isinstance(angles, list): + angles = [int(a) for a in angles] + return self.hub.photosynthesis.compare_angles(angles=angles) + elif tool_name == "get_daily_schedule": + return self.hub.photosynthesis.daily_schedule( + stress_threshold=float(args.get("stress_threshold", 2.0)), + shade_angle=int(args.get("shade_angle", 20)), + ) + + # --- Energy --- + elif tool_name == "get_energy_generation": + return self.hub.energy.get_current() + elif tool_name == "get_energy_history": + return self.hub.energy.get_history( + hours_back=int(args.get("hours_back", 24)), + ) + elif tool_name == "predict_energy": + 
return self.hub.energy.predict( + target_date=args.get("date"), + ) + + # --- Advisory --- + elif tool_name == "run_day_ahead_advisory": + return self.hub.advisory.run_advisory( + target_date=args.get("date"), + ) + + # --- Biology --- + elif tool_name == "explain_biology_rule": + return self.hub.biology.explain_rule( + rule_name=str(args.get("rule_name", "")), + ) + + elif tool_name == "get_photosynthesis_3d": + hour = args.get("hour") + if hour is not None: + hour = int(hour) + return self.hub.photosynthesis.get_photosynthesis_3d_scene( + hour=hour, + date_str=args.get("date"), + ) + + else: + return {"error": f"Unknown tool: {tool_name}"} + + # ------------------------------------------------------------------ + # Gemini communication + # ------------------------------------------------------------------ + + # Number of recent message pairs to keep verbatim + _RECENT_MESSAGES = 6 + # Max older messages to summarize + _MAX_SUMMARY_MESSAGES = 20 + + def _build_messages(self, user_message: str, history: list[dict]) -> list[dict]: + """Build Gemini multi-turn message list with sliding context window. 
+ + Strategy: + - Keep the most recent 6 messages verbatim (for conversational flow) + - Summarize older messages into a single context message + - Always include pinned context (current date, season) + """ + messages = [] + n = len(history) + + if n > self._RECENT_MESSAGES: + # Summarize older messages + older = history[:n - self._RECENT_MESSAGES] + # Take at most _MAX_SUMMARY_MESSAGES from the older portion + older = older[-self._MAX_SUMMARY_MESSAGES:] + summary = self._summarize_history(older) + if summary: + messages.append({ + "role": "user", + "parts": [{"text": f"[Conversation context: {summary}]"}], + }) + messages.append({ + "role": "model", + "parts": [{"text": "Understood, I'll keep that context in mind."}], + }) + + # Recent messages verbatim + recent = history[-self._RECENT_MESSAGES:] if n > self._RECENT_MESSAGES else history + for entry in recent: + role = entry.get("role", "user") + content = entry.get("content", "") + if role == "user": + messages.append({"role": "user", "parts": [{"text": content}]}) + elif role == "assistant": + messages.append({"role": "model", "parts": [{"text": content}]}) + + messages.append({"role": "user", "parts": [{"text": user_message}]}) + return messages + + @staticmethod + def _summarize_history(messages: list[dict]) -> str: + """Create a brief summary of older conversation messages.""" + topics = [] + for entry in messages: + content = entry.get("content", "") + role = entry.get("role", "user") + if role == "user" and content: + # Extract the core question/topic (first sentence or 100 chars) + first_line = content.split("\n")[0][:100] + topics.append(first_line) + + if not topics: + return "" + + # Deduplicate and keep last 5 topics + seen = set() + unique = [] + for t in reversed(topics): + t_lower = t.lower().strip() + if t_lower not in seen: + seen.add(t_lower) + unique.append(t) + unique.reverse() + + return "Earlier in this conversation, the user asked about: " + "; ".join(unique[-5:]) + + def _call_gemini(self, 
messages: list[dict], system_prompt: str | None = None) -> str: + """Send messages to Gemini and return raw text response.""" + prompt = system_prompt or CHATBOT_SYSTEM_PROMPT + response = self.client.models.generate_content( + model=self.model_name, + contents=messages, + config={"system_instruction": prompt}, + ) + return response.text + + def _extract_tool_call(self, text: str) -> Optional[dict]: + """Try to extract a tool_call JSON from the model response.""" + try: + match = re.search(r'\{\s*"tool_call"\s*:', text) + if not match: + return None + start = match.start() + brace_count = 0 + for i in range(start, len(text)): + if text[i] == "{": + brace_count += 1 + elif text[i] == "}": + brace_count -= 1 + if brace_count == 0: + snippet = text[start:i + 1] + parsed = json.loads(snippet) + return parsed.get("tool_call") + return None + except (json.JSONDecodeError, ValueError): + return None + + # ------------------------------------------------------------------ + # Context gathering (for rule validation) + # ------------------------------------------------------------------ + + def _get_validation_context(self) -> dict: + """Gather current context for post-response rule validation.""" + ctx = {} + try: + from src.phenology import estimate_stage_for_date + from datetime import date, datetime + import zoneinfo + + tz = zoneinfo.ZoneInfo("Asia/Jerusalem") + now = datetime.now(tz=tz) + ctx["hour"] = now.hour + ctx["month"] = now.month + + stage = estimate_stage_for_date(date.today()) + ctx["stage_id"] = stage.id + + # Try to get current temperature from cached weather + try: + wx = self.hub.weather.get_current() + if "error" not in wx: + t = wx.get("air_temperature_c") + if t is not None: + ctx["temp_c"] = float(t) + except Exception: + pass + + except Exception: + pass + return ctx + + # ------------------------------------------------------------------ + # Main chat method + # ------------------------------------------------------------------ + + def chat(self, 
user_message: str, history: list[dict] | None = None) -> ChatResponse: + """ + Process a user message and return a structured response. + + Flow: + 1. Classify query (data vs knowledge vs greeting) + 2. Send to Gemini (Pass 1) + 3. If data query and no tool call → re-prompt to force tool use + 4. If tool call → dispatch → tag result → send back (Pass 2) + 5. Validate response against biology rules + 6. Estimate confidence + 7. Return structured ChatResponse + """ + history = history or [] + + if not self.has_api_key: + _, response = self._fallback_response(user_message) + return response + + try: + # Step 1: Classify query + query_class = classify_query(user_message) + self._log(f"Query classified: {query_class.category} " + f"(requires_data={query_class.requires_data})") + + # Build contextual system prompt with only relevant biology rules + contextual_prompt = build_contextual_prompt(user_message) + messages = self._build_messages(user_message, history) + self._log("Pass 1: calling Gemini...") + response_text = self._call_gemini(messages, system_prompt=contextual_prompt) + self._log(f"Pass 1 response: {response_text[:200]}...") + + tool_call = self._extract_tool_call(response_text) + + # Step 2: Force tool use if query requires data but LLM didn't call one + if query_class.requires_data and not tool_call: + self._log("Data query but no tool call — re-prompting...") + retry_prompt = ( + "The user is asking about site-specific data or current conditions. " + "You MUST call a tool to answer this — do not use your training " + "knowledge for real-time data. Please call the appropriate tool now." 
+ ) + messages.append({"role": "model", "parts": [{"text": response_text}]}) + messages.append({"role": "user", "parts": [{"text": retry_prompt}]}) + response_text = self._call_gemini(messages, system_prompt=contextual_prompt) + tool_call = self._extract_tool_call(response_text) + + # Step 3: Process tool call if present + tool_name = None + tool_result = None + tool_succeeded = False + data_age = None + + if tool_call: + tool_name = tool_call.get("name", "") + tool_args = tool_call.get("args", {}) + self._log(f"Tool call detected: {tool_name}") + + try: + tool_result = self._dispatch_tool(tool_name, tool_args) + tool_succeeded = "error" not in tool_result + except Exception as exc: + tool_result = {"error": f"Tool execution failed: {exc}"} + tool_succeeded = False + + # Tag result with source metadata + tagged_result = tag_tool_result(tool_name, tool_result) + data_age = tagged_result.get("_data_age_minutes") + + # Build Pass 2 prompt with source citation instructions + source_label = get_source_label(tool_name) + freshness_note = "" + if data_age is not None and data_age > 60: + freshness_note = ( + f"\n\nIMPORTANT: This data is {data_age:.0f} minutes old. " + "Tell the user the data may be stale and conditions may have changed." + ) + + tool_result_text = ( + f"Tool result for {tool_name} " + f"(source: {source_label}):\n" + f"```json\n{json.dumps(tagged_result, indent=2, default=str)}\n```\n\n" + f"Explain this result to the farmer in plain language. " + f"When quoting numbers, mention that they come from {source_label}." 
+ f"{freshness_note}" + ) + + messages.append({"role": "model", "parts": [{"text": response_text}]}) + messages.append({"role": "user", "parts": [{"text": tool_result_text}]}) + + self._log("Pass 2: calling Gemini with tool result...") + final_response = self._call_gemini(messages) + self._log(f"Pass 2 response: {final_response[:200]}...") + else: + final_response = response_text + + # Step 4: Estimate confidence + confidence = estimate_confidence( + tool_called=tool_call is not None, + tool_succeeded=tool_succeeded, + data_age_minutes=data_age, + tool_name=tool_name, + ) + + # Step 5: Post-response rule validation + validation_ctx = self._get_validation_context() + violations = validate_response( + response_text=final_response, + context=validation_ctx, + ) + + caveats: list[str] = [] + violation_dicts: list[dict] = [] + + for v in violations: + violation_dicts.append({ + "rule": v.rule_name, + "severity": v.severity, + "message": v.message, + }) + if v.severity == "block": + # Override the response with the correction + final_response = ( + f"{v.correction}\n\n" + f"*(Original response was overridden because it violated " + f"the **{v.rule_name.replace('_', ' ')}** rule.)*" + ) + confidence = "high" # rule-based override is deterministic + self._log(f"BLOCKED: {v.rule_name} — {v.message}") + elif v.severity == "warn": + caveats.append(v.correction) + self._log(f"WARNING: {v.rule_name} — {v.message}") + + # Build data freshness caveat + if data_age is not None and data_age > 60: + caveats.append( + f"Data is {data_age:.0f} minutes old — conditions may have changed." 
+ ) + + # Build sources list + sources: list[str] = [] + if tool_name: + sources.append(get_source_label(tool_name)) + if not tool_call and query_class.category == "knowledge": + sources.append("Built-in biology rules") + + response_mode = classify_response_mode(user_message) + + return ChatResponse( + message=final_response, + tool_calls=[{"name": tool_name, "args": tool_call.get("args", {}), + "result": tool_result}] if tool_call else [], + data=tool_result if tool_result else {}, + confidence=confidence, + sources=sources, + caveats=caveats, + rule_violations=violation_dicts, + response_mode=response_mode, + ) + + except Exception as exc: + self._log(f"Chat error: {exc}\n{traceback.format_exc()}") + matched, fallback = self._fallback_response(user_message) + if matched: + return fallback + return ChatResponse( + message=( + "I'm having trouble connecting to the AI service right now. " + "You can still ask me about vine biology rules \u2014 I have those " + "built in. For data queries, please check that your Google API " + "key is configured." 
+ ), + confidence="insufficient_data", + sources=[], + caveats=["AI service connection failed"], + ) + + # ------------------------------------------------------------------ + # Fallback (no API key / offline) + # ------------------------------------------------------------------ + + def _fallback_response(self, user_message: str) -> tuple[bool, ChatResponse]: + """Keyword-match fallback when Gemini is unavailable.""" + msg_lower = user_message.lower() + + rule_matches = { + "site_location": ["yeruham", "location", "timezone", "right now", "current time", + "what time", "israel time", "local time"], + "temperature_transition": ["temperature", "30 degree", "30\u00b0", "rubp", "rubisco", + "transition", "heat", "hot"], + "no_shade_before_10": ["morning", "before 10", "early", "sunrise"], + "no_shade_in_may": ["may", "flowering", "fruit set", "fruit-set"], + "cwsi_threshold": ["cwsi", "water stress", "crop water"], + "berry_sunburn": ["sunburn", "berry", "35\u00b0", "35 degree"], + "energy_budget": ["budget", "energy", "sacrifice", "ceiling", "5%", + "monthly", "generation", "kwh", "power", "solar"], + "model_routing": ["model", "fvcb", "farquhar", "ml", "routing", + "predict", "forecast"], + "phenological_multiplier": ["veraison", "ripening", "phenolog"], + "irrigation_management": ["irrigation", "water", "soil moisture"], + "fertiliser_management": ["fertiliser", "fertilizer", "nitrogen", "nutrient"], + "photosynthesis_3d": ["3d", "3D", "visual", "visualize", "visualise", + "model show", "vine and tracker", "sun and vine"], + "no_leaves_no_shade_problem": ["no leaves", "dormant", "budburst", "no canopy"], + "no_shading_must_explain": ["should not shade", "don't shade", "no shading"], + } + + matched_rules = [] + for rule_name, keywords in rule_matches.items(): + if any(kw in msg_lower for kw in keywords): + matched_rules.append(rule_name) + + if matched_rules: + parts = ["Here's what I know about that (from built-in biology rules):\n"] + for rule in matched_rules: + 
parts.append(f"**{rule.replace('_', ' ').title()}:** {BIOLOGY_RULES[rule]}\n") + parts.append( + "\n*Note: I'm running without an AI connection, so I can only " + "answer from built-in biology rules. Connect a Google API key " + "for full advisory capabilities.*" + ) + return True, ChatResponse( + message="\n".join(parts), + confidence="medium", + sources=["Built-in biology rules"], + ) + + return False, ChatResponse( + message=( + "I'm currently running without an AI connection (no Google API key). " + "I can answer questions about vine biology rules \u2014 try asking about:\n\n" + "- Temperature and shading thresholds\n" + "- Morning light rules\n" + "- May shading restrictions\n" + "- Water stress (CWSI)\n" + "- Berry sunburn risk\n" + "- Energy budget limits\n" + "- Model routing (FvCB vs ML)\n" + "- Veraison protection\n" + "- Irrigation management\n" + "- Energy generation and prediction\n\n" + "*Connect a Google API key for full advisory capabilities " + "(weather, photosynthesis calculations, shading simulations, " + "energy analysis).*" + ), + confidence="insufficient_data", + sources=[], + ) diff --git a/src/chronos_forecaster.py b/src/chronos_forecaster.py new file mode 100644 index 0000000000000000000000000000000000000000..ecf13b302b2908a16eaf5a4180d16f52fe70ba47 --- /dev/null +++ b/src/chronos_forecaster.py @@ -0,0 +1,2 @@ +"""Backward-compatible re-export from src.forecasting.chronos_forecaster.""" +from src.forecasting.chronos_forecaster import * # noqa: F401, F403 diff --git a/src/command_arbiter.py b/src/command_arbiter.py new file mode 100644 index 0000000000000000000000000000000000000000..9b692c1dbaafe7f532f62de2c6b1fe3e21f427dd --- /dev/null +++ b/src/command_arbiter.py @@ -0,0 +1,327 @@ +""" +CommandArbiter: priority stack, hysteresis, and fallback logic for tracker commands. + +Sits between the TradeoffEngine output and the physical tracker actuator. +Ensures: + 1. Weather protection and harvest mode override everything. + 2. 
@dataclass
class ArbiterDecision:
    """Output of the CommandArbiter."""

    angle: float                            # final tracker tilt angle (degrees)
    dispatch: bool                          # True = send command to actuator
    source: str                             # which priority level decided
    requested_angle: float = 0.0            # what was originally requested
    suppressed_reason: Optional[str] = None  # why dispatch=False (if suppressed)

    def decision_tags(self) -> list[str]:
        """Return log/telemetry tags describing this decision.

        Always contains ``"source:<value>"``. When the command was suppressed
        (``dispatch`` is False and a reason is set) it also contains
        ``"suppressed:<reason>"``.

        ``source`` may be a plain string or a ``(str, Enum)`` member. Since
        Python 3.11 an f-string renders such a member as ``Class.NAME``
        instead of its value, so the value is taken explicitly to keep the
        tag format stable across interpreter versions.
        """
        source_label = getattr(self.source, "value", self.source)
        tags = [f"source:{source_label}"]
        if not self.dispatch and self.suppressed_reason:
            tags.append(f"suppressed:{self.suppressed_reason}")
        return tags
    def __init__(
        self,
        hysteresis_window_min: float = HYSTERESIS_WINDOW_MIN,
        angle_tolerance_deg: float = ANGLE_TOLERANCE_DEG,
    ):
        """Create an arbiter with an empty request history.

        Both arguments default to the constants imported from
        ``config.settings``.
        """
        # Look-back window (minutes) used when trimming the request buffer.
        self.window_min = hysteresis_window_min
        # Angle differences at or below this value (degrees) count as "no change".
        self.tolerance = angle_tolerance_deg
        # (timestamp, requested_angle) pairs recorded by should_move().
        self._buffer: list[tuple[datetime, float]] = []
        # Angle of the last dispatched command; 0.0 until one is dispatched.
        self.current_angle: float = 0.0
        # Timestamp of the last dispatched command; None until one is dispatched.
        self._last_dispatch_time: Optional[datetime] = None
+ if weather_override is not None: + return { + "angle": weather_override.get("target_angle", 0.0), + "source": CommandSource.WEATHER, + "reason": weather_override.get("reason", "weather override active"), + } + + # P2: Mechanical harvesting — panels go vertical for clearance + if harvest_active: + return { + "angle": 90.0, + "source": CommandSource.HARVEST, + "reason": "mechanical harvesting in progress", + } + + # P3: Safety rail alert — FvCB/ML divergence too high + if not safety_valid: + return { + "angle": theta_astro, + "source": CommandSource.SAFETY, + "reason": "FvCB/ML divergence exceeded threshold; reverting to astronomical", + } + + # P4: Simulation timeout — shadow model took too long + if sim_time_sec > SIMULATION_TIMEOUT_SEC: + return { + "angle": theta_astro, + "source": CommandSource.TIMEOUT, + "reason": f"simulation took {sim_time_sec:.1f}s > {SIMULATION_TIMEOUT_SEC}s limit", + } + + # P5: Normal — use TradeoffEngine result + angle = engine_result.get("angle", theta_astro) + return { + "angle": angle, + "source": CommandSource.ENGINE, + "reason": engine_result.get("action", "tradeoff_engine"), + } + + # ------------------------------------------------------------------ + # Hysteresis filter + # ------------------------------------------------------------------ + + def should_move( + self, + requested_angle: float, + timestamp: datetime, + ) -> ArbiterDecision: + """Apply hysteresis filter to a requested angle change. + + Motor protection logic: + - Suppresses changes smaller than angle_tolerance_deg. + - Requires the requested angle to be stable for hysteresis_window_min + before dispatching. + - Immediate dispatch if this is the first command or if the change + is large (e.g., weather stow). 
+ """ + # Record request in buffer + self._buffer.append((timestamp, requested_angle)) + + # Trim buffer to window + cutoff = timestamp - pd.Timedelta(minutes=self.window_min) + self._buffer = [(t, a) for t, a in self._buffer if t >= cutoff] + + # Change smaller than tolerance → suppress + angle_diff = abs(requested_angle - self.current_angle) + if angle_diff <= self.tolerance: + return ArbiterDecision( + angle=self.current_angle, + dispatch=False, + source=CommandSource.HYSTERESIS, + requested_angle=requested_angle, + suppressed_reason=f"change {angle_diff:.1f}° ≤ tolerance {self.tolerance}°", + ) + + # First command or only one entry in buffer → dispatch immediately + if len(self._buffer) < 2 or self._last_dispatch_time is None: + self.current_angle = requested_angle + self._last_dispatch_time = timestamp + return ArbiterDecision( + angle=requested_angle, + dispatch=True, + source=CommandSource.INITIAL, + requested_angle=requested_angle, + ) + + # Check stability: all recent entries must agree within tolerance + stable = all( + abs(a - requested_angle) <= self.tolerance + for _, a in self._buffer + ) + + if stable: + self.current_angle = requested_angle + self._last_dispatch_time = timestamp + return ArbiterDecision( + angle=requested_angle, + dispatch=True, + source=CommandSource.STABLE, + requested_angle=requested_angle, + ) + + return ArbiterDecision( + angle=self.current_angle, + dispatch=False, + source=CommandSource.HYSTERESIS, + requested_angle=requested_angle, + suppressed_reason="angle not stable within hysteresis window", + ) + + # ------------------------------------------------------------------ + # Combined: select + filter + # ------------------------------------------------------------------ + + def arbitrate( + self, + timestamp: datetime, + engine_result: dict, + theta_astro: float, + safety_valid: bool = True, + sim_time_sec: float = 0.0, + weather_override: Optional[dict] = None, + harvest_active: bool = False, + ) -> ArbiterDecision: + """Full 
class AstronomicalTracker:
    """Sun-following tracker angle provider (the always-safe default).

    Thin adapter around a shadow model exposing a single
    ``get_angle(timestamp)`` call.  The model may be injected; otherwise a
    ``ShadowModel`` is built on first use.
    """

    def __init__(self, shadow_model=None):
        self._shadow_model = shadow_model

    @property
    def shadow_model(self):
        """Return the shadow model, constructing the default one lazily."""
        if self._shadow_model is not None:
            return self._shadow_model
        # Imported here so the heavy geometry module loads only when needed.
        from src.shading.solar_geometry import ShadowModel
        self._shadow_model = ShadowModel()
        return self._shadow_model

    def get_angle(self, timestamp: datetime) -> float:
        """Return the astronomical tracking angle for *timestamp*.

        Naive timestamps are interpreted as UTC.  Returns ``0.0`` when the
        reported solar elevation is at or below zero.
        """
        moment = pd.Timestamp(timestamp)
        if moment.tzinfo is None:
            moment = moment.tz_localize("UTC")

        solar = self.shadow_model.get_solar_position(pd.DatetimeIndex([moment]))
        elevation = float(solar["solar_elevation"].iloc[0])
        if elevation <= 0:
            return 0.0

        azimuth = float(solar["solar_azimuth"].iloc[0])
        tilt = self.shadow_model.compute_tracker_tilt(azimuth, elevation)
        return float(tilt["tracker_theta"])
@dataclass
class TickResult:
    """Everything recorded about one control-loop tick."""

    timestamp: datetime
    slot_index: int                         # 0–95
    stage_id: str = "unknown"

    # --- day-ahead plan lookup ---
    plan_offset_deg: float = 0.0            # offset the day-ahead plan asked for
    plan_gate_passed: bool = False

    # --- live gate / override ---
    live_gate_passed: bool = False
    live_override: bool = False             # True if live data diverged from plan
    override_reason: Optional[str] = None

    # --- arbiter decision ---
    target_angle: float = 0.0
    dispatch: bool = False
    source: str = ""

    # --- dispatch outcome ---
    trackers_verified: int = 0
    trackers_total: int = 0
    dispatch_error: Optional[str] = None

    # --- energy cost ---
    energy_cost_kwh: float = 0.0            # energy sacrificed by this slot's offset

    # --- budget tracking ---
    budget_spent_kwh: float = 0.0           # amount actually deducted from budget
    budget_remaining_kwh: float = 0.0       # daily budget left after this slot

    # --- model routing ---
    model_route: str = ""                   # "fvcb" or "ml"

    # --- fleet overrides (per-tracker angles differing from the default) ---
    fleet_overrides: Optional[Dict[str, float]] = None

    # --- plan divergence tracking ---
    divergence_cumulative_kwh: float = 0.0
    divergence_consecutive: int = 0
    replan_triggered: bool = False

    # --- sensor snapshot ---
    air_temp_c: Optional[float] = None
    ghi_w_m2: Optional[float] = None
    wind_speed_ms: Optional[float] = None

    def to_dict(self) -> dict:
        """Return the fields as a dict, with datetimes rendered as ISO-8601 strings."""
        payload = {}
        for name, value in vars(self).items():
            if isinstance(value, datetime):
                value = value.isoformat()
            payload[name] = value
        return payload
------------------------------------------------------------------ + + @property + def arbiter(self): + if self._arbiter is None: + from src.command_arbiter import CommandArbiter + self._arbiter = CommandArbiter() + return self._arbiter + + @property + def dispatcher(self): + if self._dispatcher is None: + from src.tracker_dispatcher import TrackerDispatcher + self._dispatcher = TrackerDispatcher(dry_run=self.dry_run) + return self._dispatcher + + @property + def astro(self): + if self._astro is None: + from src.command_arbiter import AstronomicalTracker + self._astro = AstronomicalTracker() + return self._astro + + @property + def hub(self): + if self._hub is None: + from src.data.data_providers import DataHub + self._hub = DataHub.default() + return self._hub + + @property + def modes(self): + if self._modes is None: + from src.operational_modes import OperationalModeChecker + self._modes = OperationalModeChecker() + return self._modes + + @property + def fleet(self): + if self._fleet is None: + from src.tracker_fleet import TrackerFleet + self._fleet = TrackerFleet() + return self._fleet + + @property + def budget_planner(self): + if self._budget_planner is None: + from src.energy_budget import EnergyBudgetPlanner + self._budget_planner = EnergyBudgetPlanner() + return self._budget_planner + + @property + def router(self): + if self._router is None: + from src.chatbot.routing_agent import RoutingAgent + self._router = RoutingAgent() + return self._router + + # ------------------------------------------------------------------ + # Plan loading + # ------------------------------------------------------------------ + + def _build_persistence_forecast(self) -> tuple[list[float], list[float]]: + """Build 96-slot temp/GHI forecast from last available IMS day.""" + ims_df = self.hub.weather.get_dataframe() + if ims_df.empty: + return [25.0] * 96, [0.0] * 96 + + df = ims_df.copy() + if "timestamp_utc" in df.columns: + df["timestamp_utc"] = 
pd.to_datetime(df["timestamp_utc"], utc=True) + df = df.set_index("timestamp_utc") + + last_day = df.index.max().normalize() + day_data = df[df.index.normalize() == last_day] + if len(day_data) < 10: + last_day -= pd.Timedelta(days=1) + day_data = df[df.index.normalize() == last_day] + + temps = [25.0] * 96 + ghis = [0.0] * 96 + for _, row in day_data.iterrows(): + slot = row.name.hour * 4 + row.name.minute // 15 + if 0 <= slot < 96: + t = row.get("air_temperature_c") + if pd.notna(t): + temps[slot] = float(t) + g = row.get("ghi_w_m2") + if pd.notna(g): + ghis[slot] = float(g) + return temps, ghis + + def _compute_daily_budget(self, target: date) -> float: + """Compute the daily energy budget from the annual/monthly hierarchy.""" + annual = self.budget_planner.compute_annual_plan(target.year) + month_budget = annual["monthly_budgets"].get(target.month, 0.5) + weekly = self.budget_planner.compute_weekly_plan(target, month_budget) + dow = target.weekday() + return weekly["daily_budgets_kWh"][min(dow, 6)] + + def load_plan(self, target_date: Optional[date] = None) -> Optional[dict]: + """Load the day-ahead plan for the given date.""" + target = target_date or date.today() + + # Try loading from file + if self.plan_path.exists(): + try: + with open(self.plan_path) as f: + plan = json.load(f) + if plan.get("target_date") == str(target): + self._current_plan = plan + logger.info("Loaded plan for %s (%d slots)", + target, len(plan.get("slots", []))) + return plan + except Exception as exc: + logger.warning("Failed to load plan from %s: %s", self.plan_path, exc) + + # No plan file or wrong date — compute on the fly + try: + from src.day_ahead_planner import DayAheadPlanner + + temps, ghis = self._build_persistence_forecast() + daily_budget = self._compute_daily_budget(target) + + planner = DayAheadPlanner() + plan_obj = planner.plan_day(target, temps, ghis, max(daily_budget, 0.1)) + plan = plan_obj.to_dict() + + # Save for reuse + self.plan_path.parent.mkdir(parents=True, 
exist_ok=True) + with open(self.plan_path, "w") as f: + json.dump(plan, f, indent=2) + + self._current_plan = plan + return plan + + except Exception as exc: + logger.error("Plan generation failed: %s", exc) + return None + + def _get_slot_plan(self, slot_index: int) -> Optional[dict]: + """Look up the planned offset for a given slot.""" + if not self._current_plan: + return None + slots = self._current_plan.get("slots", []) + for s in slots: + t = s.get("time", "") + try: + h, m = map(int, t.split(":")) + s_idx = h * 4 + m // 15 + if s_idx == slot_index: + return s + except (ValueError, AttributeError): + continue + return None + + # ------------------------------------------------------------------ + # Energy budget + # ------------------------------------------------------------------ + + def _ensure_daily_budget(self, today: date) -> Optional[dict]: + """Load or reuse the daily slot-level budget plan.""" + if self._daily_budget_plan and self._daily_budget_date == today: + return self._daily_budget_plan + + # Try restoring from Redis (survives worker restarts) + try: + from src.data.redis_cache import get_redis + redis = get_redis() + if redis: + cached = redis.get_json("control:budget") + if cached and cached.get("date") == str(today): + self._daily_budget_plan = cached["plan"] + self._daily_budget_date = today + logger.info("Restored daily budget from Redis for %s", today) + return self._daily_budget_plan + except Exception: + pass + + try: + daily_budget = self._compute_daily_budget(today) + self._daily_budget_plan = self.budget_planner.compute_daily_plan( + today, daily_budget, + ) + self._daily_budget_date = today + + # Reset divergence tracking for new day + self._divergence_cumulative_kwh = 0.0 + self._divergence_consecutive = 0 + self._last_replan_slot = -99 + + # Persist to Redis + self._persist_budget(today) + + return self._daily_budget_plan + except Exception as exc: + logger.warning("Failed to compute daily budget: %s", exc) + return None + + def 
_persist_budget(self, today: date) -> None: + """Save daily budget state to Redis for cross-process access.""" + try: + from src.data.redis_cache import get_redis + import json as _json + redis = get_redis() + if redis and self._daily_budget_plan: + payload = { + "date": str(today), + "plan": _json.loads(_json.dumps(self._daily_budget_plan, default=str)), + } + redis.set_json("control:budget", payload, ttl=86400) + except Exception as exc: + logger.debug("Budget Redis persist failed: %s", exc) + + @staticmethod + def _slot_key(now: datetime) -> str: + """Format a datetime as a slot key like '10:15'.""" + return f"{now.hour:02d}:{(now.minute // 15) * 15:02d}" + + # ------------------------------------------------------------------ + # Fleet overrides (Task 1) + # ------------------------------------------------------------------ + + def _resolve_fleet_overrides( + self, now: datetime, theta_astro: float, + ) -> Dict[str, float]: + """Resolve per-tracker angle overrides from TrackerFleet assignments. + + Returns an empty dict in the common case (all trackers follow the + arbiter's angle). Only returns overrides for trackers that have + an explicit non-tracking assignment active right now. 
+ """ + from src.tracker_fleet import tracker_id_to_name + from src.tracker_scheduler import TrackerScheduler, PLAN_LIBRARY + + overrides: Dict[str, float] = {} + try: + best = self.fleet.get_all_best_assignments(now) + except Exception as exc: + logger.debug("Fleet assignment lookup skipped: %s", exc) + return overrides + + for tracker_id, assignment in best.items(): + if assignment is None: + continue + + plan_id = assignment.plan_id + # Get or create scheduler for this plan + if plan_id not in self._schedulers: + if assignment.plan_file: + plan_path = Path(assignment.plan_file) + if plan_path.exists(): + self._schedulers[plan_id] = TrackerScheduler( + plan_file=plan_path, + ) + else: + logger.warning("Plan file not found: %s", plan_path) + continue + elif plan_id in PLAN_LIBRARY: + self._schedulers[plan_id] = TrackerScheduler( + plan_data=PLAN_LIBRARY[plan_id], + ) + else: + logger.debug("Unknown plan_id %r, skipping", plan_id) + continue + + sched = self._schedulers[plan_id] + event = sched.get_event(now) + if event is None: + continue + + mode = event.get("mode") + event_angle = event.get("angle") + + if mode == "tracking" or mode is None: + # Same as default astronomical tracking — no override needed + continue + elif mode == "antiTracking" and event_angle is not None: + overrides[tracker_id_to_name(tracker_id)] = theta_astro + event_angle + elif mode == "fixed_angle" and event_angle is not None: + overrides[tracker_id_to_name(tracker_id)] = event_angle + + return overrides + + # ------------------------------------------------------------------ + # Plan divergence (Task 3) + # ------------------------------------------------------------------ + + def _check_plan_divergence( + self, + slot_index: int, + planned_offset: float, + actual_offset: float, + planned_cost: float, + actual_cost: float, + ) -> bool: + """Track divergence between plan and execution. 
Return True if re-plan needed.""" + cost_diff = abs(planned_cost - actual_cost) + offset_diverged = abs(planned_offset - actual_offset) > ANGLE_TOLERANCE_DEG + + self._divergence_cumulative_kwh += cost_diff + + if offset_diverged: + self._divergence_consecutive += 1 + else: + self._divergence_consecutive = 0 + + # Check cooldown + if slot_index - self._last_replan_slot < PLAN_REPLAN_COOLDOWN_SLOTS: + return False + + if self._divergence_cumulative_kwh >= PLAN_DIVERGENCE_THRESHOLD_KWH: + logger.warning( + "Cumulative divergence %.3f kWh >= %.3f threshold; triggering re-plan", + self._divergence_cumulative_kwh, PLAN_DIVERGENCE_THRESHOLD_KWH, + ) + return True + + if self._divergence_consecutive >= PLAN_DIVERGENCE_THRESHOLD_SLOTS: + logger.warning( + "%d consecutive divergent slots >= %d threshold; triggering re-plan", + self._divergence_consecutive, PLAN_DIVERGENCE_THRESHOLD_SLOTS, + ) + return True + + return False + + def _trigger_replan(self, now: datetime, slot_index: int) -> bool: + """Re-generate the day-ahead plan from the current slot onward.""" + today = now.date() + daily_bp = self._ensure_daily_budget(today) + spent = daily_bp["cumulative_spent"] if daily_bp else 0.0 + remaining = (daily_bp["daily_total_kWh"] - spent) if daily_bp else 0.0 + + if remaining <= 0: + logger.info("Re-plan skipped: no budget remaining") + return False + + try: + from src.day_ahead_planner import DayAheadPlanner + + temps, ghis = self._build_persistence_forecast() + + planner = DayAheadPlanner() + plan_obj = planner.plan_day(today, temps, ghis, max(remaining, 0.01)) + plan = plan_obj.to_dict() + + # Save for reuse + self.plan_path.parent.mkdir(parents=True, exist_ok=True) + with open(self.plan_path, "w") as f: + json.dump(plan, f, indent=2) + + self._current_plan = plan + self._last_replan_slot = slot_index + self._divergence_cumulative_kwh = 0.0 + self._divergence_consecutive = 0 + self._replan_count += 1 + + n_slots = len(plan.get("slots", [])) + logger.info( + "Re-plan #%d at 
slot %d: %d slots, %.4f kWh remaining budget", + self._replan_count, slot_index, n_slots, remaining, + ) + return True + except Exception as exc: + logger.error("Re-plan failed: %s", exc) + return False + + # ------------------------------------------------------------------ + # Main tick + # ------------------------------------------------------------------ + + def tick(self, timestamp: Optional[datetime] = None) -> TickResult: + """Execute one control loop cycle. + + Parameters + ---------- + timestamp : datetime, optional + Override current time (for simulation/replay). + """ + now = timestamp or datetime.now(tz=timezone.utc) + slot_index = now.hour * 4 + now.minute // 15 + + result = TickResult(timestamp=now, slot_index=slot_index) + + # 1. Load plan if needed + today = now.date() if hasattr(now, 'date') else date.today() + if (not self._current_plan or + self._current_plan.get("target_date") != str(today)): + self.load_plan(today) + + # 2. Fetch live weather + try: + wx = self.hub.weather.get_current() + if "error" not in wx: + result.air_temp_c = wx.get("air_temperature_c") + result.ghi_w_m2 = wx.get("ghi_w_m2") + result.wind_speed_ms = wx.get("wind_speed_ms") + except Exception as exc: + logger.warning("Weather fetch failed: %s", exc) + + # 2b. Route model selection (FvCB vs ML) based on live conditions + try: + telemetry = { + "temp_c": result.air_temp_c, + "ghi_w_m2": result.ghi_w_m2, + "hour": now.hour, + } + result.model_route = self.router.route(telemetry) + except Exception as exc: + logger.debug("Model routing failed: %s", exc) + result.model_route = "fvcb" + + # 3. Get astronomical tracking angle + theta_astro = self.astro.get_angle(now) + + # 4. 
Look up plan for this slot + slot_plan = self._get_slot_plan(slot_index) + if slot_plan: + result.plan_offset_deg = slot_plan.get("offset_deg", 0.0) + result.plan_gate_passed = slot_plan.get("gate_passed", False) + result.energy_cost_kwh = slot_plan.get("energy_cost_kwh", 0.0) + result.stage_id = self._current_plan.get("stage_id", "unknown") + else: + logger.debug("No plan slot for index %d — defaulting to astronomical", slot_index) + + # 5. Live gate check — override plan if conditions diverged + # Intentionally simpler than DayAheadPlanner._check_gate(): + # the planner has forecast CWSI + FvCB shading_helps; the live gate + # only checks real-time temp and GHI as hard constraints. + planned_offset = result.plan_offset_deg + live_offset = planned_offset # default: follow the plan + + if result.air_temp_c is not None: + from config.settings import ( + NO_SHADE_BEFORE_HOUR, + SEMILLON_TRANSITION_TEMP_C, + SHADE_ELIGIBLE_GHI_ABOVE, + ) + + if planned_offset > 0: + blocked = False + reason = "" + + if now.hour < NO_SHADE_BEFORE_HOUR: + blocked, reason = True, "morning — no shading before 10:00" + elif result.air_temp_c < SEMILLON_TRANSITION_TEMP_C: + blocked, reason = True, f"temp {result.air_temp_c:.0f}°C < {SEMILLON_TRANSITION_TEMP_C:.0f}°C" + elif result.ghi_w_m2 is not None and result.ghi_w_m2 < SHADE_ELIGIBLE_GHI_ABOVE: + blocked, reason = True, f"GHI {result.ghi_w_m2:.0f} < {SHADE_ELIGIBLE_GHI_ABOVE:.0f}" + + if blocked: + live_offset = 0.0 + result.live_override = True + result.override_reason = reason + logger.info("Live override: plan offset %.0f° → 0° (%s)", + planned_offset, reason) + + result.live_gate_passed = live_offset > 0 + + # 5b. 
Budget guard — block intervention if daily budget exhausted + if live_offset > 0: + daily_bp = self._ensure_daily_budget(today) + if daily_bp: + sk = self._slot_key(now) + slot_remaining = daily_bp["slot_budgets"].get(sk, 0.0) + margin_remaining = daily_bp["daily_margin_remaining_kWh"] + if slot_remaining + margin_remaining <= 0: + live_offset = 0.0 + result.live_override = True + result.override_reason = "daily energy budget exhausted" + logger.info("Budget guard: forcing astronomical (budget depleted)") + + # 6. Build engine result for arbiter + target_angle = theta_astro + live_offset + engine_result = { + "angle": target_angle, + "action": f"plan_offset_{live_offset:.0f}deg", + } + + # Check operational modes (wind stow, heat shield, harvest) + mode_override = self.modes.check_all( + wind_speed_ms=result.wind_speed_ms, + air_temp_c=result.air_temp_c, + theta_astro=theta_astro, + current_date=today, + ) + weather_override = mode_override.to_weather_override() if mode_override else None + + # 7. Arbitrate + decision = self.arbiter.arbitrate( + timestamp=now, + engine_result=engine_result, + theta_astro=theta_astro, + weather_override=weather_override, + ) + + result.target_angle = decision.angle + result.dispatch = decision.dispatch + result.source = decision.source.value if hasattr(decision.source, 'value') else str(decision.source) + + # 7b. Resolve per-tracker fleet overrides (rare; most ticks return {}) + fleet_overrides = self._resolve_fleet_overrides(now, theta_astro) + if fleet_overrides: + result.fleet_overrides = fleet_overrides + logger.info("Fleet overrides active: %s", fleet_overrides) + + # 8. 
Dispatch to trackers + if decision.dispatch: + try: + dispatch_result = self.dispatcher.dispatch( + decision, angle_overrides=fleet_overrides or None, + ) + result.trackers_verified = dispatch_result.n_success + result.trackers_total = len(dispatch_result.trackers) + if not dispatch_result.all_verified: + failed = [t.device_name for t in dispatch_result.trackers if not t.verified] + result.dispatch_error = f"failed: {', '.join(failed)}" + except Exception as exc: + result.dispatch_error = str(exc) + logger.error("Dispatch failed: %s", exc) + + # 9. Spend energy budget + if result.energy_cost_kwh > 0: + daily_bp = self._ensure_daily_budget(today) + if daily_bp: + sk = self._slot_key(now) + result.budget_spent_kwh = self.budget_planner.spend_slot( + daily_bp, sk, result.energy_cost_kwh, + ) + result.budget_remaining_kwh = ( + sum(daily_bp["slot_budgets"].values()) + + daily_bp["daily_margin_remaining_kWh"] + ) + # Persist updated budget to Redis + self._persist_budget(today) + + if result.budget_spent_kwh < result.energy_cost_kwh: + logger.warning( + "Budget shortfall: requested %.4f kWh, spent %.4f kWh (slot %s)", + result.energy_cost_kwh, result.budget_spent_kwh, sk, + ) + + # 10. Check plan divergence and trigger re-plan if needed + if slot_plan: + actual_offset = live_offset if not result.live_override else 0.0 + needs_replan = self._check_plan_divergence( + slot_index=slot_index, + planned_offset=result.plan_offset_deg, + actual_offset=actual_offset, + planned_cost=slot_plan.get("energy_cost_kwh", 0.0), + actual_cost=result.energy_cost_kwh, + ) + result.divergence_cumulative_kwh = self._divergence_cumulative_kwh + result.divergence_consecutive = self._divergence_consecutive + if needs_replan: + result.replan_triggered = self._trigger_replan(now, slot_index) + + # 11. 
Log + self._tick_log.append(result.to_dict()) + logger.info( + "Tick %02d:%02d slot=%d angle=%.1f° offset=%.0f° dispatch=%s source=%s" + " budget_remaining=%.3f kWh%s", + now.hour, now.minute, slot_index, decision.angle, + live_offset, decision.dispatch, decision.source, + result.budget_remaining_kwh, + f" [OVERRIDE: {result.override_reason}]" if result.live_override else "", + ) + + return result + + # ------------------------------------------------------------------ + # Continuous run + # ------------------------------------------------------------------ + + def run(self, max_ticks: Optional[int] = None) -> None: + """Run the control loop continuously (blocking). + + Parameters + ---------- + max_ticks : int, optional + Stop after this many ticks (for testing). None = run forever. + """ + logger.info("Control loop starting (dry_run=%s)", self.dry_run) + tick_count = 0 + + while max_ticks is None or tick_count < max_ticks: + try: + result = self.tick() + tick_count += 1 + except Exception as exc: + logger.error("Tick failed: %s", exc) + + # Sleep until next 15-min boundary + now = datetime.now(tz=timezone.utc) + next_slot = now.replace( + minute=(now.minute // DP_SLOT_DURATION_MIN + 1) * DP_SLOT_DURATION_MIN % 60, + second=0, microsecond=0, + ) + if next_slot <= now: + next_slot += timedelta(hours=1) + sleep_sec = (next_slot - now).total_seconds() + logger.debug("Sleeping %.0f s until %s", sleep_sec, next_slot) + time.sleep(max(sleep_sec, 1.0)) + + # ------------------------------------------------------------------ + # Log access + # ------------------------------------------------------------------ + + def get_log(self) -> List[dict]: + """Return all tick results from this session.""" + return list(self._tick_log) + + def save_log(self, path: Optional[Path] = None) -> Path: + """Save tick log to JSON file.""" + out = path or self.log_path.with_suffix(".json") + out.parent.mkdir(parents=True, exist_ok=True) + with open(out, "w") as f: + json.dump(self._tick_log, 
f, indent=2, default=str) + logger.info("Saved %d tick results to %s", len(self._tick_log), out) + return out diff --git a/src/data/__init__.py b/src/data/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..70fbc2490c8b3b6d1cae12a6e2abf97a9aa09cd4 --- /dev/null +++ b/src/data/__init__.py @@ -0,0 +1 @@ +"""Data access: IMS, sensors, schema, ThingsBoard, data providers.""" diff --git a/src/data/data_providers.py b/src/data/data_providers.py new file mode 100644 index 0000000000000000000000000000000000000000..e3a40c9b3b7989923e65ba1edee8b1247b7e4bec --- /dev/null +++ b/src/data/data_providers.py @@ -0,0 +1,1180 @@ +""" +Data provider layer for the VineyardChatbot. + +Architecture +------------ +Each data domain gets a **Service** class that encapsulates: + - data fetching (IMS API, ThingsBoard API, model inference, ...) + - caching / TTL logic + - error handling (returns dict with "error" key on failure) + - serialisation to LLM-friendly dicts + +Services are registered on a lightweight **DataHub** which is injected +into the chatbot. The chatbot's tool methods become thin one-liners +that delegate to ``self.hub..()``. + + ┌────────────────────┐ + │ VineyardChatbot │ + │ (tool dispatch) │ + └────────┬───────────┘ + │ self.hub + ┌────────▼───────────┐ + │ DataHub │ + │ (service registry) │ + └────────┬───────────┘ + ┌──────────┬────────┼────────┬──────────┐ + ▼ ▼ ▼ ▼ ▼ + WeatherSvc VineSensorSvc PSSvc EnergySvc BiologySvc + │ │ │ │ │ + IMSClient TB Client Farquhar TB+Analytical rules dict + ML Pred + +Loose coupling guarantees: + - The chatbot never imports IMS / TB / Farquhar / ML directly. + - Each service can be unit-tested in isolation (pass a mock client). + - Adding a new data source = write a new Service + register it. + - Services own their TTL caches — the chatbot is stateless w.r.t. data. 
@dataclass
class _CacheEntry:
    """One in-memory cache slot."""

    value: Any
    expires_at: float  # deadline on the time.monotonic() clock


class TTLCache:
    """TTL cache with optional Redis backend.

    When Redis is available (``UPSTASH_REDIS_URL`` set), values are stored
    in Redis so multiple processes (API server, workers) share state.
    Falls back to in-memory when Redis is unavailable — Streamlit keeps
    working exactly as before.

    Every Redis call is best-effort: if the backend raises (outage, value
    that cannot be serialised, ...) the cache silently degrades to the
    local in-memory store instead of failing the caller.
    """

    def __init__(self, ttl_seconds: float = 300, redis_prefix: str = ""):
        self.ttl = ttl_seconds
        self._prefix = redis_prefix
        self._store: Dict[str, _CacheEntry] = {}
        # Lazy Redis lookup (avoid import-time side effects)
        self._redis_checked = False
        self._redis = None

    def _get_redis(self):
        """Return the shared Redis wrapper, or None; resolved only once."""
        if not self._redis_checked:
            self._redis_checked = True
            try:
                from src.data.redis_cache import get_redis
                self._redis = get_redis()
            except Exception:
                self._redis = None
        return self._redis

    def _rkey(self, key: str) -> str:
        """Namespace *key* with the configured Redis prefix, if any."""
        return f"{self._prefix}{key}" if self._prefix else key

    def get(self, key: str) -> Optional[Any]:
        """Return the cached value for *key*, or None when absent or expired."""
        # Try Redis first
        redis = self._get_redis()
        if redis:
            try:
                val = redis.get_json(self._rkey(key))
            except Exception:
                val = None  # backend trouble: use the local copy instead
            if val is not None:
                return val
        # Fall back to in-memory
        entry = self._store.get(key)
        if entry is None:
            return None
        if time.monotonic() < entry.expires_at:
            return entry.value
        # Evict the stale entry so expired values do not pile up in memory.
        self._store.pop(key, None)
        return None

    def set(self, key: str, value: Any) -> None:
        """Store *value* under *key* for ``self.ttl`` seconds."""
        # Write to Redis if available
        redis = self._get_redis()
        if redis:
            try:
                # A sub-second TTL would truncate to 0; clamp to one second.
                redis.set_json(self._rkey(key), value, ttl=max(1, int(self.ttl)))
            except Exception:
                pass  # best-effort: the in-memory write below still happens
        # Always write in-memory too (local fast path)
        self._store[key] = _CacheEntry(value=value, expires_at=time.monotonic() + self.ttl)

    def invalidate(self, key: str) -> None:
        """Drop *key* from Redis (best-effort) and from the local store."""
        redis = self._get_redis()
        if redis:
            try:
                redis.delete(self._rkey(key))
            except Exception:
                pass  # best-effort: still clear the local copy
        self._store.pop(key, None)
class WeatherService(BaseService):
    """IMS weather data — cached CSV for history, latest row for 'now'."""

    service_name = "weather"

    def __init__(self, ims_client: Any = None, cache_ttl: float = 1800):
        self._ims = ims_client  # lazy: built by _client() when None
        self._df_cache = TTLCache(ttl_seconds=cache_ttl, redis_prefix="weather:")

    # -- lazy client --

    def _client(self):
        """Return the IMS client, importing and constructing it on first use."""
        if self._ims is None:
            from src.ims_client import IMSClient
            self._ims = IMSClient()
        return self._ims

    def _load_df(self) -> pd.DataFrame:
        """Return the IMS DataFrame, served from the TTL cache when present."""
        cached = self._df_cache.get("ims")
        if cached is not None:
            return cached
        df = self._client().load_cached()
        if not df.empty:
            # Empty frames are not cached, so the next call retries the load.
            self._df_cache.set("ims", df)
        return df

    def get_dataframe(self) -> pd.DataFrame:
        """Public accessor for the cached IMS DataFrame."""
        return self._load_df()

    # -- public API --

    def _now_israel(self) -> Dict[str, str]:
        """Current time in Yeruham (Asia/Jerusalem) for context in API responses."""
        try:
            from zoneinfo import ZoneInfo
            tz = ZoneInfo("Asia/Jerusalem")
        except (ImportError, KeyError):
            # ZoneInfoNotFoundError (no tz database installed) is a KeyError.
            # The fixed +02:00 fallback ignores daylight-saving time.
            tz = timezone(timedelta(hours=2))
        now = datetime.now(tz)
        return {
            "current_time_israel": now.strftime("%H:%M"),
            "current_date_israel": now.strftime("%Y-%m-%d"),
            "current_datetime_israel": now.isoformat(),
        }

    def get_current(self) -> Dict[str, Any]:
        """Latest IMS weather row with local time and staleness.

        Always includes current time (Yeruham) so callers can compare —
        on the error paths as well.
        """
        try:
            df = self._load_df()
            if df.empty:
                return {"error": "No cached IMS data available.", **self._now_israel()}
            last = df.iloc[-1]

            result: Dict[str, Any] = {
                "timezone": "Asia/Jerusalem (Israel local, Yeruham/Sde Boker)",
                **self._now_israel(),
            }
            try:
                ts_utc = pd.to_datetime(last.get("timestamp_utc"), utc=True)
                ts_local = ts_utc.tz_convert("Asia/Jerusalem")
                now_utc = pd.Timestamp.now(tz="UTC")
                result["timestamp_utc"] = ts_utc.isoformat()
                result["timestamp_local"] = ts_local.isoformat()
                result["age_minutes"] = round((now_utc - ts_utc).total_seconds() / 60, 1)
            except Exception:
                # Unparseable timestamp: report it verbatim, without staleness.
                result["timestamp_utc"] = str(last.get("timestamp_utc", "unknown"))

            for col in df.columns:
                if col == "timestamp_utc":
                    continue
                val = last[col]
                if not pd.notna(val):
                    continue
                # np.integer is listed explicitly: NumPy ints are not `int`
                # subclasses and would otherwise be emitted as strings.
                if isinstance(val, (int, float, np.integer, np.floating)):
                    result[col] = round(float(val), 2)
                else:
                    result[col] = str(val)
            return result
        except Exception as exc:
            return {"error": f"Could not load weather data: {exc}", **self._now_israel()}

    def get_history(self, start_date: str, end_date: str) -> Dict[str, Any]:
        """Hourly IMS summary for a date range (from cached CSV).

        Both dates are interpreted as UTC calendar days; *end_date* is
        inclusive.
        """
        try:
            df = self._load_df()
            if df.empty:
                return {"error": "No cached IMS data."}
            if "timestamp_utc" in df.columns:
                df = df.set_index(pd.to_datetime(df["timestamp_utc"], utc=True))
            # Label-based slicing on a DatetimeIndex needs a sorted index.
            df = df.sort_index()
            start = pd.Timestamp(start_date, tz="UTC")
            end = pd.Timestamp(end_date, tz="UTC") + pd.Timedelta(days=1)
            subset = df.loc[start:end]
            if subset.empty:
                return {"error": f"No data in range {start_date} to {end_date}."}
            hourly = subset.resample("1h").mean(numeric_only=True)
            return summarise_dataframe(hourly)
        except Exception as exc:
            return {"error": f"Weather history failed: {exc}"}
class VineSensorService(BaseService):
    """On-site vine sensors via ThingsBoard — snapshot + time-series."""

    service_name = "vine_sensors"

    def __init__(self, tb_client: Any = None, snapshot_ttl: float = 300):
        self._tb = tb_client  # lazy: built by _client() when None
        self._snap_cache = TTLCache(ttl_seconds=snapshot_ttl, redis_prefix="vine:")

    def _client(self):
        """Return the ThingsBoard client, constructing it on first use."""
        if self._tb is None:
            from src.thingsboard_client import ThingsBoardClient
            self._tb = ThingsBoardClient()
        return self._tb

    # -- public API --

    def get_snapshot(self, light: bool = False,
                     mode: Optional[str] = None) -> Dict[str, Any]:
        """Latest vine state (treatment vs reference), 5-min TTL.

        Parameters
        ----------
        light : bool
            If True, fetch only ~6 key devices instead of all 21.
        mode : str, optional
            "dashboard" = 4 devices only (air + soil + irrigation).

        Returns the snapshot as a dict, or a dict with an "error" key when
        ThingsBoard cannot be reached. Failures are not cached.
        """
        # When a mode is given it alone determines the cache key.
        cache_key = mode or ("snap_light" if light else "snap")
        cached = self._snap_cache.get(cache_key)
        if cached is not None:
            return cached
        try:
            snapshot = self._client().get_vine_snapshot(light=light, mode=mode)
            result = snapshot.to_dict()
            self._snap_cache.set(cache_key, result)
            return result
        except Exception as exc:
            return {
                "error": f"ThingsBoard unavailable: {exc}",
                "hint": "Check THINGSBOARD_USERNAME/PASSWORD in .env",
            }

    def get_history(
        self,
        device_type: str = "crop",
        area: str = "treatment",
        hours_back: int = 24,
    ) -> Dict[str, Any]:
        """Hourly averages for a device group over the last N hours.

        Returns a summarised dict, or a dict with an "error" key for bad
        arguments, a missing ThingsBoard client module, or a failed fetch.
        """
        # Import inside a guard: services report failures as error dicts,
        # so a missing client module must not escape as an exception.
        try:
            from src.thingsboard_client import (
                AIR_KEYS, CROP_KEYS, SOIL_KEYS, DEVICE_REGISTRY, VineArea,
            )
        except Exception as exc:
            return {"error": f"ThingsBoard client unavailable: {exc}"}

        key_map = {"air": AIR_KEYS, "crop": CROP_KEYS, "soil": SOIL_KEYS}
        keys = key_map.get(device_type.lower())
        if keys is None:
            return {"error": f"Unknown device_type '{device_type}'. Use air/crop/soil."}

        area_enum = {
            "treatment": VineArea.TREATMENT,
            "reference": VineArea.REFERENCE,
            "ambient": VineArea.AMBIENT,
        }.get(area.lower())
        if area_enum is None:
            return {"error": f"Unknown area '{area}'. Use treatment/reference/ambient."}

        if hours_back <= 0:
            return {"error": f"hours_back must be positive, got {hours_back}."}

        # Select matching devices (registry names start with the device type)
        devices = [
            name for name, info in DEVICE_REGISTRY.items()
            if info.area == area_enum and name.lower().startswith(device_type.lower())
        ]
        if not devices:
            return {"error": f"No {device_type} devices in {area} area."}

        end = datetime.now(tz=timezone.utc)
        start = end - timedelta(hours=hours_back)

        try:
            frames = []
            for dev in devices:
                df = self._client().get_timeseries(dev, keys, start, end)
                if not df.empty:
                    # Prefix columns so devices stay distinguishable after concat.
                    df = df.add_prefix(f"{dev}_")
                    frames.append(df)
            if not frames:
                return {"error": "No time-series data returned from ThingsBoard."}
            merged = pd.concat(frames, axis=1).sort_index()
            hourly = merged.resample("1h").mean(numeric_only=True)
            return summarise_dataframe(hourly)
        except Exception as exc:
            return {"error": f"Sensor history failed: {exc}"}
def predict_fvcb(
    self, PAR: float, Tleaf: float, CO2: float, VPD: float, Tair: float,
) -> Dict[str, Any]:
    """Evaluate the Farquhar model at one operating point.

    Besides the net assimilation rate, the Rubisco-limited and
    RuBP-limited rates are recomputed from the model's temperature
    responses to report which of the two is the bottleneck. Shading is
    flagged as helpful whenever the leaf is warmer than 30 C.
    """
    fvcb = self._get_farquhar()
    net_rate = fvcb.calc_photosynthesis(PAR=PAR, Tleaf=Tleaf, CO2=CO2, VPD=VPD, Tair=Tair)

    # Temperature-dependent kinetics are evaluated at leaf temperature in kelvin.
    leaf_kelvin = Tleaf + 273.15
    vcmax = fvcb.calc_Vcmax(leaf_kelvin)
    jmax = fvcb.calc_Jmax(leaf_kelvin)
    gamma = fvcb.calc_gamma_star(leaf_kelvin)
    kc = fvcb.calc_Kc(leaf_kelvin)
    ko = fvcb.calc_Ko(leaf_kelvin)
    ci = fvcb._ci_from_ca(CO2, VPD)
    electron_flow = fvcb.calc_electron_transport(PAR, jmax)

    co2_surplus = ci - gamma
    rubisco_rate = vcmax * co2_surplus / (ci + kc * (1.0 + 210.0 / ko))
    rubp_rate = electron_flow * co2_surplus / (4.0 * ci + 8.0 * gamma)

    if rubisco_rate < rubp_rate:
        limiting = "Rubisco-limited (high temperature is the bottleneck)"
    else:
        limiting = "RuBP-limited (light is the bottleneck)"

    heat_stressed = Tleaf > 30.0
    if heat_stressed:
        note = "Shading may help reduce heat stress"
    else:
        note = "Shading would reduce photosynthesis (vine needs light)"

    return {
        "A_net": round(net_rate, 3),
        "units": "umol CO2 m-2 s-1",
        "limiting_factor": limiting,
        "Tleaf": Tleaf,
        "shading_would_help": heat_stressed,
        "model": "fvcb",
        "note": note,
    }
Returns (predictor, feature_cols, best_name).""" + if self._ml_predictor is not None: + return self._ml_predictor + + from src.ims_client import IMSClient + from src.farquhar_model import FarquharModel + from src.preprocessor import Preprocessor + from src.predictor import PhotosynthesisPredictor + + ims = IMSClient() + ims_df = ims.load_cached() + if ims_df.empty: + raise RuntimeError("No IMS cache data — cannot train ML predictor.") + + # Compute Stage 1 labels (A) from sensor data + from src.sensor_data_loader import SensorDataLoader + loader = SensorDataLoader() + sensor_df = loader.load() + fvcb = FarquharModel() + labels = fvcb.compute_all(sensor_df) + labels.name = "A" + + # Ensure labels have a datetime index for merge + if "time" in sensor_df.columns: + ts = pd.to_datetime(sensor_df["time"], utc=True) + labels.index = ts + + # Preprocess: merge, time features, split + prep = Preprocessor() + merged = prep.merge_ims_with_labels(ims_df, labels) + if merged.empty: + raise RuntimeError("Merge of IMS + labels produced empty DataFrame.") + merged = prep.create_time_features(merged) + X_train, y_train, X_test, y_test = prep.temporal_split(merged) + if X_train.empty: + raise RuntimeError("Not enough data to train ML predictor.") + + predictor = PhotosynthesisPredictor() + predictor.train(X_train, y_train) + if not X_test.empty: + predictor.evaluate(X_test, y_test) + + best_name = "GradientBoosting" + if predictor.results: + best_name = min(predictor.results, key=lambda n: predictor.results[n].get("mae", 999)) + + feature_cols = list(X_train.columns) + self._ml_predictor = (predictor, feature_cols, best_name) + return self._ml_predictor + + def _auto_fill_features(self, feature_cols: List[str]) -> Optional[Dict[str, float]]: + """Fill feature vector from the latest IMS cache row + time features.""" + try: + from src.ims_client import IMSClient + from src.time_features import add_cyclical_time_features + ims = IMSClient() + df = ims.load_cached() + if df.empty: + 
def forecast_day_ahead(self, target_date: Optional[str] = None) -> Dict[str, Any]:
    """24h A profile using FvCB model over IMS weather data.

    For each daytime hour, computes A from IMS temperature/GHI/humidity
    using typical vine conditions. Falls back to FvCB-based projection
    when Chronos or ML forecast is unavailable.

    The day window, the 06-19 daytime filter and the reported ``hour``
    values are all in site-local time (Asia/Jerusalem); IMS timestamps
    are converted from UTC first. When *target_date* is not covered by
    the cache, the last 96 cached rows are used instead and ``date``
    reports the day of the last of those rows.
    """
    site_tz = "Asia/Jerusalem"
    try:
        from src.ims_client import IMSClient
        ims = IMSClient()
        df = ims.load_cached()
        if df.empty:
            return {"error": "No IMS data cached for PS forecast."}

        if "timestamp_utc" in df.columns:
            # Copy first: the frame belongs to the IMS client and must not
            # be modified in place.
            df = df.copy()
            df["timestamp_utc"] = pd.to_datetime(df["timestamp_utc"], utc=True)
            df = df.set_index("timestamp_utc")

        if isinstance(df.index, pd.DatetimeIndex):
            # Daytime is a property of the site, not of UTC: work in local time.
            utc_index = df.index if df.index.tz is not None else df.index.tz_localize("UTC")
            df = df.set_axis(utc_index.tz_convert(site_tz), axis=0)

        target = target_date or str(pd.Timestamp.now(tz=site_tz).date())
        try:
            day_start = pd.Timestamp(target, tz=site_tz)
            # DateOffset keeps the wall-clock time across DST changes.
            day_end = day_start + pd.DateOffset(days=1)
            day_df = df[(df.index >= day_start) & (df.index < day_end)]
        except Exception:
            day_df = pd.DataFrame()

        # If target date not in cache, use last available day
        if day_df.empty:
            day_df = df.tail(96)  # ~24h of 15-min data
            if day_df.empty:
                return {"error": "Not enough IMS data for forecast."}
            target = str(day_df.index[-1].date())

        hourly = day_df.resample("1h").mean(numeric_only=True)
        model = self._get_farquhar()

        # Map IMS columns (try exact settings names first, then fuzzy match)
        def _find_col(df_cols, exact_names, fuzzy_terms, exclude_terms=()):
            for name in exact_names:
                if name in df_cols:
                    return name
            for c in df_cols:
                cl = c.lower()
                if any(t in cl for t in fuzzy_terms) and not any(t in cl for t in exclude_terms):
                    return c
            return None

        temp_col = _find_col(hourly.columns, ["air_temperature_c"], ["temp"], ["dew", "soil"])
        ghi_col = _find_col(hourly.columns, ["ghi_w_m2"], ["ghi", "rad", "irrad"])
        rh_col = _find_col(hourly.columns, ["rh_percent"], ["rh", "humid"])

        hourly_results = []
        for idx, row in hourly.iterrows():
            hour = idx.hour if hasattr(idx, "hour") else 0
            if hour < 6 or hour > 19:
                continue

            # Missing readings fall back to fixed defaults (25 C, no light, 40 % RH).
            Tair = float(row[temp_col]) if temp_col and pd.notna(row.get(temp_col)) else 25.0
            Tleaf = Tair + 2.0  # leaf typically ~2C above air
            ghi = float(row[ghi_col]) if ghi_col and pd.notna(row.get(ghi_col)) else 0.0
            PAR = ghi * 2.0  # approximate PAR from GHI (umol/m2/s ~ 2x W/m2)
            rh = float(row[rh_col]) if rh_col and pd.notna(row.get(rh_col)) else 40.0

            # Estimate VPD from T and RH; floored at 0.1
            es = 0.6108 * np.exp(17.27 * Tair / (Tair + 237.3))
            VPD = max(es * (1 - rh / 100), 0.1)

            if PAR < 50:
                A = 0.0
                limiting = "dark"
            else:
                A = model.calc_photosynthesis(PAR=PAR, Tleaf=Tleaf, CO2=400.0, VPD=VPD, Tair=Tair)
                limiting = "rubisco" if Tleaf > 30 else "rubp"

            hourly_results.append({
                "hour": hour,
                "A_predicted": round(A, 2),
                "Tair": round(Tair, 1),
                "PAR": round(PAR, 0),
                "VPD": round(VPD, 2),
                "limiting": limiting,
                "shading_helps": Tleaf > 30.0,
            })

        if not hourly_results:
            return {"error": "No daytime hours available in forecast range."}

        peak = max(hourly_results, key=lambda r: r["A_predicted"])
        total_A = sum(r["A_predicted"] for r in hourly_results)
        stress_hours = sum(1 for r in hourly_results if r["limiting"] == "rubisco")

        return {
            "date": target,
            "method": "fvcb_projection",
            "hourly": hourly_results,
            "peak_A": peak["A_predicted"],
            "peak_hour": peak["hour"],
            "daily_total_A": round(total_A, 1),
            "rubisco_limited_hours": stress_hours,
            "note": "FvCB-based projection from IMS weather data. "
                    "PAR estimated as 2x GHI. Leaf temp estimated as Tair+2C.",
        }
    except Exception as exc:
        return {"error": f"PS forecast failed: {exc}"}
def get_photosynthesis_3d_scene(
    self,
    hour: Optional[int] = None,
    date_str: Optional[str] = None,
    height_px: int = 480,
) -> Dict[str, Any]:
    """Build 3D scene data and HTML for vine, tracker, sun and photosynthesis.

    Returns dict with scene_3d (data), scene_3d_html (full HTML string),
    A_vine, sunlit_fraction, hour and date — or a dict with an "error" key
    when the scene module cannot be imported or the build fails.

    When *hour* is omitted, the current hour at the site (Asia/Jerusalem)
    is used rather than the server's own clock.
    """
    try:
        from src.vine_3d_scene import build_scene_data, build_scene_html
    except Exception as exc:
        return {"error": f"3D scene module unavailable: {exc}"}

    try:
        # NOTE(review): assumes build_scene_data interprets `hour` as
        # site-local time — confirm against src.vine_3d_scene.
        h = hour if hour is not None else pd.Timestamp.now(tz="Asia/Jerusalem").hour
        scene_data = build_scene_data(hour=h, date_str=date_str)
        html = build_scene_html(scene_data, height_px=height_px)
        return {
            "scene_3d": scene_data,
            "scene_3d_html": html,
            "A_vine": scene_data["A_vine"],
            "sunlit_fraction": scene_data["sunlit_fraction"],
            "hour": scene_data["hour"],
            "date": scene_data["date"],
        }
    except Exception as exc:
        return {"error": f"3D scene build failed: {exc}"}
def get_daily_production(self, target_date: Optional[str] = None) -> Dict[str, Any]:
    """Accumulated energy production for a single day (real TB data).

    Returns dict with daily_kwh, peak_hour, peak_hour_kwh, hourly_profile.
    On failure or when no data comes back, ``daily_kwh`` is None and an
    "error" key is set; ``date`` always carries the resolved day, also
    when *target_date* was omitted.

    The day window is the UTC calendar day of the target date; the hours
    in the profile are converted to Israel local time when the tz
    database is available.
    """
    # Resolve the date before the try block so the error path can report it.
    target = target_date or str(date.today())
    try:
        day_start = pd.Timestamp(target, tz="UTC")
        day_end = day_start + pd.Timedelta(days=1)

        df = self._client().get_asset_timeseries(
            "Plant", ["production"],
            start=day_start.to_pydatetime(),
            end=day_end.to_pydatetime(),
            limit=500,
            interval_ms=3_600_000,  # 1 hour
            agg="SUM",
        )
        if df.empty or "production" not in df.columns:
            return {"date": target, "daily_kwh": None, "error": "No production data"}

        # production is in Wh per interval; hourly SUM = Wh per hour
        df["kwh"] = df["production"].fillna(0) / 1000
        total_kwh = df["kwh"].sum()

        # Convert UTC → Israel local time for display
        try:
            import zoneinfo
            tz_il = zoneinfo.ZoneInfo("Asia/Jerusalem")
        except Exception:
            tz_il = None  # no tz database: hours stay in the index's own zone

        hourly_profile = []
        peak_hour = 12  # reported when no interval has positive production
        peak_kwh = 0.0
        for ts, row in df.iterrows():
            local_ts = ts.astimezone(tz_il) if tz_il else ts
            h = local_ts.hour if hasattr(local_ts, "hour") else 0
            kwh = row["kwh"]
            hourly_profile.append({"hour": h, "energy_kwh": round(kwh, 2)})
            if kwh > peak_kwh:
                peak_kwh = kwh
                peak_hour = h

        return {
            "date": target,
            "daily_kwh": round(total_kwh, 1),
            "peak_hour": peak_hour,
            "peak_hour_kwh": round(peak_kwh, 2),
            "hourly_profile": hourly_profile,
            "source": "ThingsBoard Plant asset",
        }
    except Exception as exc:
        return {"date": target, "daily_kwh": None, "error": f"Energy fetch failed: {exc}"}
def predict(self, target_date: Optional[str] = None,
            *, ims_df: Optional[pd.DataFrame] = None) -> Dict[str, Any]:
    """For future dates: analytical estimate. For past/today: real TB data.

    Parameters
    ----------
    target_date : str, optional
        ISO date (YYYY-MM-DD). Defaults to today.
    ims_df : DataFrame, optional
        Weather data handed through to the estimate for future dates.

    A malformed *target_date* yields an error dict (``daily_kwh`` None)
    instead of raising, like every other failure in this service.
    """
    target = target_date or str(date.today())
    try:
        target_d = date.fromisoformat(target)
    except (TypeError, ValueError):
        return {
            "date": target,
            "daily_kwh": None,
            "error": f"Invalid date '{target}'; expected YYYY-MM-DD.",
        }

    # Past or today → use real TB data
    if target_d <= date.today():
        return self.get_daily_production(target)

    # Future → analytical estimate from IMS GHI
    return self._predict_analytical(target, ims_df=ims_df)
def _predict_ml(self, target_date: str) -> Optional[Dict[str, Any]]:
    """Predict a day's energy with the ML model, fed by on-site weather.

    The last 24 hours of hourly-averaged Air1 readings from ThingsBoard
    are handed to the predictor as a persistence forecast for
    *target_date*. Returns None when fewer than 8 rows come back.
    """
    from src.energy_predictor import EnergyPredictor

    predictor = EnergyPredictor()

    # Window: the 24 hours ending now (UTC).
    window_end = datetime.now(tz=timezone.utc)
    window_start = window_end - timedelta(hours=24)

    weather = self._client().get_timeseries(
        "Air1",
        keys=["GSR", "airTemperature", "windSpeed"],
        start=window_start,
        end=window_end,
        limit=500,
        interval_ms=3_600_000,
        agg="AVG",
    )

    enough_rows = not weather.empty and len(weather) >= 8
    if not enough_rows:
        return None
    return predictor.predict_day_from_weather_df(target_date, weather)
# ═══════════════════════════════════════════════════════════════════════
# 5. AdvisoryService (Gemini day-ahead advisor)
# ═══════════════════════════════════════════════════════════════════════

class AdvisoryService(BaseService):
    """Gemini-powered day-ahead stress advisory."""

    service_name = "advisory"

    def __init__(self, vine_sensor_svc: Optional[VineSensorService] = None, verbose: bool = False):
        """Store the optional vine-sensor service and the advisor verbosity flag."""
        self._vine_svc = vine_sensor_svc
        self._verbose = verbose

    def _fetch_vine_snapshot(self) -> Optional[Any]:
        """Return a live vine snapshot for the advisor, or ``None``.

        ``None`` is returned when no vine-sensor service was supplied, when
        its ``get_snapshot()`` reports an error, or when fetching the
        snapshot object itself fails (best-effort; logged at DEBUG level).
        """
        if not self._vine_svc:
            return None
        # Probe first: skip the second fetch when the service reports an error.
        snap_dict = self._vine_svc.get_snapshot()
        if "error" in snap_dict:
            return None
        try:
            # The advisor is handed the client's snapshot object rather than
            # the dict returned by get_snapshot().
            return self._vine_svc._client().get_vine_snapshot()
        except Exception:
            import logging

            logging.getLogger(__name__).debug(
                "Vine snapshot unavailable; running advisory without it",
                exc_info=True,
            )
            return None

    def run_advisory(self, target_date: Optional[str] = None) -> Dict[str, Any]:
        """Full DayAheadAdvisor report, enriched with vine snapshot if available.

        Args:
            target_date: Date string for the advisory; ``None`` means today.

        Returns:
            ``DayAheadAdvisor.report_to_dict(report)`` on success, otherwise a
            dict with a single ``error`` key.
        """
        try:
            from src.day_ahead_advisor import DayAheadAdvisor
            from src.ims_client import IMSClient

            advisor = DayAheadAdvisor(verbose=self._verbose)
            weather_df = IMSClient().load_cached()
            if weather_df.empty:
                return {"error": "No IMS weather data cached. Cannot run advisory."}

            vine_snapshot = self._fetch_vine_snapshot()

            report = advisor.advise(
                date=target_date or str(date.today()),
                weather_forecast=weather_df,
                phenological_stage="vegetative",
                vine_snapshot=vine_snapshot,
            )
            return DayAheadAdvisor.report_to_dict(report)
        except Exception as exc:
            return {"error": f"Advisory failed: {exc}"}


# ═══════════════════════════════════════════════════════════════════════
# 6.
BiologyService (rule lookup — no external deps) +# ═══════════════════════════════════════════════════════════════════════ + +class BiologyService(BaseService): + """Biology rules lookup — pure in-memory, no API calls.""" + + service_name = "biology" + + def __init__(self, rules: Optional[Dict[str, str]] = None): + if rules is None: + from src.vineyard_chatbot import BIOLOGY_RULES + rules = BIOLOGY_RULES + self._rules = rules + + def explain_rule(self, rule_name: str) -> Dict[str, Any]: + key = rule_name.lower().strip() + if key in self._rules: + return {"rule": key, "explanation": self._rules[key]} + return {"error": f"Unknown rule '{key}'", "available_rules": list(self._rules.keys())} + + def list_rules(self) -> Dict[str, Any]: + return {"rules": list(self._rules.keys())} + + +# ═══════════════════════════════════════════════════════════════════════ +# DataHub (service registry) +# ═══════════════════════════════════════════════════════════════════════ + +class DataHub: + """Lightweight registry of data-provider services. + + Usage + ----- + hub = DataHub.default() + hub.weather.get_current() + hub.vine_sensors.get_snapshot() + hub.photosynthesis.predict_fvcb(PAR=1500, ...) + hub.energy.get_current() + + The chatbot receives a hub at init and delegates all data access + through it — never importing data clients directly. + """ + + def __init__(self) -> None: + self._services: Dict[str, BaseService] = {} + + # -- registration -- + + def register(self, service: BaseService) -> None: + self._services[service.service_name] = service + + def get(self, name: str) -> BaseService: + if name not in self._services: + raise KeyError(f"No service registered as '{name}'. 
" + f"Available: {list(self._services)}") + return self._services[name] + + # -- typed accessors (convenience, avoids casts everywhere) -- + + @property + def weather(self) -> WeatherService: + return self._services["weather"] # type: ignore[return-value] + + @property + def vine_sensors(self) -> VineSensorService: + return self._services["vine_sensors"] # type: ignore[return-value] + + @property + def photosynthesis(self) -> PhotosynthesisService: + return self._services["photosynthesis"] # type: ignore[return-value] + + @property + def energy(self) -> EnergyService: + return self._services["energy"] # type: ignore[return-value] + + @property + def advisory(self) -> AdvisoryService: + return self._services["advisory"] # type: ignore[return-value] + + @property + def biology(self) -> BiologyService: + return self._services["biology"] # type: ignore[return-value] + + # -- factory -- + + @classmethod + def default(cls, verbose: bool = False) -> "DataHub": + """Create a hub with all default services (lazy clients).""" + hub = cls() + vine_svc = VineSensorService() + hub.register(WeatherService()) + hub.register(vine_svc) + hub.register(PhotosynthesisService()) + hub.register(EnergyService()) + hub.register(AdvisoryService(vine_sensor_svc=vine_svc, verbose=verbose)) + hub.register(BiologyService()) + return hub diff --git a/src/data/data_schema.py b/src/data/data_schema.py new file mode 100644 index 0000000000000000000000000000000000000000..136af38885d99d4662fbf30eaf1166f52657c718 --- /dev/null +++ b/src/data/data_schema.py @@ -0,0 +1,519 @@ +""" +SolarWine 2.0 — Data Schema +============================ +Canonical dataclasses for the four telemetry tables that flow through +the 15-minute control loop. 
+ + SensorRaw — one-slot snapshot of all on-site + IMS inputs + BiologicalState — photosynthesis model outputs + phenological state + TrackerKinematics — tracker position, commands, operational mode + SimulationLog — complete audit record for one 15-min slot + +Storage +------- +CSV/Parquet backend via to_dict() / from_dict() helpers. Schema is forward- +compatible with a future TimescaleDB migration (all timestamps are UTC, +numeric fields are SI units). + +Unit conventions +---------------- +Temperatures : °C +PAR : µmol m⁻² s⁻¹ +DLI : mol m⁻² day⁻¹ +Irradiance (GHI) : W m⁻² +VPD : kPa +CO₂ : ppm +Angles : degrees (tilt: + = east-facing, 0 = horizontal, - = west-facing) +Energy : kWh +Soil moisture : % +Wind speed : m s⁻¹ +""" + +from __future__ import annotations + +from dataclasses import asdict, dataclass, field +from datetime import datetime, timezone +from typing import Any, Dict, List, Optional + +from src.utils import cwsi_from_delta_t + + +# --------------------------------------------------------------------------- +# SensorRaw — single 15-min slot of all sensor inputs +# --------------------------------------------------------------------------- + +@dataclass +class SensorRaw: + """ + Canonical sensor snapshot for one 15-min control slot. + + Populated from ThingsBoard (TB) via VineSnapshot for real-time control, + or from CSV/Parquet for historical replay. IMS fields are always from + the IMS station 43 (Sde Boker) cache. 
+ """ + + ts: datetime # UTC timestamp of the slot start + + # --- TB microclimate (treatment area Air2/3/4 average) --- + air_temp_c: Optional[float] = None + leaf_temp_c: Optional[float] = None + vpd_kpa: Optional[float] = None + co2_ppm: Optional[float] = None + air_leaf_delta_t: Optional[float] = None # proxy for CWSI + humidity_pct: Optional[float] = None + dew_temp_c: Optional[float] = None + + # --- PAR / irradiance --- + par_umol: Optional[float] = None # above-canopy ambient PAR (Air devices) + fruiting_zone_par_umol: Optional[float] = None # mid-canopy PAR (Crop3/5/6/7 avg) + ghi_w_m2: Optional[float] = None # IMS global horizontal irradiance + + # --- Daily light / spectral indices --- + dli_mol_m2: Optional[float] = None # daily light integral so far + ndvi: Optional[float] = None + pri: Optional[float] = None + + # --- Wind & rain --- + wind_speed_ms: Optional[float] = None + wind_angle_deg: Optional[float] = None + rain_mm: Optional[float] = None + air_pressure_hpa: Optional[float] = None + + # --- TB soil (treatment area Soil1/3/5/6 average) --- + soil_moisture_pct: Optional[float] = None + soil_temp_c: Optional[float] = None + soil_ec_ds_m: Optional[float] = None + soil_ph: Optional[float] = None + + # --- TB reference area (Crop1/2/4 avg, open sky) --- + reference_crop_par_umol: Optional[float] = None + reference_crop_leaf_temp_c: Optional[float] = None + reference_soil_moisture_pct: Optional[float] = None + + # --- Shading effectiveness --- + par_shading_ratio: Optional[float] = None # treatment / reference PAR (<1 = shaded) + + # --- Derived stress index --- + cwsi: Optional[float] = None # explicit CWSI if available from TB + + # --- Data provenance --- + source: str = "unknown" # "thingsboard" | "ims" | "csv" | "mixed" + quality_flags: List[str] = field(default_factory=list) + # e.g. 
["soil5_temp_outlier_excluded", "air3_stale"] + + # ------------------------------------------------------------------ + # Factory: build from a VineSnapshot + # ------------------------------------------------------------------ + + @classmethod + def from_vine_snapshot(cls, snapshot: Any) -> "SensorRaw": + """ + Construct SensorRaw from a ThingsBoardClient.VineSnapshot. + + The snapshot already contains treatment-vs-reference aggregations + and bounded averages; this method simply re-maps them to the + canonical SensorRaw field names. + """ + flags: List[str] = [] + if hasattr(snapshot, "staleness_minutes") and snapshot.staleness_minutes > 20: + flags.append(f"stale_{snapshot.staleness_minutes:.0f}min") + + # CWSI proxy from air-leaf temperature delta (see src.utils.cwsi_from_delta_t) + cwsi_proxy: Optional[float] = None + delta_t = getattr(snapshot, "treatment_air_leaf_delta_t", None) + if delta_t is not None: + cwsi_proxy = cwsi_from_delta_t(delta_t=delta_t) + + return cls( + ts=getattr(snapshot, "snapshot_ts", datetime.now(tz=timezone.utc)), + + # Microclimate + air_temp_c=getattr(snapshot, "treatment_air_temp_c", None), + leaf_temp_c=getattr(snapshot, "treatment_leaf_temp_c", None) + or getattr(snapshot, "treatment_crop_leaf_temp_c", None), + vpd_kpa=getattr(snapshot, "treatment_vpd_kpa", None), + co2_ppm=getattr(snapshot, "treatment_co2_ppm", None), + air_leaf_delta_t=delta_t, + humidity_pct=getattr(snapshot, "ambient_humidity_pct", None), + + # PAR + par_umol=getattr(snapshot, "treatment_par_umol", None), + fruiting_zone_par_umol=getattr(snapshot, "treatment_crop_par_umol", None), + dli_mol_m2=getattr(snapshot, "treatment_crop_dli_mol_m2", None), + ndvi=getattr(snapshot, "treatment_crop_ndvi", None), + pri=getattr(snapshot, "treatment_pri", None), + + # Wind / weather + wind_speed_ms=getattr(snapshot, "ambient_wind_speed_ms", None), + wind_angle_deg=getattr(snapshot, "ambient_wind_angle_deg", None), + rain_mm=getattr(snapshot, "ambient_rain_mm", None), + + # 
Soil + soil_moisture_pct=getattr(snapshot, "treatment_soil_moisture_pct", None), + soil_temp_c=getattr(snapshot, "treatment_soil_temp_c", None), + soil_ec_ds_m=getattr(snapshot, "treatment_soil_ec_ds_m", None), + soil_ph=getattr(snapshot, "treatment_soil_ph", None), + + # Reference + reference_crop_par_umol=getattr(snapshot, "reference_crop_par_umol", None), + reference_crop_leaf_temp_c=getattr(snapshot, "reference_crop_leaf_temp_c", None), + reference_soil_moisture_pct=getattr(snapshot, "reference_soil_moisture_pct", None), + + # Shading ratio + par_shading_ratio=getattr(snapshot, "par_shading_ratio", None), + + cwsi=cwsi_proxy, + source="thingsboard", + quality_flags=flags, + ) + + # ------------------------------------------------------------------ + # Serialization + # ------------------------------------------------------------------ + + def to_dict(self) -> Dict[str, Any]: + d = asdict(self) + d["ts"] = self.ts.isoformat() if self.ts else None + return d + + @classmethod + def from_dict(cls, d: Dict[str, Any]) -> "SensorRaw": + d = d.copy() + if isinstance(d.get("ts"), str): + d["ts"] = datetime.fromisoformat(d["ts"]) + return cls(**{k: v for k, v in d.items() if k in cls.__dataclass_fields__}) + + +# --------------------------------------------------------------------------- +# BiologicalState — photosynthesis model outputs + phenology +# --------------------------------------------------------------------------- + +@dataclass +class BiologicalState: + """ + Computed vine physiological state for one control slot. + + Produced by the FarquharModel (or ML ensemble via RoutingAgent) and + the phenology tracker. Drives the InterventionGate and TradeoffEngine. 
+ """ + + ts: datetime + + # --- Photosynthesis model outputs --- + a_net_umol: Optional[float] = None # net carbon assimilation (µmol CO₂ m⁻² s⁻¹) + limiting_state: Optional[str] = None # "rubp" | "rubisco" | "tpu" | "transition" + shading_helps: Optional[bool] = None # True only when Rubisco-limited AND heat is bottleneck + + # --- Model provenance --- + model_used: str = "unknown" # "fvcb" | "fvcb_semillon" | "ml" | "ml_ensemble" + model_confidence: Optional[float] = None # 0–1 (1 = high confidence in routing choice) + + # --- Raw inputs echoed for auditing --- + par_input: Optional[float] = None + tleaf_input: Optional[float] = None + vpd_input: Optional[float] = None + co2_input: Optional[float] = None + + # --- Phenological state --- + phenological_stage: str = "vegetative" # vegetative | flowering | veraison | harvest + gdd_cumulative: Optional[float] = None # growing degree days since budburst + crop_value_weight: float = 1.0 # seasonal multiplier (1.5× at veraison, 0.5× post-harvest) + + # --- Stress levels --- + heat_stress_level: str = "none" # none | low | moderate | high | extreme + water_stress_level: str = "none" + sunburn_risk: bool = False # True when Tleaf > BERRY_SUNBURN_TEMP_C + + # --- Fruiting-zone specific --- + fruiting_zone_a_net: Optional[float] = None # A at mid-canopy zone (zone index 1) + fruiting_zone_par: Optional[float] = None # PAR at mid-canopy + top_canopy_a_net: Optional[float] = None # A at top-canopy zone (zone index 2) + + # ------------------------------------------------------------------ + + def to_dict(self) -> Dict[str, Any]: + d = asdict(self) + d["ts"] = self.ts.isoformat() if self.ts else None + return d + + @classmethod + def from_dict(cls, d: Dict[str, Any]) -> "BiologicalState": + d = d.copy() + if isinstance(d.get("ts"), str): + d["ts"] = datetime.fromisoformat(d["ts"]) + return cls(**{k: v for k, v in d.items() if k in cls.__dataclass_fields__}) + + +# 
# ---------------------------------------------------------------------------
# TrackerKinematics — tracker position and operational mode
# ---------------------------------------------------------------------------

@dataclass
class TrackerKinematics:
    """
    Single-axis tracker state for one control slot.

    astronomical_tilt_deg is always the sun-following position (full-energy).
    shade_offset_deg is the deliberate deviation for vine protection.
    effective_tilt_deg = astronomical_tilt_deg + shade_offset_deg.

    Angle convention: 0° = horizontal, positive = tilted toward east,
    negative = tilted toward west (consistent with pvlib single-axis sign convention).
    """

    ts: datetime

    # --- Astronomical tracking (default / full-energy position) ---
    astronomical_tilt_deg: float = 0.0
    solar_azimuth_deg: Optional[float] = None
    solar_elevation_deg: Optional[float] = None

    # --- Shading offset (deliberate protection deviation) ---
    shade_offset_deg: float = 0.0  # 0 = no protection, positive values = shade intervention
    effective_tilt_deg: float = 0.0  # astronomical + shade_offset

    # --- Previous slot (for hysteresis) ---
    previous_tilt_deg: Optional[float] = None
    tilt_change_deg: float = 0.0  # effective_tilt - previous_tilt
    motion_triggered: bool = False  # True if |change| > ANGLE_TOLERANCE_DEG

    # --- Operational mode ---
    operational_mode: str = "tracking"  # tracking | wind_stow | heat_shield | harvest_park
    mode_override_reason: Optional[str] = None

    # --- Panel surface temperatures ---
    panel_temp_treatment_c: Optional[float] = None  # Thermocouples1 avg
    panel_temp_reference_c: Optional[float] = None  # Thermocouples2 avg

    # ------------------------------------------------------------------

    def to_dict(self) -> Dict[str, Any]:
        """Return all fields as a plain dict with ``ts`` as an ISO-8601 string."""
        d = asdict(self)
        d["ts"] = self.ts.isoformat() if self.ts else None
        return d

    @classmethod
    def from_dict(cls, d: Dict[str, Any]) -> "TrackerKinematics":
        """Rebuild an instance from ``to_dict()`` output; unknown keys are ignored."""
        d = d.copy()
        if isinstance(d.get("ts"), str):
            d["ts"] = datetime.fromisoformat(d["ts"])
        return cls(**{k: v for k, v in d.items() if k in cls.__dataclass_fields__})


# ---------------------------------------------------------------------------
# SimulationLog — complete audit record for one 15-min slot
# ---------------------------------------------------------------------------

@dataclass
class SimulationLog:
    """
    Full audit record for one 15-minute control loop execution.

    Written to `data/simulation_log.parquet` (or CSV) after every slot.
    Used for replay, validation, ROI reporting, and Phase 7 integration tests.
    """

    ts: datetime
    slot_index: int  # 0–95 for a 24-hour day (96 × 15-min slots)
    date_str: str = ""  # YYYY-MM-DD local date for partitioning

    # --- Nested state objects ---
    sensor: Optional[SensorRaw] = None
    bio: Optional[BiologicalState] = None
    kinematics: Optional[TrackerKinematics] = None

    # --- InterventionGate outcome ---
    intervention_gate_passed: bool = False
    gate_rejection_reason: Optional[str] = None
    # Rejection categories: "no_shade_window:morning" | "no_shade_window:may" |
    # "overcast" | "below_temp_threshold" | "below_cwsi_threshold" | "budget_exhausted"

    # --- TradeoffEngine outcome ---
    candidate_offsets_tested: List[float] = field(default_factory=list)
    chosen_offset_deg: float = 0.0
    minimum_dose_rationale: Optional[str] = None
    # e.g. "offset 5° sufficient: fruiting PAR reduced below 400 µmol/m²/s"

    # --- Safety rails ---
    fvcb_a: Optional[float] = None
    ml_a: Optional[float] = None
    model_divergence_pct: Optional[float] = None  # |fvcb_a - ml_a| / max * 100
    safety_fallback_triggered: bool = False
    routing_decision: Optional[str] = None  # "fvcb" | "ml" — which model was used

    # --- Energy budget accounting ---
    energy_fraction_this_slot: float = 0.0  # fraction of max generation sacrificed
    budget_remaining_daily_kwh: Optional[float] = None
    budget_remaining_weekly_kwh: Optional[float] = None
    budget_remaining_monthly_kwh: Optional[float] = None

    # --- Feedback (filled in the following slot) ---
    a_net_actual: Optional[float] = None  # measured A in next slot (for validation)
    a_net_improvement_pct: Optional[float] = None  # vs unshaded counterfactual

    # --- Explainability tags ---
    decision_tags: List[str] = field(default_factory=list)
    # e.g. ["rubisco_limited", "dose:5deg", "veraison_1.5x", "budget_ok:32%_remaining"]

    # ------------------------------------------------------------------
    # Serialization
    # ------------------------------------------------------------------

    def to_dict(self) -> Dict[str, Any]:
        """Deep-serialize to a plain dict (JSON-serializable).

        Nested state objects are serialized through their own ``to_dict()``;
        list fields are copied so mutating the result cannot alter this record.
        """
        d: Dict[str, Any] = {
            "ts": self.ts.isoformat() if self.ts else None,
            "slot_index": self.slot_index,
            "date_str": self.date_str,
            "sensor": self.sensor.to_dict() if self.sensor else None,
            "bio": self.bio.to_dict() if self.bio else None,
            "kinematics": self.kinematics.to_dict() if self.kinematics else None,
            "intervention_gate_passed": self.intervention_gate_passed,
            "gate_rejection_reason": self.gate_rejection_reason,
            "candidate_offsets_tested": list(self.candidate_offsets_tested),
            "chosen_offset_deg": self.chosen_offset_deg,
            "minimum_dose_rationale": self.minimum_dose_rationale,
            "fvcb_a": self.fvcb_a,
            "ml_a": self.ml_a,
            "model_divergence_pct": self.model_divergence_pct,
            "safety_fallback_triggered": self.safety_fallback_triggered,
            "routing_decision": self.routing_decision,
            "energy_fraction_this_slot": self.energy_fraction_this_slot,
            "budget_remaining_daily_kwh": self.budget_remaining_daily_kwh,
            "budget_remaining_weekly_kwh": self.budget_remaining_weekly_kwh,
            "budget_remaining_monthly_kwh": self.budget_remaining_monthly_kwh,
            "a_net_actual": self.a_net_actual,
            "a_net_improvement_pct": self.a_net_improvement_pct,
            "decision_tags": list(self.decision_tags),
        }
        return d

    def to_flat_row(self) -> Dict[str, Any]:
        """
        Flatten all nested objects into a single dict row suitable for
        appending to a Parquet or CSV log file.

        Nested field names are prefixed: sensor__*, bio__*, kin__*.
        Each nested ``ts`` is dropped (the row carries one top-level ``ts``);
        the sensor's ``quality_flags`` and ``source`` are dropped as well.
        ``decision_tags`` is joined with ``|`` into the ``tags`` column.
        """
        row: Dict[str, Any] = {
            "ts": self.ts.isoformat() if self.ts else None,
            "slot_index": self.slot_index,
            "date_str": self.date_str,
            "gate_passed": self.intervention_gate_passed,
            "gate_reason": self.gate_rejection_reason,
            "chosen_offset_deg": self.chosen_offset_deg,
            "fvcb_a": self.fvcb_a,
            "ml_a": self.ml_a,
            "divergence_pct": self.model_divergence_pct,
            "fallback": self.safety_fallback_triggered,
            "routing": self.routing_decision,
            "energy_fraction": self.energy_fraction_this_slot,
            "budget_daily_kwh": self.budget_remaining_daily_kwh,
            # Weekly budget sits alongside the daily and monthly columns.
            "budget_weekly_kwh": self.budget_remaining_weekly_kwh,
            "budget_monthly_kwh": self.budget_remaining_monthly_kwh,
            "a_net_actual": self.a_net_actual,
            "a_net_improvement_pct": self.a_net_improvement_pct,
            "tags": "|".join(self.decision_tags),
        }
        if self.sensor:
            for k, v in self.sensor.to_dict().items():
                if k not in ("ts", "quality_flags", "source"):
                    row[f"sensor__{k}"] = v
        if self.bio:
            for k, v in self.bio.to_dict().items():
                if k != "ts":
                    row[f"bio__{k}"] = v
        if self.kinematics:
            for k, v in self.kinematics.to_dict().items():
                if k != "ts":
                    row[f"kin__{k}"] = v
        return row


# ---------------------------------------------------------------------------
# Public convenience re-exports from VineSnapshot
+# --------------------------------------------------------------------------- + +def sensor_raw_from_vine_snapshot(snapshot: Any) -> SensorRaw: + """Module-level alias for SensorRaw.from_vine_snapshot().""" + return SensorRaw.from_vine_snapshot(snapshot) + + +# --------------------------------------------------------------------------- +# Quick self-test +# --------------------------------------------------------------------------- + +if __name__ == "__main__": + import json + from datetime import timezone + + now = datetime.now(tz=timezone.utc) + + sensor = SensorRaw( + ts=now, + air_temp_c=33.5, + leaf_temp_c=35.1, + vpd_kpa=2.9, + co2_ppm=410.0, + fruiting_zone_par_umol=820.0, + soil_moisture_pct=31.2, + reference_crop_par_umol=1150.0, + par_shading_ratio=0.71, + source="thingsboard", + ) + bio = BiologicalState( + ts=now, + a_net_umol=14.3, + limiting_state="rubisco", + shading_helps=True, + model_used="fvcb_semillon", + phenological_stage="veraison", + crop_value_weight=1.5, + heat_stress_level="moderate", + sunburn_risk=True, + ) + kin = TrackerKinematics( + ts=now, + astronomical_tilt_deg=42.0, + shade_offset_deg=5.0, + effective_tilt_deg=47.0, + previous_tilt_deg=42.0, + tilt_change_deg=5.0, + motion_triggered=True, + operational_mode="tracking", + panel_temp_treatment_c=58.3, + ) + log = SimulationLog( + ts=now, + slot_index=52, + date_str="2025-07-15", + sensor=sensor, + bio=bio, + kinematics=kin, + intervention_gate_passed=True, + candidate_offsets_tested=[3.0, 5.0], + chosen_offset_deg=5.0, + minimum_dose_rationale="5° sufficient to reduce fruiting-zone PAR below 400", + fvcb_a=14.3, + ml_a=14.8, + model_divergence_pct=3.4, + routing_decision="fvcb_semillon", + energy_fraction_this_slot=0.042, + budget_remaining_daily_kwh=8.1, + decision_tags=["rubisco_limited", "dose:5deg", "veraison_1.5x", "budget_ok"], + ) + + print("SensorRaw:") + print(json.dumps(sensor.to_dict(), indent=2, default=str)) + print("\nBiologicalState:") + 
print(json.dumps(bio.to_dict(), indent=2, default=str)) + print("\nTrackerKinematics:") + print(json.dumps(kin.to_dict(), indent=2, default=str)) + print("\nSimulationLog flat row keys:") + row = log.to_flat_row() + print(f" {len(row)} columns") + print(" First 10:", list(row.keys())[:10]) + print("\nSensorRaw round-trip:") + s2 = SensorRaw.from_dict(sensor.to_dict()) + assert s2.air_temp_c == sensor.air_temp_c + assert isinstance(s2.ts, datetime) + print(" OK") diff --git a/src/data/ims_client.py b/src/data/ims_client.py new file mode 100644 index 0000000000000000000000000000000000000000..95ecdbb45dea68adcb443ecf664e5ef4803e10cc --- /dev/null +++ b/src/data/ims_client.py @@ -0,0 +1,215 @@ +""" +IMSClient: fetch and cache IMS weather data from station 43 (Sde Boker). +Resamples 10min data to 15min for alignment with sensor data. +""" + +import os +import time +from pathlib import Path +from typing import Optional + +import pandas as pd +import requests + +try: + from dotenv import load_dotenv + load_dotenv() +except ImportError: + pass + + +def _parse_ims_date(d: str) -> str: + """Convert YYYY-MM-DD to IMS format YYYY/MM/DD.""" + return d.replace("-", "/") + + +class IMSClient: + """Fetch IMS API data for a station and cache to Data/ims/.""" + + def __init__( + self, + token: Optional[str] = None, + station_id: Optional[int] = None, + cache_dir: Optional[Path] = None, + channel_map: Optional[dict[int, str]] = None, + ): + from config import settings + + self.token = (token or os.environ.get("IMS_API_TOKEN", "")).strip() + if not self.token: + raise ValueError( + "IMS API token is required. Set IMS_API_TOKEN in .env, " + "in Streamlit Secrets, or pass token= to IMSClient." 
+ ) + self.station_id = station_id or settings.IMS_STATION_ID + self.cache_dir = cache_dir or settings.IMS_CACHE_DIR + self.channel_map = channel_map or settings.IMS_CHANNEL_MAP.copy() + self._base = f"{settings.IMS_BASE_URL}/{self.station_id}/data" + self._stations_url = settings.IMS_BASE_URL + + def get_station_metadata(self, station_id: Optional[int] = None) -> dict: + """ + Fetch station metadata from IMS API (name, location, monitors/channels). + Returns dict with 'stationId', 'name', 'monitors' (list of {channelId, name, units, ...}). + """ + sid = station_id or self.station_id + url = f"{self._stations_url}/{sid}" + headers = {"Authorization": f"ApiToken {self.token}"} + r = requests.get(url, headers=headers, timeout=30) + r.raise_for_status() + return r.json() + + def list_channels(self, station_id: Optional[int] = None) -> list[dict]: + """Return list of channel descriptors for the station (channelId, name, units, active).""" + meta = self.get_station_metadata(station_id) + monitors = meta.get("monitors", meta.get("channelGroups", [])) + # Flatten if nested; IMS may return list of { channelId, name, ... } + out = [] + for m in monitors: + if isinstance(m, dict): + out.append({ + "channelId": m.get("channelId", m.get("id")), + "name": m.get("name", m.get("channelName", "")), + "units": m.get("units", ""), + "active": m.get("active", True), + }) + return out + + def fetch_channel( + self, + channel_id: int, + from_date: str, + to_date: str, + ) -> pd.DataFrame: + """ + Fetch one channel for date range. Dates as YYYY-MM-DD. + Returns DataFrame with timestamp_utc and one value column. 
+ """ + from_f = _parse_ims_date(from_date) + to_f = _parse_ims_date(to_date) + url = f"{self._base}/{channel_id}?from={from_f}&to={to_f}" + headers = {"Authorization": f"ApiToken {self.token}"} + r = requests.get(url, headers=headers, timeout=120) + r.raise_for_status() + if not r.text or not r.text.strip(): + return pd.DataFrame() + try: + raw = r.json() + except Exception: + return pd.DataFrame() + data = raw.get("data", raw) if isinstance(raw, dict) else raw + if not isinstance(data, list): + data = [] + col_name = self.channel_map.get(channel_id, f"channel_{channel_id}") + rows = [] + for item in data: + dt = item.get("datetime") + # IMS returns Israel time (Asia/Jerusalem); parse and convert to UTC + if isinstance(dt, str): + ts = pd.to_datetime(dt) + if ts.tzinfo is None: + ts = ts.tz_localize("Asia/Jerusalem").tz_convert("UTC") + else: + ts = ts.tz_convert("UTC") + else: + continue + ch_list = item.get("channels", []) + val = None + for ch in ch_list: + if ch.get("id") == channel_id and ch.get("status") == 1: + val = ch.get("value") + break + rows.append({"timestamp_utc": ts, col_name: val}) + df = pd.DataFrame(rows) + if not df.empty: + df = df.dropna(subset=[col_name]) + df = df.set_index("timestamp_utc").sort_index() + return df + + def fetch_all_channels( + self, + from_date: str, + to_date: str, + delay_seconds: float = 0.5, + ) -> pd.DataFrame: + """Fetch all configured channels and merge on timestamp_utc.""" + out = None + for ch_id, col_name in self.channel_map.items(): + df = self.fetch_channel(ch_id, from_date, to_date) + if df.empty: + continue + df = df.rename(columns={c: c for c in df.columns}) + if out is None: + out = df + else: + out = out.join(df, how="outer") + time.sleep(delay_seconds) + if out is None: + return pd.DataFrame() + out = out.reset_index() + return out + + def resample_to_15min(self, df: pd.DataFrame) -> pd.DataFrame: + """Resample 10min IMS data to 15min (mean). 
Expects timestamp_utc column.""" + if df.empty or "timestamp_utc" not in df.columns: + return df + d = df.set_index("timestamp_utc") + d = d.resample("15min").mean().dropna(how="all") + return d.reset_index() + + def load_cached(self, cache_path: Optional[Path] = None) -> pd.DataFrame: + """Load merged IMS data from cache file if it exists.""" + path = cache_path or (self.cache_dir / "ims_merged_15min.csv") + if not path.exists(): + return pd.DataFrame() + df = pd.read_csv(path) + if "timestamp_utc" in df.columns: + df["timestamp_utc"] = pd.to_datetime(df["timestamp_utc"], utc=True) + return df + + def fetch_and_cache( + self, + from_date: str, + to_date: str, + cache_path: Optional[Path] = None, + chunk_days: Optional[int] = 60, + ) -> pd.DataFrame: + """ + Fetch all channels for the date range, resample to 15min, save to cache. + If chunk_days is set, split the range into chunks to avoid API empty responses. + """ + path = cache_path or (self.cache_dir / "ims_merged_15min.csv") + path.parent.mkdir(parents=True, exist_ok=True) + + from datetime import datetime, timedelta + + start = datetime.strptime(from_date, "%Y-%m-%d").date() + end = datetime.strptime(to_date, "%Y-%m-%d").date() + if start > end: + start, end = end, start + + if chunk_days is None or (end - start).days <= chunk_days: + df = self.fetch_all_channels(from_date, to_date) + else: + chunks = [] + d = start + while d < end: + chunk_end = min(d + timedelta(days=chunk_days), end) + from_s = d.strftime("%Y-%m-%d") + to_s = chunk_end.strftime("%Y-%m-%d") + try: + df_chunk = self.fetch_all_channels(from_s, to_s) + if not df_chunk.empty: + chunks.append(df_chunk) + except Exception: + pass # skip failed chunk, continue + d = chunk_end + df = pd.concat(chunks, ignore_index=True) if chunks else pd.DataFrame() + if not df.empty and "timestamp_utc" in df.columns: + df = df.drop_duplicates(subset=["timestamp_utc"]).sort_values("timestamp_utc") + + if df.empty: + return df + df = self.resample_to_15min(df) + 
df.to_csv(path, index=False) + return df diff --git a/src/data/redis_cache.py b/src/data/redis_cache.py new file mode 100644 index 0000000000000000000000000000000000000000..e631ea074576ad8c4250838347fe2314bdb49252 --- /dev/null +++ b/src/data/redis_cache.py @@ -0,0 +1,152 @@ +""" +Thin Redis wrapper for cross-process caching (Upstash Redis REST API). + +Falls back gracefully to ``None`` returns when Redis is unavailable, +so callers can use in-memory TTLCache as a fallback. + +Usage:: + + from src.data.redis_cache import get_redis + + redis = get_redis() # None if no UPSTASH_REDIS_URL + if redis: + redis.set_json("weather:current", data, ttl=1800) + cached = redis.get_json("weather:current") +""" + +from __future__ import annotations + +import json +import logging +import os +import threading +from typing import Any, Optional + +log = logging.getLogger(__name__) + +# --------------------------------------------------------------------------- +# Singleton (thread-safe) +# --------------------------------------------------------------------------- + +_instance: Optional["RedisCache"] = None +_lock = threading.Lock() + + +def get_redis() -> Optional["RedisCache"]: + """Return the global RedisCache instance, or *None* if not configured.""" + global _instance + + # Fast path (no lock) + if _instance is not None: + return _instance + + url = os.environ.get("UPSTASH_REDIS_URL") + token = os.environ.get("UPSTASH_REDIS_TOKEN") + if not url or not token: + log.debug("Redis not configured (UPSTASH_REDIS_URL / UPSTASH_REDIS_TOKEN missing)") + return None + + with _lock: + # Double-check after acquiring lock + if _instance is not None: + return _instance + try: + _instance = RedisCache(url=url, token=token) + log.info("Redis connected: %s", url.split("@")[-1] if "@" in url else url[:40]) + return _instance + except Exception as exc: + log.error("Redis init failed: %s", exc) + return None + + +# --------------------------------------------------------------------------- +# 
class RedisCache:
    """Minimal Redis cache using the Upstash REST API (no native driver needed).

    Every command is sent as a JSON array in the body of a POST to the REST
    base URL, so keys and values never become part of the URL path and may
    contain any characters (``/``, ``?``, spaces, ...).

    All public helpers are best-effort: they never raise, and report failure
    through their return value (``None`` / ``False``).
    """

    def __init__(self, url: str, token: str):
        """Store connection settings and verify connectivity with a ping.

        Raises whatever ``requests`` raises if the ping fails, so the caller
        can decide to run without Redis.
        """
        self._url = url.rstrip("/")
        self._headers = {"Authorization": f"Bearer {token}"}
        # Lazy import — requests is already a project dependency
        import requests as _req
        self._req = _req
        # Connectivity check
        resp = self._req.get(f"{self._url}/ping", headers=self._headers, timeout=5)
        resp.raise_for_status()

    # -- transport ----------------------------------------------------------

    def _command(self, *args: Any) -> Any:
        """Run one Redis command and return its ``result`` field.

        Raises on transport errors, non-2xx responses, and on an ``error``
        field in the response body.
        """
        resp = self._req.post(
            self._url,
            headers={**self._headers, "Content-Type": "application/json"},
            json=list(args),
            timeout=5,
        )
        resp.raise_for_status()
        body = resp.json()
        if not isinstance(body, dict):
            raise RuntimeError(f"Unexpected Redis response: {body!r}")
        if body.get("error"):
            raise RuntimeError(str(body["error"]))
        return body.get("result")

    # -- JSON helpers -------------------------------------------------------

    def get_json(self, key: str) -> Optional[Any]:
        """Retrieve and JSON-decode a key. Returns None on miss or error."""
        try:
            result = self._command("GET", key)
            if result is None:
                return None
            return json.loads(result)
        except Exception as exc:
            log.debug("Redis GET %s failed: %s", key, exc)
            return None

    def set_json(self, key: str, value: Any, ttl: int = 300) -> bool:
        """JSON-encode and store *value* with a TTL in seconds.

        Returns True only if Redis accepted the write; a rejected command
        (e.g. a non-positive TTL) yields False.
        """
        try:
            # default=str keeps datetimes and similar values serialisable.
            payload = json.dumps(value, default=str)
            self._command("SET", key, payload, "EX", str(ttl))
            return True
        except Exception as exc:
            log.debug("Redis SET %s failed: %s", key, exc)
            return False

    def delete(self, key: str) -> bool:
        """Delete a key. Returns True if the command succeeded."""
        try:
            self._command("DEL", key)
            return True
        except Exception as exc:
            log.debug("Redis DEL %s failed: %s", key, exc)
            return False

    def exists(self, key: str) -> bool:
        """Check if a key exists. Returns False on error."""
        try:
            return self._command("EXISTS", key) == 1
        except Exception as exc:
            log.debug("Redis EXISTS %s failed: %s", key, exc)
            return False

    def ping(self) -> bool:
        """Health check: True if the REST endpoint answers HTTP 200."""
        try:
            resp = self._req.get(
                f"{self._url}/ping",
                headers=self._headers,
                timeout=5,
            )
            return resp.status_code == 200
        except Exception:
            return False
def load(
    self,
    columns: Optional[list[str]] = None,
    timestamp_col: Optional[str] = None,
) -> pd.DataFrame:
    """
    Read the sensor CSV at ``self.data_path`` and return it sorted by time.

    When *columns* is None the Stage 1 column list is requested. The
    timestamp column is always read (and placed first in the request).
    Any requested column that the file does not contain raises ValueError.
    The timestamp column is parsed as UTC and ``Air1_CO2_ref``, if loaded,
    is scaled by 0.7.
    """
    time_col = timestamp_col or DEFAULT_TIMESTAMP_COL
    requested = self.get_stage1_columns() if columns is None else columns
    wanted = [time_col, *(name for name in requested if name != time_col)]

    def _is_wanted(name):
        return name in wanted

    frame = pd.read_csv(self.data_path, usecols=_is_wanted)

    absent = [name for name in wanted if name not in frame.columns]
    if absent:
        raise ValueError(
            f"Sensor data missing required columns: {absent}. "
            f"Available: {list(frame.columns)[:20]}{'...' if len(frame.columns) > 20 else ''}"
        )

    # The timestamp column is guaranteed present once the check above passes.
    frame[time_col] = pd.to_datetime(frame[time_col], utc=True)
    frame = frame.sort_values(time_col).reset_index(drop=True)

    # Correct Air1_CO2_ref — raw sensor reads ≈ 30% too high
    co2 = "Air1_CO2_ref"
    if co2 in frame.columns:
        frame[co2] = frame[co2] * 0.7
    return frame
@dataclass(frozen=True)
class DeviceInfo:
    """Immutable registry record describing one ThingsBoard device."""

    # ThingsBoard entity UUID; used as the entity id in telemetry/RPC URLs.
    uuid: str
    # Numeric device identifier.
    # NOTE(review): presumably the id column of devices.csv — confirm.
    device_id: int
    # Which part of the site the device belongs to (treatment/reference/ambient).
    area: VineArea
    # Vineyard row number, or None when the device is not tied to a single row.
    row: Optional[int]
    # Human-readable description of the device.
    label: str
#: Full device registry mapping short name → DeviceInfo.
#: UUIDs are from devices.csv in the Research/PV_Vine_Tradeoff repository.
#:
#: Keys are the short names accepted by the client's device methods
#: (e.g. ``get_latest_telemetry("Air1", ...)``). Note that the thermocouple
#: keys are spelled without a hyphen ("Thermocouples1"), unlike the
#: "Thermocouples-1" spelling used in the module docstring.
DEVICE_REGISTRY: Dict[str, DeviceInfo] = {
    "Air1": DeviceInfo(
        uuid="373041f0-089a-11ef-9126-b746c27d34bd", device_id=4,
        area=VineArea.AMBIENT, row=None, label="Outdoor Climate (ambient baseline)",
    ),
    "Air2": DeviceInfo(
        uuid="37bf89a0-089a-11ef-9126-b746c27d34bd", device_id=5,
        area=VineArea.TREATMENT, row=501, label="Indoor Climate Row 501 (under panels)",
    ),
    "Air3": DeviceInfo(
        uuid="3860aba0-089a-11ef-9126-b746c27d34bd", device_id=6,
        area=VineArea.TREATMENT, row=502, label="Indoor Climate Row 502 (under panels)",
    ),
    "Air4": DeviceInfo(
        uuid="04452660-7114-11ef-9360-f1ed9d9dc643", device_id=7,
        area=VineArea.TREATMENT, row=502, label="Treatment Row 502 North (under panels)",
    ),
    "Crop1": DeviceInfo(
        uuid="39224df0-089a-11ef-9126-b746c27d34bd", device_id=8,
        area=VineArea.REFERENCE, row=503, label="Reference crop Row 503",
    ),
    "Crop2": DeviceInfo(
        uuid="aa0d9970-7113-11ef-9360-f1ed9d9dc643", device_id=9,
        area=VineArea.REFERENCE, row=503, label="Control crop Row 503",
    ),
    "Crop3": DeviceInfo(
        uuid="859b3ce0-29dd-11f0-96bc-55874793181d", device_id=10,
        area=VineArea.TREATMENT, row=502, label="Treatment 502 – West Bottom",
    ),
    # NOTE(review): Crop4 is tagged REFERENCE but sits in row 502, while the
    # module docstring describes the reference area as rows 503–504 — confirm
    # whether the row or the area is the intended value.
    "Crop4": DeviceInfo(
        uuid="889765e0-29dd-11f0-96bc-55874793181d", device_id=11,
        area=VineArea.REFERENCE, row=502, label="Control crop Row 502 (reference vine)",
    ),
    "Crop5": DeviceInfo(
        uuid="8b092930-29dd-11f0-96bc-55874793181d", device_id=12,
        area=VineArea.TREATMENT, row=502, label="Treatment 502 – East Upper",
    ),
    "Crop6": DeviceInfo(
        uuid="8cce31c0-29dd-11f0-96bc-55874793181d", device_id=13,
        area=VineArea.TREATMENT, row=502, label="Treatment 502 – East Bottom",
    ),
    "Crop7": DeviceInfo(
        uuid="8e7440a0-29dd-11f0-96bc-55874793181d", device_id=14,
        area=VineArea.TREATMENT, row=502, label="Treatment 502 – West Upper",
    ),
    "Soil1": DeviceInfo(
        uuid="3586b0a0-089a-11ef-9126-b746c27d34bd", device_id=16,
        area=VineArea.TREATMENT, row=502, label="Soil Row 502 (treatment)",
    ),
    "Soil2": DeviceInfo(
        uuid="35cda4b0-089a-11ef-9126-b746c27d34bd", device_id=17,
        area=VineArea.REFERENCE, row=503, label="Soil Row 503 (reference)",
    ),
    "Soil3": DeviceInfo(
        uuid="3634caf0-089a-11ef-9126-b746c27d34bd", device_id=18,
        area=VineArea.TREATMENT, row=501, label="Soil Row 501 (treatment)",
    ),
    "Soil4": DeviceInfo(
        uuid="36a4cad0-089a-11ef-9126-b746c27d34bd", device_id=19,
        area=VineArea.REFERENCE, row=504, label="Soil Row 504 Control",
    ),
    "Soil5": DeviceInfo(
        uuid="77d55280-70e7-11ef-9360-f1ed9d9dc643", device_id=20,
        area=VineArea.TREATMENT, row=502, label="Treatment Row 502 South",
    ),
    "Soil6": DeviceInfo(
        uuid="7e4e4630-70e7-11ef-9360-f1ed9d9dc643", device_id=21,
        area=VineArea.TREATMENT, row=502, label="Treatment Row 502 North",
    ),
    "Soil7": DeviceInfo(
        uuid="842e5540-70e7-11ef-9360-f1ed9d9dc643", device_id=22,
        area=VineArea.REFERENCE, row=504, label="Control 504 South",
    ),
    # There is no "Soil8" entry; the short names jump from Soil7 to Soil9.
    "Soil9": DeviceInfo(
        uuid="91e44ff0-70e7-11ef-9360-f1ed9d9dc643", device_id=23,
        area=VineArea.REFERENCE, row=504, label="Control 504 South (2nd probe)",
    ),
    "Irrigation1": DeviceInfo(
        uuid="3a066c60-089a-11ef-9126-b746c27d34bd", device_id=15,
        area=VineArea.TREATMENT, row=502, label="Irrigation Row 502",
    ),
    "Thermocouples1": DeviceInfo(
        uuid="72ce88f0-c548-11ef-8bc2-fdab9f3349b7", device_id=2,
        area=VineArea.TREATMENT, row=502, label="Panel surface temps Treatment 502",
    ),
    "Thermocouples2": DeviceInfo(
        uuid="03e40ba0-cc0e-11ef-a2e9-55874793181d", device_id=3,
        area=VineArea.REFERENCE, row=None, label="Panel/structure surface temps Reference",
    ),
    # Tracker controllers (panel angle + mode)
    # NOTE(review): all four trackers share device_id=0, so device_id is not
    # unique across the registry — looks like a placeholder; confirm.
    # NOTE(review): Tracker503 is tagged TREATMENT although the module
    # docstring lists row 503 as reference (no panels), and row 509 is not
    # mentioned there at all — verify the area assignments.
    "Tracker501": DeviceInfo(
        uuid="aac06e50-f769-11f0-b902-5ff1ea8c4cf9", device_id=0,
        area=VineArea.TREATMENT, row=501, label="Tracker row 501",
    ),
    "Tracker502": DeviceInfo(
        uuid="b99bd630-f769-11f0-b902-5ff1ea8c4cf9", device_id=0,
        area=VineArea.TREATMENT, row=502, label="Tracker row 502",
    ),
    "Tracker503": DeviceInfo(
        uuid="caffe4c0-f769-11f0-b902-5ff1ea8c4cf9", device_id=0,
        area=VineArea.TREATMENT, row=503, label="Tracker row 503",
    ),
    "Tracker509": DeviceInfo(
        uuid="bacf7c50-fcdc-11f0-b902-5ff1ea8c4cf9", device_id=0,
        area=VineArea.TREATMENT, row=509, label="Tracker row 509",
    ),
}
"thermocoupleTemperature_3", "thermocoupleTemperature_4", +] + + +# --------------------------------------------------------------------------- +# VineSnapshot dataclass +# --------------------------------------------------------------------------- + +@dataclass +class VineSnapshot: + """ + Aggregated real-time vine state from all ThingsBoard sensors. + + Fields are grouped by area: + - ambient : Air1 (outdoor climate, site-level baseline) + - treatment : under solar panels (rows 501–502) + - reference : open sky / no panels (rows 503–504) + + None means the sensor did not return a value. + """ + + snapshot_ts: datetime + staleness_minutes: float + + # --- Ambient (Air1, outdoor baseline) --- + ambient_temp_c: Optional[float] = None + ambient_humidity_pct: Optional[float] = None + ambient_wind_speed_ms: Optional[float] = None + ambient_wind_angle_deg: Optional[float] = None + ambient_rain_mm: Optional[float] = None + + # --- Treatment microclimate (avg of Air2 / Air3 / Air4) --- + treatment_air_temp_c: Optional[float] = None + treatment_leaf_temp_c: Optional[float] = None + treatment_vpd_kpa: Optional[float] = None + treatment_co2_ppm: Optional[float] = None + treatment_par_umol: Optional[float] = None + treatment_dli_mol_m2: Optional[float] = None + treatment_ndvi: Optional[float] = None + treatment_pri: Optional[float] = None + treatment_air_leaf_delta_t: Optional[float] = None + + # --- Treatment crop (avg of Crop3 / Crop5 / Crop6 / Crop7) --- + treatment_crop_par_umol: Optional[float] = None + treatment_crop_leaf_temp_c: Optional[float] = None + treatment_crop_ndvi: Optional[float] = None + treatment_crop_dli_mol_m2: Optional[float] = None + treatment_crop_par_avg1h: Optional[float] = None + # Per-panel-position readings {position_label: {par, leaf_temp, ndvi}} + treatment_crop_by_position: Dict[str, Dict[str, Optional[float]]] = field(default_factory=dict) + + # --- Reference crop (avg of Crop1 / Crop2 / Crop4) --- + reference_crop_par_umol: Optional[float] = 
None + reference_crop_leaf_temp_c: Optional[float] = None + reference_crop_ndvi: Optional[float] = None + reference_crop_dli_mol_m2: Optional[float] = None + reference_crop_by_position: Dict[str, Dict[str, Optional[float]]] = field(default_factory=dict) + + # --- PAR shading ratio: treatment_crop_par / reference_crop_par --- + par_shading_ratio: Optional[float] = None # <1 = panels are shading + + # --- Treatment soil (avg of Soil1 / Soil3 / Soil5 / Soil6) --- + treatment_soil_moisture_pct: Optional[float] = None + treatment_soil_temp_c: Optional[float] = None + treatment_soil_ec_ds_m: Optional[float] = None + treatment_soil_ph: Optional[float] = None + + # --- Reference soil (avg of Soil2 / Soil4 / Soil7 / Soil9) --- + reference_soil_moisture_pct: Optional[float] = None + reference_soil_temp_c: Optional[float] = None + + # --- Irrigation (Irrigation1, row 502 treatment) --- + irrigation_last_volume_l: Optional[float] = None + irrigation_last_minutes: Optional[float] = None + irrigation_ec: Optional[float] = None + irrigation_ph: Optional[float] = None + water_temp_c: Optional[float] = None + + # --- Panel surface temperatures --- + treatment_panel_temp_c: Optional[float] = None # avg Thermocouples1 positions 1-4 + reference_panel_temp_c: Optional[float] = None # avg Thermocouples2 positions 1-4 + + def to_advisor_text(self) -> str: + """Format snapshot for inclusion in an AI advisory prompt.""" + age = f"{self.staleness_minutes:.0f}" if self.staleness_minutes < 120 else ">{:.0f}".format(self.staleness_minutes) + lines = [f"VINE STATE (ThingsBoard sensors, ~{age} min ago):"] + + lines.append(" TREATMENT area (rows 501-502, under solar panels):") + if self.treatment_air_temp_c is not None: + lines.append(f" Air temperature: {self.treatment_air_temp_c:.1f} C") + if self.treatment_leaf_temp_c is not None: + lines.append(f" Leaf temperature: {self.treatment_leaf_temp_c:.1f} C") + if self.treatment_air_leaf_delta_t is not None: + lines.append(f" Air-leaf delta-T: 
{self.treatment_air_leaf_delta_t:+.1f} C (proxy for heat stress)") + if self.treatment_vpd_kpa is not None: + lines.append(f" VPD: {self.treatment_vpd_kpa:.2f} kPa") + if self.treatment_co2_ppm is not None: + lines.append(f" CO2: {self.treatment_co2_ppm:.0f} ppm") + if self.treatment_crop_par_umol is not None: + lines.append(f" Fruiting-zone PAR: {self.treatment_crop_par_umol:.0f} umol/m2/s (avg of Crop3/5/6/7)") + if self.treatment_crop_dli_mol_m2 is not None: + lines.append(f" DLI today so far: {self.treatment_crop_dli_mol_m2:.1f} mol/m2/day") + if self.treatment_crop_ndvi is not None: + lines.append(f" Canopy NDVI: {self.treatment_crop_ndvi:.3f}") + if self.treatment_soil_moisture_pct is not None: + lines.append(f" Soil moisture: {self.treatment_soil_moisture_pct:.1f}% (avg Soil1/3/5/6)") + if self.treatment_soil_temp_c is not None: + lines.append(f" Soil temperature: {self.treatment_soil_temp_c:.1f} C") + if self.treatment_panel_temp_c is not None: + lines.append(f" Panel surface temp: {self.treatment_panel_temp_c:.1f} C") + + if self.treatment_crop_by_position: + lines.append(" Per-position PAR (Crop sensors):") + for pos, vals in self.treatment_crop_by_position.items(): + par = vals.get("par") + lt = vals.get("leaf_temp") + par_str = f"{par:.0f} umol/m2/s" if par is not None else "N/A" + lt_str = f" | leaf {lt:.1f} C" if lt is not None else "" + lines.append(f" {pos}: PAR {par_str}{lt_str}") + + lines.append("") + lines.append(" REFERENCE area (rows 503-504, open sky, no panels):") + if self.reference_crop_par_umol is not None: + lines.append(f" Fruiting-zone PAR: {self.reference_crop_par_umol:.0f} umol/m2/s (avg of Crop1/2/4)") + if self.reference_crop_leaf_temp_c is not None: + lines.append(f" Leaf temperature: {self.reference_crop_leaf_temp_c:.1f} C") + if self.reference_crop_ndvi is not None: + lines.append(f" Canopy NDVI: {self.reference_crop_ndvi:.3f}") + if self.reference_soil_moisture_pct is not None: + lines.append(f" Soil moisture: 
{self.reference_soil_moisture_pct:.1f}% (avg Soil2/4/7/9)") + if self.reference_crop_by_position: + lines.append(" Per-position PAR (Crop sensors):") + for pos, vals in self.reference_crop_by_position.items(): + par = vals.get("par") + par_str = f"{par:.0f} umol/m2/s" if par is not None else "N/A" + lines.append(f" {pos}: PAR {par_str}") + + if self.par_shading_ratio is not None: + reduction_pct = (1 - self.par_shading_ratio) * 100 + lines.append("") + lines.append(f" PAR shading ratio (treatment/reference): {self.par_shading_ratio:.2f}" + f" ({reduction_pct:.0f}% reduction by panels)") + + if self.ambient_temp_c is not None: + lines.append("") + lines.append(" AMBIENT (outdoor baseline, Air1):") + lines.append(f" Air temperature: {self.ambient_temp_c:.1f} C") + if self.ambient_wind_speed_ms is not None: + lines.append(f" Wind speed: {self.ambient_wind_speed_ms:.1f} m/s") + if self.ambient_rain_mm is not None and self.ambient_rain_mm > 0: + lines.append(f" Rain: {self.ambient_rain_mm:.1f} mm") + + any_irrigation = any(v is not None for v in [ + self.irrigation_last_volume_l, self.irrigation_last_minutes, + self.irrigation_ec, self.irrigation_ph, + ]) + if any_irrigation: + lines.append("") + lines.append(" IRRIGATION (Irrigation1, row 502):") + if self.irrigation_last_volume_l is not None: + lines.append(f" Last cycle volume: {self.irrigation_last_volume_l:.0f} L") + if self.irrigation_last_minutes is not None: + lines.append(f" Duration: {self.irrigation_last_minutes:.0f} min") + if self.irrigation_ec is not None: + lines.append(f" EC: {self.irrigation_ec:.2f} dS/m") + if self.irrigation_ph is not None: + lines.append(f" pH: {self.irrigation_ph:.1f}") + if self.water_temp_c is not None: + lines.append(f" Water temperature: {self.water_temp_c:.1f} C") + + return "\n".join(lines) + + def to_dict(self) -> Dict[str, Any]: + """Return a flat dict suitable for JSON serialization (e.g., chatbot tool result).""" + out: Dict[str, Any] = { + "snapshot_ts": 
self.snapshot_ts.isoformat(), + "staleness_minutes": round(self.staleness_minutes, 1), + } + for attr in ( + "ambient_temp_c", "ambient_humidity_pct", "ambient_wind_speed_ms", + "ambient_wind_angle_deg", "ambient_rain_mm", + "treatment_air_temp_c", "treatment_leaf_temp_c", "treatment_vpd_kpa", + "treatment_co2_ppm", "treatment_par_umol", "treatment_dli_mol_m2", + "treatment_ndvi", "treatment_pri", "treatment_air_leaf_delta_t", + "treatment_crop_par_umol", "treatment_crop_leaf_temp_c", + "treatment_crop_ndvi", "treatment_crop_dli_mol_m2", "treatment_crop_par_avg1h", + "reference_crop_par_umol", "reference_crop_leaf_temp_c", + "reference_crop_ndvi", "reference_crop_dli_mol_m2", + "par_shading_ratio", + "treatment_soil_moisture_pct", "treatment_soil_temp_c", + "treatment_soil_ec_ds_m", "treatment_soil_ph", + "reference_soil_moisture_pct", "reference_soil_temp_c", + "irrigation_last_volume_l", "irrigation_last_minutes", + "irrigation_ec", "irrigation_ph", "water_temp_c", + "treatment_panel_temp_c", "reference_panel_temp_c", + ): + val = getattr(self, attr) + out[attr] = round(val, 3) if val is not None else None + out["treatment_crop_by_position"] = self.treatment_crop_by_position + out["reference_crop_by_position"] = self.reference_crop_by_position + return out + + +# --------------------------------------------------------------------------- +# Configuration +# --------------------------------------------------------------------------- + +@dataclass +class ThingsBoardConfig: + """ThingsBoard connection settings. 
Data retrieval always uses prod (Seymour).""" + # Prod only — test (eu.thingsboard.cloud) is for deploying apps, not data + host: str = os.environ.get("THINGSBOARD_HOST", "https://web.seymouragri.com/") + username: Optional[str] = ( + os.environ.get("THINGSBOARD_USERNAME") or os.environ.get("TB_USERNAME") + ) + password: Optional[str] = ( + os.environ.get("THINGSBOARD_PASSWORD") or os.environ.get("TB_PASSWORD") + ) + token: Optional[str] = os.environ.get("THINGSBOARD_TOKEN") + + +# --------------------------------------------------------------------------- +# Client +# --------------------------------------------------------------------------- + +class ThingsBoardClient: + """ + Minimal ThingsBoard client for the Seymour vineyard. + + Authentication + -------------- + Provide THINGSBOARD_TOKEN for a pre-generated JWT, or + THINGSBOARD_USERNAME + THINGSBOARD_PASSWORD for login-based auth. + Tokens are cached and refreshed automatically before they expire. + + Usage + ----- + client = ThingsBoardClient() + snapshot = client.get_vine_snapshot() + print(snapshot.to_advisor_text()) + """ + + _TOKEN_TTL_SECONDS = 8_000 # ThingsBoard default is 9000 s; be conservative + + def __init__(self, config: Optional[ThingsBoardConfig] = None) -> None: + self.config = config or ThingsBoardConfig() + self._session = requests.Session() + self._session.headers.update({"Content-Type": "application/json"}) + self._jwt: Optional[str] = None + self._jwt_expires_at: float = 0.0 + + # ------------------------------------------------------------------ + # Authentication + # ------------------------------------------------------------------ + + def _ensure_jwt(self) -> str: + """Return a valid JWT, obtaining or refreshing as needed.""" + if self.config.token: + if "X-Authorization" not in self._session.headers: + self._session.headers["X-Authorization"] = f"Bearer {self.config.token}" + return self.config.token + + if self._jwt and time.monotonic() < self._jwt_expires_at: + return self._jwt + 
+ if not self.config.username or not self.config.password: + raise RuntimeError( + "ThingsBoard authentication requires THINGSBOARD_TOKEN " + "or both THINGSBOARD_USERNAME and THINGSBOARD_PASSWORD." + ) + + url = f"{self.config.host.rstrip('/')}/api/auth/login" + resp = self._session.post( + url, + json={"username": self.config.username, "password": self.config.password}, + timeout=10, + ) + resp.raise_for_status() + token = resp.json()["token"] + self._jwt = token + self._jwt_expires_at = time.monotonic() + self._TOKEN_TTL_SECONDS + self._session.headers["X-Authorization"] = f"Bearer {token}" + return token + + # ------------------------------------------------------------------ + # Low-level API calls + # ------------------------------------------------------------------ + + # ------------------------------------------------------------------ + # Shared low-level helpers (DEVICE and ASSET use the same REST API, + # differing only in the entity-type path segment). + # ------------------------------------------------------------------ + + def _fetch_latest_raw( + self, + entity_type: str, + uuid: str, + keys: List[str], + ) -> Tuple[Dict[str, Optional[float]], Optional[datetime]]: + """Fetch most-recent telemetry for any entity type (DEVICE or ASSET).""" + self._ensure_jwt() + url = ( + f"{self.config.host.rstrip('/')}/api/plugins/telemetry/{entity_type}" + f"/{uuid}/values/timeseries" + ) + resp = self._session.get(url, params={"keys": ",".join(keys)}, timeout=15) + resp.raise_for_status() + raw: Dict[str, List[Dict]] = resp.json() + + values: Dict[str, Optional[float]] = {} + newest_ts_ms: Optional[int] = None + for key in keys: + entries = raw.get(key, []) + if entries: + values[key] = _safe_float(entries[0]["value"]) + ts_ms = entries[0].get("ts") + if ts_ms and (newest_ts_ms is None or ts_ms > newest_ts_ms): + newest_ts_ms = ts_ms + else: + values[key] = None + + newest_ts = ( + datetime.fromtimestamp(newest_ts_ms / 1000, tz=timezone.utc) + if newest_ts_ms 
else None + ) + return values, newest_ts + + def _fetch_timeseries_raw( + self, + entity_type: str, + uuid: str, + keys: List[str], + start: datetime, + end: datetime, + limit: int = 1000, + interval_ms: int = 900_000, + agg: str = "NONE", + ) -> pd.DataFrame: + """Fetch time-series telemetry for any entity type (DEVICE or ASSET).""" + self._ensure_jwt() + start_ms = int(start.timestamp() * 1000) + end_ms = int(end.timestamp() * 1000) + url = ( + f"{self.config.host.rstrip('/')}/api/plugins/telemetry/{entity_type}" + f"/{uuid}/values/timeseries" + ) + params: Dict[str, Any] = { + "keys": ",".join(keys), + "startTs": start_ms, + "endTs": end_ms, + "limit": limit, + "agg": agg, + } + if agg != "NONE": + params["interval"] = interval_ms + + resp = self._session.get(url, params=params, timeout=30) + resp.raise_for_status() + raw: Dict[str, List[Dict]] = resp.json() + + frames: Dict[str, pd.Series] = {} + for key, entries in raw.items(): + if key in keys and entries: + ts = pd.to_datetime([e["ts"] for e in entries], unit="ms", utc=True) + vals = [_safe_float(e["value"]) for e in entries] + frames[key] = pd.Series(vals, index=ts) + + if not frames: + return pd.DataFrame() + return pd.DataFrame(frames).sort_index() + + # ------------------------------------------------------------------ + # Device API (public) + # ------------------------------------------------------------------ + + def _fetch_latest( + self, + device_name: str, + keys: List[str], + ) -> Tuple[Dict[str, Optional[float]], Optional[datetime]]: + """Fetch most-recent values for a named device.""" + info = DEVICE_REGISTRY[device_name] + return self._fetch_latest_raw("DEVICE", info.uuid, keys) + + def get_latest_telemetry( + self, + device_name: str, + keys: List[str], + ) -> Dict[str, Optional[float]]: + """Return the most recent value for each key. Missing keys return None.""" + if device_name not in DEVICE_REGISTRY: + raise KeyError( + f"Unknown device: {device_name!r}. 
" + f"Valid names: {sorted(DEVICE_REGISTRY)}" + ) + values, _ = self._fetch_latest(device_name, keys) + return values + + def get_timeseries( + self, + device_name: str, + keys: List[str], + start: datetime, + end: datetime, + limit: int = 1000, + interval_ms: int = 900_000, # 15 minutes + agg: str = "NONE", + ) -> pd.DataFrame: + """Fetch time-series telemetry for a named device.""" + if device_name not in DEVICE_REGISTRY: + raise KeyError(f"Unknown device: {device_name!r}") + info = DEVICE_REGISTRY[device_name] + return self._fetch_timeseries_raw( + "DEVICE", info.uuid, keys, start, end, limit, interval_ms, agg, + ) + + # ------------------------------------------------------------------ + # Asset API (public) + # ------------------------------------------------------------------ + + def get_asset_timeseries( + self, + asset_name: str, + keys: List[str], + start: datetime, + end: datetime, + limit: int = 1000, + interval_ms: int = 3_600_000, # 1 hour + agg: str = "SUM", + ) -> pd.DataFrame: + """Fetch time-series from a ThingsBoard ASSET (e.g. Plant energy).""" + if asset_name not in ASSET_REGISTRY: + raise KeyError(f"Unknown asset: {asset_name!r}. 
def send_rpc_command(
    self,
    device_name: str,
    method: str,
    params: Any = None,
    timeout: float = 10.0,
) -> Dict[str, Any]:
    """Send a two-way RPC command to a device.

    Uses POST /api/plugins/rpc/twoway/{deviceId}.
    Falls back to one-way RPC if the two-way endpoint answers 404 or 405.

    Returns the decoded JSON response, or
    ``{"status": "ok", "status_code": <code>}`` when the response body is
    not JSON (e.g. an empty body).

    Raises KeyError for an unknown device name, and whatever ``requests``
    raises for transport errors or a non-2xx final response.
    """
    if device_name not in DEVICE_REGISTRY:
        raise KeyError(f"Unknown device: {device_name!r}")
    info = DEVICE_REGISTRY[device_name]
    self._ensure_jwt()

    payload = {"method": method, "params": params if params is not None else {}}
    rpc_base = f"{self.config.host.rstrip('/')}/api/plugins/rpc"

    # Try two-way RPC first
    resp = self._session.post(
        f"{rpc_base}/twoway/{info.uuid}", json=payload, timeout=timeout,
    )
    if resp.status_code in (404, 405):
        # Fallback to one-way RPC
        resp = self._session.post(
            f"{rpc_base}/oneway/{info.uuid}", json=payload, timeout=timeout,
        )
    resp.raise_for_status()
    try:
        return resp.json()
    except ValueError:
        # Only a JSON decoding failure means "no body"; anything else
        # should surface instead of being reported as success.
        return {"status": "ok", "status_code": resp.status_code}
def get_vine_snapshot(self, light: bool = False,
                      mode: Optional[str] = None) -> VineSnapshot:
    """
    Fetch latest telemetry from the planned devices and return an
    aggregated VineSnapshot distinguishing treatment vs reference areas.

    Uses a thread pool to parallelise HTTP requests.
    Individual device failures are silently skipped (their fields become
    None). Devices that have not answered when the 25 s collection window
    expires are treated the same way instead of aborting the snapshot.
    Leaving the pool's ``with`` block still waits for requests that are
    already running.

    Parameters
    ----------
    light : bool
        If True, fetch only the devices in ``_LIGHT_FETCH_PLAN`` instead of
        the full ``_FULL_FETCH_PLAN``.
    mode : str, optional
        "dashboard" = the devices in ``_DASHBOARD_FETCH_PLAN`` only
        (air + soil + irrigation). Overrides `light` when set.

    Returns
    -------
    VineSnapshot
        Aggregated readings; ``staleness_minutes`` is NaN when no device
        returned a timestamp.
    """
    # Local import: only this method needs the futures timeout type.
    from concurrent.futures import TimeoutError as FuturesTimeoutError

    if mode == "dashboard":
        fetch_plan = self._DASHBOARD_FETCH_PLAN
    elif light:
        fetch_plan = self._LIGHT_FETCH_PLAN
    else:
        fetch_plan = self._FULL_FETCH_PLAN

    # Ensure auth token before spawning threads (avoid race on login)
    self._ensure_jwt()

    raw_results: Dict[str, Dict[str, Optional[float]]] = {}
    newest_ts_overall: Optional[datetime] = None

    with ThreadPoolExecutor(max_workers=8) as pool:
        future_map = {
            pool.submit(self._fetch_latest, name, keys): name
            for name, keys in fetch_plan.items()
        }
        try:
            for future in as_completed(future_map, timeout=25):
                name = future_map[future]
                try:
                    values, ts = future.result()
                    raw_results[name] = values
                    if ts and (newest_ts_overall is None or ts > newest_ts_overall):
                        newest_ts_overall = ts
                except Exception:
                    # Best-effort by design: a failing device yields no values.
                    raw_results[name] = {}
        except FuturesTimeoutError:
            # Collection window expired: keep what arrived, drop the rest.
            for future, name in future_map.items():
                if name not in raw_results:
                    future.cancel()  # no-op for requests already running
                    raw_results[name] = {}

    now = datetime.now(tz=timezone.utc)
    staleness = (
        (now - newest_ts_overall).total_seconds() / 60
        if newest_ts_overall else float("nan")
    )

    def _bounded(bounds_key: str, devs, field_name: str) -> Optional[float]:
        """Bounded mean of one telemetry key across several device dicts."""
        return _bounded_avg(*_BOUNDS[bounds_key], *[d.get(field_name) for d in devs])

    def _avg_soil(devs: List[Dict], field_names, bounds_key: str) -> Optional[float]:
        """Bounded mean over both probe depths of every soil device."""
        readings = [
            d[k] for d in devs for k in field_names if d.get(k) is not None
        ]
        lo, hi = _BOUNDS[bounds_key]
        return _bounded_avg(lo, hi, *readings) if readings else None

    def _avg_soil_moisture(devs: List[Dict]) -> Optional[float]:
        return _avg_soil(devs, ("soilMoisture", "soilMoisture2"), "soil_moisture")

    def _avg_soil_temp(devs: List[Dict]) -> Optional[float]:
        return _avg_soil(devs, ("soilTemperature", "soilTemperature2"), "soil_temp")

    # ---------- Ambient (Air1) ----------
    air1 = raw_results.get("Air1", {})

    # ---------- Treatment microclimate (Air2/3/4) ----------
    treatment_air = [raw_results.get(d, {}) for d in ("Air2", "Air3", "Air4")]

    # ---------- Treatment crop by position ----------
    position_labels = {
        "Crop3": "502-west-bottom",
        "Crop5": "502-east-upper",
        "Crop6": "502-east-bottom",
        "Crop7": "502-west-upper",
    }
    treatment_crop_devs = {
        label: raw_results.get(dev, {})
        for dev, label in position_labels.items()
    }
    treatment_crop_by_pos: Dict[str, Dict[str, Optional[float]]] = {
        label: {
            "par": v.get("PAR"),
            "leaf_temp": v.get("leafTemperature"),
            "ndvi": v.get("NDVI"),
            "dli": v.get("DLI"),
        }
        for label, v in treatment_crop_devs.items()
    }

    # ---------- Reference crop by position ----------
    ref_position_labels = {
        "Crop1": "503-ref",
        "Crop2": "503-control",
        "Crop4": "502-control",
    }
    reference_crop_devs = {
        label: raw_results.get(dev, {})
        for dev, label in ref_position_labels.items()
    }
    reference_crop_by_pos: Dict[str, Dict[str, Optional[float]]] = {
        label: {
            "par": v.get("PAR"),
            "leaf_temp": v.get("leafTemperature"),
            "ndvi": v.get("NDVI"),
            "dli": v.get("DLI"),
        }
        for label, v in reference_crop_devs.items()
    }
    treatment_crops = list(treatment_crop_devs.values())
    reference_crops = list(reference_crop_devs.values())

    # ---------- Soil device groups ----------
    treatment_soil_devs = [raw_results.get(d, {}) for d in ("Soil1", "Soil3", "Soil5", "Soil6")]
    reference_soil_devs = [raw_results.get(d, {}) for d in ("Soil2", "Soil4", "Soil7", "Soil9")]

    # ---------- Panel temps ----------
    tc1 = raw_results.get("Thermocouples1", {})
    tc2 = raw_results.get("Thermocouples2", {})

    irr = raw_results.get("Irrigation1", {})

    # ---------- PAR shading ratio (bounded to reject sensor faults) ----------
    t_par = _bounded("par", treatment_crops, "PAR")
    r_par = _bounded("par", reference_crops, "PAR")
    par_ratio: Optional[float] = None
    if t_par is not None and r_par is not None and r_par > 0:
        par_ratio = t_par / r_par

    snapshot = VineSnapshot(
        snapshot_ts=now,
        staleness_minutes=staleness,

        # Ambient — apply bounds to catch single-device faults too
        ambient_temp_c=_bounded_avg(*_BOUNDS["air_temp"], air1.get("airTemperature")),
        ambient_humidity_pct=_bounded_avg(0, 100, air1.get("airHumidity")),
        ambient_wind_speed_ms=_bounded_avg(0, 60, air1.get("windSpeed")),
        ambient_wind_angle_deg=_bounded_avg(0, 360, air1.get("windAngle")),
        ambient_rain_mm=_bounded_avg(0, 500, air1.get("rain")),

        # Treatment climate — bounded to reject sensor faults
        treatment_air_temp_c=_bounded("air_temp", treatment_air, "airTemperature"),
        treatment_leaf_temp_c=_bounded("leaf_temp", treatment_air, "leafTemperature"),
        treatment_vpd_kpa=_bounded("vpd", treatment_air, "VPD"),
        treatment_co2_ppm=_bounded("co2", treatment_air, "CO2"),
        treatment_par_umol=_bounded("par", treatment_air, "PAR"),
        treatment_dli_mol_m2=_bounded("dli", treatment_air, "DLI"),
        treatment_ndvi=_bounded("ndvi", treatment_air, "NDVI"),
        treatment_pri=_bounded("pri", treatment_air, "PRI"),
        treatment_air_leaf_delta_t=_bounded_avg(
            -20, 20, *[d.get("airLeafDeltaT") for d in treatment_air]
        ),

        # Treatment crop
        treatment_crop_par_umol=t_par,
        treatment_crop_leaf_temp_c=_bounded("leaf_temp", treatment_crops, "leafTemperature"),
        treatment_crop_ndvi=_bounded("ndvi", treatment_crops, "NDVI"),
        treatment_crop_dli_mol_m2=_bounded("dli", treatment_crops, "DLI"),
        treatment_crop_par_avg1h=_bounded("par", treatment_crops, "PARAvg1H"),
        treatment_crop_by_position=treatment_crop_by_pos,

        # Reference crop
        reference_crop_par_umol=r_par,
        reference_crop_leaf_temp_c=_bounded("leaf_temp", reference_crops, "leafTemperature"),
        reference_crop_ndvi=_bounded("ndvi", reference_crops, "NDVI"),
        reference_crop_dli_mol_m2=_bounded("dli", reference_crops, "DLI"),
        reference_crop_by_position=reference_crop_by_pos,

        par_shading_ratio=par_ratio,

        # Treatment soil
        treatment_soil_moisture_pct=_avg_soil_moisture(treatment_soil_devs),
        treatment_soil_temp_c=_avg_soil_temp(treatment_soil_devs),
        treatment_soil_ec_ds_m=_safe_avg(*[d.get("soilBulkEC") for d in treatment_soil_devs]),
        treatment_soil_ph=_safe_avg(*[d.get("soilpH") for d in treatment_soil_devs]),

        # Reference soil
        reference_soil_moisture_pct=_avg_soil_moisture(reference_soil_devs),
        reference_soil_temp_c=_avg_soil_temp(reference_soil_devs),

        # Irrigation
        # NOTE(review): `or` also falls through to the second key when the
        # first reading is a legitimate 0.0 — confirm this is intended.
        irrigation_last_volume_l=irr.get("irrigationCycleVolume") or irr.get("irrigationVolume"),
        irrigation_last_minutes=irr.get("irrigationCycleMinutes") or irr.get("irrigationMinutes"),
        irrigation_ec=irr.get("irrigationEC"),
        irrigation_ph=irr.get("irrigationPH"),
        water_temp_c=irr.get("waterTemperature"),

        # Panel temps
        treatment_panel_temp_c=_bounded_avg(
            *_BOUNDS["panel_temp"], *[tc1.get(k) for k in THERMOCOUPLE_KEYS]
        ),
        reference_panel_temp_c=_bounded_avg(
            *_BOUNDS["panel_temp"], *[tc2.get(k) for k in THERMOCOUPLE_KEYS]
        ),
    )
    return snapshot
TB telemetry value string/number to float, or None on failure.""" + if val is None: + return None + try: + f = float(val) + return None if math.isnan(f) or math.isinf(f) else f + except (TypeError, ValueError): + return None + + +def _safe_avg(*vals: Any) -> Optional[float]: + """Return the mean of non-None, finite values, or None if none available.""" + valid = [v for v in vals if v is not None and isinstance(v, (int, float)) + and not math.isnan(v) and not math.isinf(v)] + return sum(valid) / len(valid) if valid else None + + +def _bounded_avg(lo: float, hi: float, *vals: Any) -> Optional[float]: + """Return the mean of values within [lo, hi], rejecting sensor faults outside that range.""" + valid = [v for v in vals if v is not None and isinstance(v, (int, float)) + and not math.isnan(v) and not math.isinf(v) and lo <= v <= hi] + return sum(valid) / len(valid) if valid else None + + +# Physical plausibility bounds for Negev site +_BOUNDS = { + "air_temp": (-5.0, 55.0), # °C — extreme Negev range + "leaf_temp": (-5.0, 60.0), # °C — leaves can exceed air under direct sun + "soil_temp": (-2.0, 45.0), # °C — soil in Negev + "soil_moisture": (0.0, 100.0), # % + "par": (0.0, 3000.0), # µmol m⁻² s⁻¹ + "vpd": (0.0, 10.0), # kPa + "co2": (300.0, 2000.0), # ppm + "ndvi": (-1.0, 1.0), + "pri": (-1.0, 1.0), + "dli": (0.0, 80.0), # mol m⁻² day⁻¹ + "panel_temp": (-10.0, 100.0), # °C — panel surface +} + + +# --------------------------------------------------------------------------- +# CLI smoke test +# --------------------------------------------------------------------------- + +if __name__ == "__main__": + client = ThingsBoardClient() + print("Fetching vine snapshot from ThingsBoard...") + try: + snap = client.get_vine_snapshot() + print(snap.to_advisor_text()) + print(f"\nSnapshot age: {snap.staleness_minutes:.1f} min") + except Exception as exc: + print(f"Error: {exc}") + print("Make sure THINGSBOARD_USERNAME/PASSWORD or THINGSBOARD_TOKEN are set in your .env") diff --git 
a/src/data_providers.py b/src/data_providers.py new file mode 100644 index 0000000000000000000000000000000000000000..4740f73edcc91e49e5766d0e61376e279a62785c --- /dev/null +++ b/src/data_providers.py @@ -0,0 +1,2 @@ +"""Backward-compatible re-export from src.data.data_providers.""" +from src.data.data_providers import * # noqa: F401, F403 diff --git a/src/data_schema.py b/src/data_schema.py new file mode 100644 index 0000000000000000000000000000000000000000..7958a4ffa2f199e6f5ce72e618808b9079ac7145 --- /dev/null +++ b/src/data_schema.py @@ -0,0 +1,2 @@ +"""Backward-compatible re-export from src.data.data_schema.""" +from src.data.data_schema import * # noqa: F401, F403 diff --git a/src/day_ahead_advisor.py b/src/day_ahead_advisor.py new file mode 100644 index 0000000000000000000000000000000000000000..a5248cd2b412f4b5913e8575ca6583238cff8fc2 --- /dev/null +++ b/src/day_ahead_advisor.py @@ -0,0 +1,2 @@ +"""Backward-compatible re-export from src.advisor.day_ahead_advisor.""" +from src.advisor.day_ahead_advisor import * # noqa: F401, F403 diff --git a/src/day_ahead_planner.py b/src/day_ahead_planner.py new file mode 100644 index 0000000000000000000000000000000000000000..4c41ff5d8dd33960d688fcec7a935edf07207563 --- /dev/null +++ b/src/day_ahead_planner.py @@ -0,0 +1,580 @@ +""" +DayAheadPlanner: dynamic-programming trajectory optimizer for agrivoltaic control. + +Given a day-ahead weather forecast (temperature, GHI) and the current energy +budget, finds the optimal tilt-offset trajectory for the next day that +maximises a combined utility of crop protection and energy generation. + +Algorithm +--------- +For each 15-min slot t from sunrise to sunset: + 1. Predict vine state: Tleaf ≈ Tair (proxy), GHI from forecast, CWSI from + temperature heuristic, shading_helps from FvCB Rubisco transition. + 2. Run InterventionGate — if blocked, slot must stay at θ_astro (offset=0). + 3. 
@dataclass
class SlotPlan:
    """Tilt decision for one 15-minute slot of the plan."""

    # Slot label, "HH:MM" (UTC)
    time: str
    # Degrees away from astronomical tracking; 0 means full tracking
    offset_deg: float
    # Estimated energy given up in this slot (kWh)
    energy_cost_kwh: float
    # True when the intervention gate allowed shading in this slot
    gate_passed: bool
    # Explainability tags describing why the offset was chosen
    tags: List[str] = field(default_factory=list)


@dataclass
class DayAheadPlan:
    """Full day-ahead tilt trajectory together with its budget accounting."""

    # ISO date string of the planned day
    target_date: str
    # One entry per planned daylight slot
    slots: List[SlotPlan]
    # Sum of the per-slot energy costs (kWh)
    total_energy_cost_kwh: float
    # Energy budget that was available for the day (kWh)
    daily_budget_kwh: float
    # total cost / budget × 100
    budget_utilisation_pct: float
    # Phenological stage the plan was computed for
    stage_id: str
    # Number of slots whose offset is greater than zero
    n_intervention_slots: int

    def to_dict(self) -> dict:
        """Return a plain-dict view of the plan with rounded numeric fields.

        Budget and cost are rounded to 4 decimals, utilisation to 1 decimal
        and per-slot cost to 6 decimals. Each slot's ``tags`` list is passed
        through as-is (not copied).
        """
        slot_rows = []
        for slot in self.slots:
            slot_rows.append({
                "time": slot.time,
                "offset_deg": slot.offset_deg,
                "energy_cost_kwh": round(slot.energy_cost_kwh, 6),
                "gate_passed": slot.gate_passed,
                "tags": slot.tags,
            })

        payload = {
            "target_date": self.target_date,
            "stage_id": self.stage_id,
            "daily_budget_kwh": round(self.daily_budget_kwh, 4),
            "total_energy_cost_kwh": round(self.total_energy_cost_kwh, 4),
            "budget_utilisation_pct": round(self.budget_utilisation_pct, 1),
            "n_intervention_slots": self.n_intervention_slots,
        }
        payload["slots"] = slot_rows
        return payload
+ """ + + def __init__( + self, + shadow_model=None, + baseline_predictor=None, + energy_price: float = DP_FLAT_ENERGY_PRICE_ILS_KWH, + crop_value: float = DP_BASE_CROP_VALUE, + movement_cost: float = DP_MOVEMENT_COST, + ): + self._shadow_model = shadow_model + self._baseline_predictor = baseline_predictor + self.energy_price = energy_price + self.crop_value = crop_value + self.movement_cost = movement_cost + + @property + def shadow_model(self): + if self._shadow_model is None: + from src.shading.solar_geometry import ShadowModel + self._shadow_model = ShadowModel() + return self._shadow_model + + # ------------------------------------------------------------------ + # Main entry point + # ------------------------------------------------------------------ + + def plan_day( + self, + target_date: date, + forecast_temps: List[float], + forecast_ghi: List[float], + daily_budget_kwh: float, + stage_id: Optional[str] = None, + ) -> DayAheadPlan: + """Generate an optimal tilt trajectory for the given day. + + Parameters + ---------- + target_date : date + The day to plan for. + forecast_temps : list of float + Forecast air temperature (°C) for each 15-min slot (96 values). + Only daylight slots are used; nighttime values are ignored. + forecast_ghi : list of float + Forecast GHI (W/m²) for each 15-min slot (96 values). + daily_budget_kwh : float + Available energy sacrifice budget for the day (kWh). + stage_id : str, optional + Phenological stage identifier. If None, estimated from date. 
+ + Returns + ------- + DayAheadPlan + """ + if stage_id is None: + from src.models.phenology import estimate_stage_for_date + stage_id = estimate_stage_for_date(target_date).id + + # Crop value multiplier for this phenological stage + crop_multiplier = self._get_crop_multiplier(stage_id) + + # Compute baseline A predictions if predictor is available + baseline_a: Optional[List[float]] = None + if self._baseline_predictor is not None: + try: + baseline_a = self._baseline_predictor.predict_day( + forecast_temps, forecast_ghi, + ) + except Exception as exc: + import logging + logging.getLogger(__name__).warning( + "Baseline predictor failed, using temperature heuristic: %s", exc, + ) + + # Build slot timeline (sunrise to sunset only) + slots_info = self._build_slot_info( + target_date, forecast_temps, forecast_ghi, crop_multiplier, + baseline_a=baseline_a, + ) + + if not slots_info: + return DayAheadPlan( + target_date=str(target_date), + slots=[], + total_energy_cost_kwh=0.0, + daily_budget_kwh=daily_budget_kwh, + budget_utilisation_pct=0.0, + stage_id=stage_id, + n_intervention_slots=0, + ) + + # Run DP optimization + offsets = [0] + [o for o in CANDIDATE_OFFSETS if o > 0] + planned_slots = self._dp_optimize( + slots_info, offsets, daily_budget_kwh, + ) + + total_cost = sum(s.energy_cost_kwh for s in planned_slots) + n_interventions = sum(1 for s in planned_slots if s.offset_deg > 0) + utilisation = (total_cost / daily_budget_kwh * 100) if daily_budget_kwh > 0 else 0.0 + + return DayAheadPlan( + target_date=str(target_date), + slots=planned_slots, + total_energy_cost_kwh=total_cost, + daily_budget_kwh=daily_budget_kwh, + budget_utilisation_pct=utilisation, + stage_id=stage_id, + n_intervention_slots=n_interventions, + ) + + # ------------------------------------------------------------------ + # Slot info builder + # ------------------------------------------------------------------ + + def _build_slot_info( + self, + target_date: date, + forecast_temps: 
List[float], + forecast_ghi: List[float], + crop_multiplier: float, + baseline_a: Optional[List[float]] = None, + ) -> List[dict]: + """Build per-slot metadata for daylight hours. + + Returns list of dicts with keys: time_str, hour, temp_c, ghi, + solar_elevation, solar_azimuth, astro_tilt, gate_passed, + gate_reason, energy_per_slot_kwh, crop_value_weight. + """ + day_start = pd.Timestamp(target_date, tz="UTC") + times = pd.date_range(day_start, periods=96, freq="15min") + + # Solar positions for the whole day + solar_pos = self.shadow_model.get_solar_position(times) + + slots = [] + for i, ts in enumerate(times): + hour = ts.hour + ts.minute / 60.0 + elev = float(solar_pos.iloc[i]["solar_elevation"]) + + # Skip nighttime + if elev <= 2: + continue + + temp_c = forecast_temps[i] if i < len(forecast_temps) else 25.0 + ghi = forecast_ghi[i] if i < len(forecast_ghi) else 0.0 + + # Skip slots with no meaningful irradiance + if ghi < 50: + continue + + azim = float(solar_pos.iloc[i]["solar_azimuth"]) + tracker = self.shadow_model.compute_tracker_tilt(azim, elev) + astro_tilt = float(tracker["tracker_theta"]) + + # Gate check (simplified — uses forecast data as proxy) + gate_passed, gate_reason = self._check_gate( + temp_c, ghi, hour, + ) + + # Energy at astronomical tracking (kWh per kWp for this slot) + aoi = float(tracker["aoi"]) + energy_astro = max(0.0, math.cos(math.radians(aoi))) * 0.25 + + slot_dict = { + "time_str": ts.strftime("%H:%M"), + "hour": hour, + "temp_c": temp_c, + "ghi": ghi, + "solar_elevation": elev, + "solar_azimuth": azim, + "astro_tilt": astro_tilt, + "gate_passed": gate_passed, + "gate_reason": gate_reason, + "energy_astro_kwh": energy_astro, + "crop_multiplier": crop_multiplier, + } + # Attach baseline A if available (from BaselinePredictor) + if baseline_a is not None and i < len(baseline_a): + slot_dict["baseline_a"] = baseline_a[i] + slots.append(slot_dict) + + return slots + + def _check_gate( + self, + temp_c: float, + ghi: float, + hour: 
float, + ) -> tuple[bool, str]: + """Simplified gate check using forecast data. + + Uses the same thresholds as InterventionGate but without sensor data. + CWSI is estimated from temperature (proxy). + """ + # No shade before configured hour + if hour < NO_SHADE_BEFORE_HOUR: + return False, f"before_{NO_SHADE_BEFORE_HOUR}:00" + + # Temperature below Rubisco transition + if temp_c < SHADE_ELIGIBLE_TLEAF_ABOVE: + return False, f"temp_{temp_c:.0f}C_below_threshold" + + # GHI below meaningful radiation + if ghi < SHADE_ELIGIBLE_GHI_ABOVE: + return False, f"ghi_{ghi:.0f}_below_threshold" + + # CWSI proxy from temperature (simplified: T>35 → stressed) + cwsi_proxy = max(0.0, min(1.0, (temp_c - 30.0) / 10.0)) + if cwsi_proxy < SHADE_ELIGIBLE_CWSI_ABOVE: + return False, f"cwsi_proxy_{cwsi_proxy:.2f}_below_threshold" + + # FvCB shading_helps: above transition temp + high GHI = Rubisco-limited + shading_helps = temp_c >= SEMILLON_TRANSITION_TEMP_C and ghi >= 400 + if not shading_helps: + return False, "fvcb_shading_not_helpful" + + return True, "gate_passed" + + # ------------------------------------------------------------------ + # DP optimizer + # ------------------------------------------------------------------ + + def _dp_optimize( + self, + slots_info: List[dict], + offsets: List[float], + daily_budget_kwh: float, + ) -> List[SlotPlan]: + """Dynamic programming over slots × offsets with budget constraint. + + State: (slot_index, offset_index) + Constraint: cumulative energy cost ≤ daily_budget_kwh + Objective: maximise total utility (energy revenue + crop protection − movement cost) + """ + n_slots = len(slots_info) + n_offsets = len(offsets) + + # Discretise budget into steps for tractable DP + budget_steps = 100 + budget_per_step = daily_budget_kwh / budget_steps if daily_budget_kwh > 0 else 0.001 + + # DP table: V[t][o][b] = best utility from slot t onwards + # with offset o at slot t and b budget steps remaining + # Use forward pass to fill, then backtrack. 
+ INF = float("-inf") + + # Pre-compute per-slot utilities for each offset + slot_utilities = [] # [slot][offset] → (utility, energy_cost) + for si in slots_info: + utils_for_slot = [] + for offset in offsets: + u, cost = self._slot_utility(si, offset) + utils_for_slot.append((u, cost)) + slot_utilities.append(utils_for_slot) + + # Forward DP + # V[t][o][b] = max total utility achievable from slots 0..t + # ending at offset o with b budget steps consumed + V = np.full((n_slots, n_offsets, budget_steps + 1), INF) + choice = np.full((n_slots, n_offsets, budget_steps + 1), -1, dtype=int) + + # Initialize slot 0 + for oi, offset in enumerate(offsets): + if not slots_info[0]["gate_passed"] and offset > 0: + continue # gate blocked + u, cost = slot_utilities[0][oi] + b_used = int(math.ceil(cost / budget_per_step)) if cost > 0 else 0 + if b_used <= budget_steps: + V[0, oi, b_used] = u + + # Fill forward + for t in range(1, n_slots): + gate_passed = slots_info[t]["gate_passed"] + for oi, offset in enumerate(offsets): + if not gate_passed and offset > 0: + continue # gate blocked — only offset=0 allowed + + u_t, cost_t = slot_utilities[t][oi] + b_cost = int(math.ceil(cost_t / budget_per_step)) if cost_t > 0 else 0 + + for prev_oi, prev_offset in enumerate(offsets): + # Movement cost between consecutive offsets + move_penalty = self.movement_cost * abs(offset - prev_offset) + + for b_prev in range(budget_steps + 1): + if V[t - 1, prev_oi, b_prev] == INF: + continue + b_total = b_prev + b_cost + if b_total > budget_steps: + continue # budget exceeded + + val = V[t - 1, prev_oi, b_prev] + u_t - move_penalty + if val > V[t, oi, b_total]: + V[t, oi, b_total] = val + choice[t, oi, b_total] = prev_oi + + # Backtrack: find best final state + best_val = INF + best_oi = 0 + best_b = 0 + for oi in range(n_offsets): + for b in range(budget_steps + 1): + if V[n_slots - 1, oi, b] > best_val: + best_val = V[n_slots - 1, oi, b] + best_oi = oi + best_b = b + + # Trace back the path + path = 
[0] * n_slots + path[n_slots - 1] = best_oi + current_b = best_b + for t in range(n_slots - 1, 0, -1): + prev_oi = choice[t, path[t], current_b] + if prev_oi < 0: + prev_oi = 0 # fallback to astronomical + # Recover budget used at slot t + _, cost_t = slot_utilities[t][path[t]] + b_cost = int(math.ceil(cost_t / budget_per_step)) if cost_t > 0 else 0 + current_b = max(0, current_b - b_cost) + path[t - 1] = prev_oi + + # Build SlotPlan list + planned: List[SlotPlan] = [] + for t, si in enumerate(slots_info): + oi = path[t] + offset = offsets[oi] + _, cost = slot_utilities[t][oi] + + tags = [] + if not si["gate_passed"]: + tags.append(f"gate_blocked:{si['gate_reason']}") + elif offset > 0: + tags.append(f"intervention:{offset}deg") + else: + tags.append("full_tracking") + + planned.append(SlotPlan( + time=si["time_str"], + offset_deg=offset, + energy_cost_kwh=round(cost, 6), + gate_passed=si["gate_passed"], + tags=tags, + )) + + return planned + + def _slot_utility(self, si: dict, offset_deg: float) -> tuple[float, float]: + """Compute utility and energy cost for a slot at a given offset. + + Utility = energy_revenue + crop_protection_value + Energy cost = energy_astro − energy_at_offset (kWh) + + Returns (utility, energy_cost_kwh). 
+ """ + energy_astro = si["energy_astro_kwh"] + + # Energy at offset: cos(AOI + offset) approximation + sacrifice_frac = 1.0 - math.cos(math.radians(offset_deg)) + energy_at_offset = energy_astro * (1.0 - sacrifice_frac) + energy_cost = energy_astro - energy_at_offset # kWh sacrificed + + # Energy revenue (ILS) + energy_revenue = energy_at_offset * self.energy_price + + # Crop protection value: non-zero only when gate passes and offset > 0 + crop_value = 0.0 + if si["gate_passed"] and offset_deg > 0: + # Higher offset → more shade → more crop protection (diminishing returns) + shade_benefit = math.sqrt(offset_deg / 20.0) # diminishing returns + + if "baseline_a" in si and si["baseline_a"] > 0: + # Use actual photosynthesis prediction for stress severity. + # Higher A under full sun means more to protect; the benefit of + # shading scales with how much photosynthesis is at risk. + baseline_a = si["baseline_a"] + # Normalize: A ~ 10-20 µmol typical → severity 1.0-2.0 + stress_severity = baseline_a / 10.0 + else: + # Fallback: temperature heuristic + stress_severity = max(0.0, si["temp_c"] - SEMILLON_TRANSITION_TEMP_C) / 10.0 + + crop_value = ( + self.crop_value + * si["crop_multiplier"] + * stress_severity + * shade_benefit + ) + + utility = energy_revenue + crop_value + return utility, energy_cost + + # ------------------------------------------------------------------ + # Helpers + # ------------------------------------------------------------------ + + @staticmethod + def _get_crop_multiplier(stage_id: str) -> float: + """Map phenological stage ID to crop value multiplier.""" + # Map stage IDs to the STAGE_CROP_MULTIPLIER keys + stage_map = { + "budburst_vegetative": "pre_flowering", + "flowering_fruit_set": "fruit_set", + "berry_growth": "fruit_set", + "veraison_ripening": "veraison", + "post_harvest_reserves": "post_harvest", + "winter_dormancy": "post_harvest", + } + mapped = stage_map.get(stage_id, "fruit_set") + return STAGE_CROP_MULTIPLIER.get(mapped, 1.0) + 
+ +# --------------------------------------------------------------------------- +# CLI smoke test +# --------------------------------------------------------------------------- + +if __name__ == "__main__": + from src.shading.solar_geometry import ShadowModel + + shadow = ShadowModel() + planner = DayAheadPlanner(shadow_model=shadow) + + # Simulate a hot July day in Sde Boker + test_date = date(2025, 7, 15) + + # Generate synthetic forecast: sinusoidal temperature peaking at 38°C at 14:00 UTC + temps = [] + ghis = [] + for slot in range(96): + hour = slot * 0.25 + # Temperature: 25°C at night, peaks at 38°C around 11:00 UTC (14:00 local) + t = 25.0 + 13.0 * max(0, math.sin(math.pi * (hour - 5) / 14)) if 5 <= hour <= 19 else 25.0 + temps.append(t) + # GHI: 0 at night, peaks at 950 W/m² at solar noon (~9:40 UTC) + g = max(0, 950 * math.sin(math.pi * (hour - 4) / 12)) if 4 <= hour <= 16 else 0.0 + ghis.append(g) + + plan = planner.plan_day( + target_date=test_date, + forecast_temps=temps, + forecast_ghi=ghis, + daily_budget_kwh=2.0, # typical daily budget from EnergyBudgetPlanner + ) + + print(f"Day-Ahead Plan for {plan.target_date}") + print(f" Stage: {plan.stage_id}") + print(f" Budget: {plan.daily_budget_kwh:.2f} kWh") + print(f" Total cost: {plan.total_energy_cost_kwh:.4f} kWh ({plan.budget_utilisation_pct:.1f}%)") + print(f" Intervention slots: {plan.n_intervention_slots}/{len(plan.slots)}") + print() + print(f" {'Time':>5} {'Offset':>7} {'Cost':>10} {'Gate':>6} Tags") + print(f" {'-' * 60}") + for s in plan.slots: + status = "PASS" if s.gate_passed else "BLOCK" + print(f" {s.time:>5} {s.offset_deg:>5.0f}° {s.energy_cost_kwh:>10.6f} {status:>6} {', '.join(s.tags)}") diff --git a/src/energy_budget.py b/src/energy_budget.py new file mode 100644 index 0000000000000000000000000000000000000000..6b9902e57282a02cbc6861616e57b6b0f42ae9a7 --- /dev/null +++ b/src/energy_budget.py @@ -0,0 +1,309 @@ +""" +EnergyBudgetPlanner: hierarchical energy sacrifice budget for 
class EnergyBudgetPlanner:
    """Hierarchical energy sacrifice budget for agrivoltaic shading control.

    The budget is allocated top-down (annual -> monthly -> weekly -> daily ->
    15-min slot).  Plans are plain dicts; the ``spend_slot`` and
    ``emergency_draw`` helpers mutate the plan dict they are given.

    Parameters
    ----------
    max_energy_reduction_pct : float
        Maximum percentage of seasonal PV generation the vines can "spend"
        on shading (default ``MAX_ENERGY_REDUCTION_PCT`` from config).
    shadow_model : object, optional
        ShadowModel instance used to estimate slot-level energy potential.
        If None, the annual plan uses a simplified analytical estimate.
    """

    def __init__(
        self,
        max_energy_reduction_pct: float = MAX_ENERGY_REDUCTION_PCT,
        shadow_model=None,
    ):
        self.max_pct = max_energy_reduction_pct
        self.shadow = shadow_model

    # ------------------------------------------------------------------
    # Annual plan
    # ------------------------------------------------------------------

    def compute_annual_plan(self, year: int) -> dict:
        """Compute seasonal energy potential and allocate monthly budgets.

        Iterates every 15-min slot from May 1 to Sep 30 (UTC), computing
        energy under astronomical tracking, then distributes the sacrifice
        budget across months using MONTHLY_BUDGET_WEIGHTS.

        Returns dict with:
            year, total_potential_kWh, total_budget_kWh, annual_reserve_kWh,
            monthly_budgets (dict[int, float]), budget_spent_kWh
        """
        season_start = pd.Timestamp(f"{year}-05-01", tz="UTC")
        season_end = pd.Timestamp(f"{year}-09-30 23:45", tz="UTC")
        times = pd.date_range(season_start, season_end, freq="15min")

        if self.shadow is not None:
            energy_per_slot = self._energy_from_shadow_model(times)
        else:
            energy_per_slot = self._energy_analytical(times)

        total_potential = float(np.sum(energy_per_slot))
        total_budget = total_potential * self.max_pct / 100.0
        # The reserve is held back for emergency_draw(); only the remainder
        # is spread over the months.
        annual_reserve = total_budget * ANNUAL_RESERVE_PCT / 100.0
        distributable = total_budget - annual_reserve

        monthly_budgets = {
            month: distributable * weight
            for month, weight in MONTHLY_BUDGET_WEIGHTS.items()
        }

        return {
            "year": year,
            "total_potential_kWh": round(total_potential, 2),
            "total_budget_kWh": round(total_budget, 2),
            "annual_reserve_kWh": round(annual_reserve, 2),
            "monthly_budgets": {m: round(v, 4) for m, v in monthly_budgets.items()},
            "budget_spent_kWh": 0.0,
        }

    def _energy_from_shadow_model(self, times: pd.DatetimeIndex) -> np.ndarray:
        """Estimate per-slot energy using the ShadowModel's solar position.

        Each slot contributes ``max(0, cos(AOI)) * 0.25`` (kWh per kWp for a
        quarter-hour slot); slots with the sun at or below the horizon
        contribute zero.
        """
        solar_pos = self.shadow.get_solar_position(times)
        energy = []
        for _, sp in solar_pos.iterrows():
            if sp["solar_elevation"] <= 0:
                energy.append(0.0)
                continue
            tracker = self.shadow.compute_tracker_tilt(
                sp["solar_azimuth"], sp["solar_elevation"]
            )
            # cos(AOI) × 0.25h slot duration → kWh per kWp
            e = max(0.0, np.cos(np.radians(tracker["aoi"]))) * 0.25
            energy.append(e)
        return np.array(energy)

    @staticmethod
    def _energy_analytical(times: pd.DatetimeIndex) -> np.ndarray:
        """Simplified analytical estimate when no ShadowModel is available.

        Vectorized over all slots.  Uses sin(solar elevation) from the
        standard declination / hour-angle formula, scaled by a fixed
        clearness factor.  Good enough for budget planning; not used for
        real-time control.
        """
        from config.settings import SITE_LATITUDE

        hour_utc = times.hour + times.minute / 60.0
        # Solar noon in UTC for a hard-coded longitude of 34.8°E.
        solar_noon_utc = 12.0 - 34.8 / 15.0  # ≈ 9.68 UTC
        hour_angle = (hour_utc - solar_noon_utc) * 15.0  # degrees

        lat_rad = np.radians(SITE_LATITUDE)
        doy = times.dayofyear
        decl_rad = np.radians(23.45 * np.sin(np.radians(360.0 / 365.0 * (doy - 81))))
        ha_rad = np.radians(hour_angle)

        sin_elev = (
            np.sin(lat_rad) * np.sin(decl_rad)
            + np.cos(lat_rad) * np.cos(decl_rad) * np.cos(ha_rad)
        )
        # Astronomical tracking → AOI ≈ 0 → cos(AOI) ≈ 1
        # Scale by clearness (0.75) and slot duration (0.25h)
        return np.where(sin_elev > 0, sin_elev * 0.75 * 0.25, 0.0)

    # ------------------------------------------------------------------
    # Weekly plan
    # ------------------------------------------------------------------

    def compute_weekly_plan(
        self,
        week_start: pd.Timestamp | date,
        monthly_remaining: float,
        forecast_tmax: Optional[list[float]] = None,
        rollover: float = 0.0,
    ) -> dict:
        """Distribute weekly budget to days, weighted by (Tmax - 30)².

        Days with forecast Tmax <= 30°C get zero allocation (no stress
        expected).  Hot days get quadratically more budget.

        Parameters
        ----------
        week_start : date-like
            First day of the week.  May be naive or timezone-aware.
        monthly_remaining : float
            Remaining monthly budget (kWh).
        forecast_tmax : list of 7 floats, optional
            Forecast daily maximum temperature for each day of the week.
            If None (or not exactly 7 values), budget is split evenly.
        rollover : float
            Unspent budget rolled over from the previous week.

        Returns dict with:
            weekly_total_kWh, weekly_reserve_kWh, daily_budgets_kWh (list[7])
        """
        if not isinstance(week_start, pd.Timestamp):
            week_start = pd.Timestamp(week_start)

        # Midnight of the last calendar day of week_start's month.  Derived
        # from week_start itself so it carries the same timezone (a naive
        # month end cannot be subtracted from an aware week_start).
        month_end = week_start.normalize().replace(day=week_start.days_in_month)
        days_left = max(1, (month_end - week_start).days)
        weeks_left = max(1, days_left // 7)

        weekly_raw = monthly_remaining / weeks_left + rollover
        weekly_reserve = weekly_raw * WEEKLY_RESERVE_PCT / 100.0
        distributable = weekly_raw - weekly_reserve

        if forecast_tmax is not None and len(forecast_tmax) == 7:
            weights = [max(0.0, t - 30.0) ** 2 for t in forecast_tmax]
            total_w = sum(weights)
            if total_w > 0:
                daily = [distributable * w / total_w for w in weights]
            else:
                daily = [0.0] * 7  # no day above 30°C → no budget needed
        else:
            daily = [distributable / 7.0] * 7

        return {
            "weekly_total_kWh": round(weekly_raw, 4),
            "weekly_reserve_kWh": round(weekly_reserve, 4),
            "daily_budgets_kWh": [round(d, 4) for d in daily],
        }

    # ------------------------------------------------------------------
    # Daily plan
    # ------------------------------------------------------------------

    def compute_daily_plan(
        self,
        day: date | pd.Timestamp,
        daily_budget: float,
        rollover: float = 0.0,
    ) -> dict:
        """Distribute daily budget to 15-min slots.

        Zero before NO_SHADE_BEFORE_HOUR.  The peak window (ending at 14:00)
        receives 60% of the planned budget.  Only hours 05:00-19:45 get a
        slot entry; other slot keys are absent from ``slot_budgets``.

        Returns dict with:
            date, daily_total_kWh, daily_margin_kWh, daily_margin_remaining_kWh,
            slot_budgets (dict[str, float]), cumulative_spent
        """
        daily_raw = daily_budget + rollover
        daily_margin = daily_raw * DAILY_MARGIN_PCT / 100.0
        planned = daily_raw - daily_margin

        # Time blocks with their share of the planned budget.
        # The non-zero weights must sum to 1.0.
        transition_end = max(NO_SHADE_BEFORE_HOUR + 1, 11)
        blocks = [
            ((5, NO_SHADE_BEFORE_HOUR), 0.00),               # morning — no shade
            ((NO_SHADE_BEFORE_HOUR, transition_end), 0.05),  # transition
            ((transition_end, 14), 0.60),                    # peak stress window
            ((14, 16), 0.30),                                # sustained heat
            ((16, 20), 0.05),                                # rare late stress
        ]

        slot_budgets: dict[str, float] = {}
        for (h_start, h_end), weight in blocks:
            block_budget = planned * weight
            n_slots = (h_end - h_start) * 4  # 4 slots per hour
            per_slot = block_budget / n_slots if n_slots > 0 else 0.0
            for h in range(h_start, h_end):
                for m in (0, 15, 30, 45):
                    slot_budgets[f"{h:02d}:{m:02d}"] = round(per_slot, 6)

        return {
            "date": str(day),
            "daily_total_kWh": round(daily_raw, 4),
            "daily_margin_kWh": round(daily_margin, 4),
            "daily_margin_remaining_kWh": round(daily_margin, 4),
            "slot_budgets": slot_budgets,
            "cumulative_spent": 0.0,
        }

    # ------------------------------------------------------------------
    # Slot-level execution helpers
    # ------------------------------------------------------------------

    def spend_slot(self, daily_plan: dict, slot_key: str, amount: float) -> float:
        """Deduct energy from a slot's budget. Returns amount actually spent.

        If the slot budget is insufficient, draws the shortfall from the
        daily margin.  ``daily_plan`` is mutated in place.

        A non-positive ``amount`` spends nothing and leaves the plan
        untouched.  A ``slot_key`` that is not part of the plan (e.g. a
        night-time slot) is treated as a slot with zero budget.
        """
        # A zero/negative "spend" must never mint budget, and must not fail
        # for slots that were never allocated.
        if amount <= 0:
            return 0.0

        slot_budgets = daily_plan["slot_budgets"]
        available = slot_budgets.get(slot_key, 0.0)
        if amount <= available:
            slot_budgets[slot_key] = available - amount
            daily_plan["cumulative_spent"] += amount
            return amount

        # Slot budget exhausted — try daily margin
        shortfall = amount - available
        margin = daily_plan["daily_margin_remaining_kWh"]
        from_margin = min(shortfall, margin)
        total_spent = available + from_margin

        slot_budgets[slot_key] = 0.0
        daily_plan["daily_margin_remaining_kWh"] -= from_margin
        daily_plan["cumulative_spent"] += total_spent
        return round(total_spent, 6)

    def emergency_draw(self, annual_plan: dict, amount: float) -> float:
        """Draw from annual reserve for extreme heat events.

        Returns the amount actually drawn (may be less than requested if
        the reserve is depleted).  A non-positive request draws nothing.
        ``annual_plan`` is mutated in place.
        """
        available = annual_plan["annual_reserve_kWh"]
        # Clamp at zero so a negative request cannot top the reserve up.
        drawn = min(max(amount, 0.0), available)
        annual_plan["annual_reserve_kWh"] = round(available - drawn, 4)
        annual_plan["budget_spent_kWh"] = round(
            annual_plan["budget_spent_kWh"] + drawn, 4
        )
        return round(drawn, 4)

    # ------------------------------------------------------------------
    # Rollover helper
    # ------------------------------------------------------------------

    def compute_daily_rollover(self, daily_plan: dict) -> float:
        """Compute unspent budget at end of day (available for next day).

        Sum of all remaining slot budgets plus the remaining daily margin.
        """
        unspent_slots = sum(daily_plan["slot_budgets"].values())
        unspent_margin = daily_plan["daily_margin_remaining_kWh"]
        return round(unspent_slots + unspent_margin, 4)
_MODEL_PATH = Path(__file__).resolve().parent.parent / "Data" / "energy_predictor_model.pkl"

# Site constants
_LATITUDE_DEG = 30.85
_LAT_RAD = np.radians(_LATITUDE_DEG)
_SYSTEM_CAPACITY_KW = 48.0


def _solar_sin_elevation(day_of_year: int, hour_utc: int) -> float:
    """Approximate sin(solar elevation) at the site latitude, floored at 0.

    Uses the simple declination formula and treats ``hour_utc + 2`` as local
    solar time, so the result is a coarse estimate only.
    """
    dec = np.radians(23.45 * np.sin(np.radians(360 / 365 * (day_of_year - 81))))
    ha = np.radians(15 * (hour_utc + 2 - 12))  # UTC+2 ≈ local solar
    return float(max(0.0, np.sin(_LAT_RAD) * np.sin(dec)
                     + np.cos(_LAT_RAD) * np.cos(dec) * np.cos(ha)))


class EnergyPredictor:
    """Predict PV energy generation from weather features.

    Wraps a pickled bundle holding a primary model (``xgb_model``), a linear
    fallback (``lr_fallback``) and the feature lists each one expects.
    """

    def __init__(self, model_path: Optional[Path] = None):
        """Load the model bundle from *model_path* (default: ``_MODEL_PATH``).

        Raises
        ------
        FileNotFoundError
            If the bundle file does not exist.
        """
        # Accept plain strings as well as Path objects.
        path = Path(model_path) if model_path else _MODEL_PATH
        if not path.exists():
            raise FileNotFoundError(
                f"Energy model not found at {path}. "
                "Run the training notebook / script first."
            )
        # NOTE(security): pickle.load executes arbitrary code embedded in the
        # file.  Only load bundles produced by this project's own training
        # run; never point model_path at an untrusted file.
        with open(path, "rb") as f:
            bundle = pickle.load(f)

        self._xgb = bundle["xgb_model"]
        self._xgb_features = bundle["xgb_features"]
        self._lr = bundle["lr_fallback"]
        self._lr_features = bundle["lr_features"]
        # Everything except the model objects is kept as metadata.
        self._meta = {
            k: v for k, v in bundle.items()
            if k not in ("xgb_model", "lr_fallback")
        }

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def predict_hourly(
        self,
        target_date: str | date,
        forecast_ghi: Sequence[float],
        forecast_temp: Sequence[float],
        forecast_wind: Optional[Sequence[float]] = None,
    ) -> pd.DataFrame:
        """Predict hourly energy production (kWh) for *target_date*.

        Parameters
        ----------
        target_date : str or date
            ISO date, e.g. ``"2026-03-15"``.
        forecast_ghi : sequence of 24 floats
            Hourly Global Solar Radiation (W/m²), index 0 = 00:00 UTC.
        forecast_temp : sequence of 24 floats
            Hourly air temperature (°C).
        forecast_wind : sequence of 24 floats, optional
            Hourly wind speed (m/s).  Falls back to 1.5 m/s if omitted.

        Returns
        -------
        DataFrame with columns ``hour_utc``, ``production_kwh``,
        ``method`` ('xgb' or 'lr_fallback').
        """
        if isinstance(target_date, str):
            target_date = date.fromisoformat(target_date)

        doy = target_date.timetuple().tm_yday
        ghi = np.asarray(forecast_ghi, dtype=float)
        temp = np.asarray(forecast_temp, dtype=float)
        wind = (np.asarray(forecast_wind, dtype=float)
                if forecast_wind is not None
                else np.full(24, 1.5))

        rows = []
        for h in range(24):
            sin_el = _solar_sin_elevation(doy, h)
            # Clearness index relative to a 1000 W/m² reference; undefined
            # (set to 0) when the sun is very low, and capped at 1.5.
            clearness = (ghi[h] / (sin_el * 1000)
                         if sin_el > 0.05 else 0.0)
            clearness = min(clearness, 1.5)
            rows.append({
                "GSR": ghi[h],
                "airTemperature": temp[h],
                "sin_elevation": sin_el,
                "hour": h,
                "clearness": clearness,
                "windSpeed": wind[h],
            })

        df = pd.DataFrame(rows)

        # Prefer XGBoost; fall back to LR if it fails.  The failure is
        # logged so a permanently broken primary model does not go unnoticed.
        try:
            preds = self._xgb.predict(df[self._xgb_features])
            method = "xgb"
        except Exception:
            import logging

            logging.getLogger(__name__).warning(
                "Primary energy model failed; using linear fallback",
                exc_info=True,
            )
            preds = self._lr.predict(df[self._lr_features])
            method = "lr_fallback"

        preds = np.clip(preds, 0, None)

        # Zero out nighttime hours (sun below horizon and negligible GHI)
        night = (df["sin_elevation"].to_numpy() < 0.02) & (ghi[:24] < 10)
        preds[night] = 0.0

        return pd.DataFrame({
            "hour_utc": range(24),
            "production_kwh": np.round(preds, 2),
            "method": method,
        })

    def predict_day(
        self,
        target_date: str | date,
        forecast_ghi: Sequence[float],
        forecast_temp: Sequence[float],
        forecast_wind: Optional[Sequence[float]] = None,
    ) -> Dict[str, Any]:
        """Predict daily energy production with hourly profile.

        Returns a dict with ``date``, ``daily_kwh``, ``peak_hour``,
        ``peak_hour_kwh``, ``hourly_profile``, ``source`` and
        ``model_mape_pct``.  All hours are UTC.
        """
        hourly = self.predict_hourly(
            target_date, forecast_ghi, forecast_temp, forecast_wind,
        )
        total_kwh = hourly["production_kwh"].sum()
        peak_idx = hourly["production_kwh"].idxmax()
        peak_hour = int(hourly.loc[peak_idx, "hour_utc"])
        peak_kwh = float(hourly.loc[peak_idx, "production_kwh"])

        # Hours stay in UTC; no local-time conversion is applied here.
        hourly_profile = [
            {"hour": int(row["hour_utc"]),
             "energy_kwh": round(float(row["production_kwh"]), 2)}
            for _, row in hourly.iterrows()
        ]

        return {
            "date": str(target_date),
            "daily_kwh": round(float(total_kwh), 1),
            "peak_hour": peak_hour,
            "peak_hour_kwh": round(peak_kwh, 2),
            "hourly_profile": hourly_profile,
            "source": f"ML energy predictor ({hourly.iloc[0]['method']})",
            "model_mape_pct": self._meta.get("test_mape_pct"),
        }

    @staticmethod
    def _row_hours(index: pd.Index) -> np.ndarray:
        """Hour-of-day for each row of a frame without a datetime index.

        Integer labels that all lie in 0..23 are taken to be the hour;
        otherwise the row position is used.
        """
        if (len(index) > 0
                and pd.api.types.is_integer_dtype(index)
                and index.min() >= 0
                and index.max() < 24):
            return np.asarray(index, dtype=int)
        return np.arange(len(index))

    def predict_day_from_weather_df(
        self,
        target_date: str | date,
        weather_df: pd.DataFrame,
    ) -> Dict[str, Any]:
        """Predict from a DataFrame that has hourly GSR/airTemperature columns.

        Accepts either ThingsBoard Air1 column names or IMS names
        (e.g. ``ghi_w_m2``).  With a datetime-like index the data is
        resampled to hourly means; otherwise each row is assigned an hour by
        its integer label (0..23) or, failing that, by its position.
        Returns an error dict (``daily_kwh`` = None) for an empty frame or
        when the GHI/temperature columns cannot be found.
        """
        if weather_df.empty:
            return {"date": str(target_date), "daily_kwh": None,
                    "error": "Empty weather DataFrame"}

        # Resolve column names
        ghi_col = next((c for c in weather_df.columns
                        if c in ("GSR", "ghi_w_m2", "GHI")), None)
        temp_col = next((c for c in weather_df.columns
                         if c in ("airTemperature", "air_temperature_c", "temperature")), None)
        wind_col = next((c for c in weather_df.columns
                         if c in ("windSpeed", "wind_speed_ms", "wind_speed")), None)

        if ghi_col is None or temp_col is None:
            return {"date": str(target_date), "daily_kwh": None,
                    "error": f"Missing columns. Need GSR + temp, got {list(weather_df.columns)}"}

        # Keep only the columns that are used, so unrelated (possibly
        # non-numeric) columns cannot break the hourly mean.
        used_cols = [c for c in (ghi_col, temp_col, wind_col) if c is not None]
        df = weather_df[used_cols]

        # Resample to hourly values and work out each row's hour slot
        if hasattr(df.index, "hour"):
            hourly = df.resample("h").mean()
            hours = np.asarray(hourly.index.hour)
        else:
            hourly = df
            hours = self._row_hours(df.index)

        # Ensure 24 hours (missing GHI → 0, missing temp/wind → defaults)
        ghi_24 = np.zeros(24)
        temp_24 = np.full(24, 15.0)  # sensible default
        wind_24 = np.full(24, 1.5)

        wind_values = (hourly[wind_col] if wind_col is not None
                       else [np.nan] * len(hourly))
        for h, ghi_val, temp_val, wind_val in zip(
            hours, hourly[ghi_col], hourly[temp_col], wind_values
        ):
            h = int(h)
            if not 0 <= h < 24:
                continue
            ghi_24[h] = ghi_val if pd.notna(ghi_val) else 0
            if pd.notna(temp_val):
                temp_24[h] = temp_val
            if pd.notna(wind_val):
                wind_24[h] = wind_val

        return self.predict_day(target_date, ghi_24, temp_24, wind_24)

    def backtest(
        self,
        tb_client: Any,
        days_back: int = 7,
    ) -> pd.DataFrame:
        """Compare ML predictions vs actual production for recent days.

        Days whose actual production cannot be fetched or is below 10 kWh,
        and days whose weather cannot be fetched, are skipped (best effort).

        Returns DataFrame with columns: date, actual_kwh, predicted_kwh,
        error_kwh, error_pct.
        """
        rows = []
        now = datetime.now(tz=timezone.utc)

        for d in range(days_back, 0, -1):
            day = now - timedelta(days=d)
            # UTC midnight bounds of the day being checked
            day_start = datetime(day.year, day.month, day.day, tzinfo=timezone.utc)
            day_end = day_start + timedelta(days=1)

            # Actual production
            try:
                actual_df = tb_client.get_asset_timeseries(
                    "Plant", ["production"],
                    start=day_start, end=day_end,
                    limit=500, interval_ms=3_600_000, agg="SUM",
                )
                actual_kwh = actual_df["production"].sum() / 1000 if not actual_df.empty else 0
            except Exception:
                actual_kwh = 0

            # Also guarantees a non-zero denominator for error_pct below.
            if actual_kwh < 10:
                continue

            # Weather for that day → prediction
            try:
                wx = tb_client.get_timeseries(
                    "Air1", ["GSR", "airTemperature", "windSpeed"],
                    start=day_start, end=day_end,
                    limit=100, interval_ms=3_600_000, agg="AVG",
                )
                if wx.empty:
                    continue
                pred = self.predict_day_from_weather_df(
                    day_start.strftime("%Y-%m-%d"), wx,
                )
                pred_kwh = pred.get("daily_kwh") or 0
            except Exception:
                continue

            rows.append({
                "date": day_start.strftime("%Y-%m-%d"),
                "actual_kwh": round(actual_kwh, 1),
                "predicted_kwh": round(pred_kwh, 1),
                "error_kwh": round(pred_kwh - actual_kwh, 1),
                "error_pct": round((pred_kwh - actual_kwh) / actual_kwh * 100, 1),
            })

        return pd.DataFrame(rows)

    @property
    def metadata(self) -> Dict[str, Any]:
        """Model training metadata (a copy of the non-model bundle entries)."""
        return dict(self._meta)
+ +Improvement history: + v1: Broken — daytime-only rows with hidden gaps → MAE ~8.5 + v2: Regular 15-min grid + predict_df + daytime eval → MAE ~1.75 (20w) + v3: + On-site sensor covariates (PAR, VPD, T_leaf, CO2) + + 14-day context (captures ~2 weeks of diurnal pattern) + + LoRA fine-tuning (1000 steps, lr=1e-4) + + Configurable covariate modes for ablation + → MAE 1.37 (May), 3.0-3.4 (Jun-Sep), overall beats ML baseline (2.7) + v4: Revisited input features: added engineered time (hour_sin/cos, doy_sin/cos) and + stress_risk_ims (VPD from IMS T+RH) in load_data; tried extended IMS (tdmax/tdmin). + Ablation on current data: best zero-shot = sensor (MAE ~3.86) or all (MAE ~3.91, R² 0.52). + Time/stress as covariates slightly hurt; kept 4-col IMS + sensor for \"all\". +""" + +from __future__ import annotations + +from pathlib import Path +from typing import Optional + +import numpy as np +import pandas as pd +import torch +from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score + +from config.settings import ( + PROCESSED_DIR, IMS_CACHE_DIR, OUTPUTS_DIR, GROWING_SEASON_MONTHS, +) +from src.time_features import add_cyclical_time_features + +# --------------------------------------------------------------------------- +# Covariate definitions +# --------------------------------------------------------------------------- + +# IMS station 43 weather (available as day-ahead forecasts in production) +# tdmax_c, tdmin_c available in data; ablation showed 4-col IMS best for this dataset +IMS_COVARIATE_COLS = [ + "ghi_w_m2", "air_temperature_c", "rh_percent", "wind_speed_ms", +] + +# On-site Seymour sensors (past-only: not available as forecasts) +SENSOR_COVARIATE_COLS = [ + "PAR_site", "VPD_site", "T_leaf_site", "CO2_site", +] + +# Engineered time features (deterministic from timestamp; available for future) +TIME_COVARIATE_COLS = ["hour_sin", "hour_cos", "doy_sin", "doy_cos"] + +# Stress risk from IMS-derived VPD (past + future; VPD_ims from T + RH) 
# VPD (kPa) from IMS air temperature and relative humidity; feeds stress_risk_ims
def _vpd_from_ims_kpa(T_c: np.ndarray, rh_percent: np.ndarray) -> np.ndarray:
    """Return vapour pressure deficit (kPa) from air temperature and RH.

    Saturation vapour pressure is computed as
    ``0.611 * exp(17.27 * T / (T + 237.3))`` with T in °C, and
    VPD = esat * (1 - RH/100).  RH is clipped to 0–100 % first, so the
    result is never negative.

    NOTE(review): these coefficients are the Tetens / FAO-56 form of the
    saturation curve, not Buck's equation as the surrounding file comment
    labels it — confirm which one was intended.
    """
    rh_fraction = np.clip(rh_percent, 0, 100) / 100.0
    saturation_kpa = 0.611 * np.exp(17.27 * T_c / (T_c + 237.3))
    return saturation_kpa * (1.0 - rh_fraction)
@staticmethod
def load_data(
    labels_path: Optional[Path] = None,
    ims_path: Optional[Path] = None,
    sensor_path: Optional[Path] = None,
    growing_season_only: bool = True,
) -> pd.DataFrame:
    """Load labels + IMS + on-site sensors, merge, resample to regular grid.

    Parameters
    ----------
    labels_path : Path, optional
        CSV with a ``time`` column and the target ``A``.
        Defaults to ``PROCESSED_DIR / "stage1_labels.csv"``.
    ims_path : Path, optional
        CSV with a ``timestamp_utc`` column and IMS weather columns.
        Defaults to ``IMS_CACHE_DIR / "ims_merged_15min.csv"``.
    sensor_path : Path, optional
        CSV with a ``time`` column and the raw sensor columns listed in
        ``_SENSOR_COL_MAP``.  Defaults to ``SEYMOUR_DIR / "sensors_wide.csv"``.
    growing_season_only : bool
        When True (default), keep only rows whose month is in
        ``GROWING_SEASON_MONTHS``; the remaining seasons are concatenated
        into one frame.

    Returns
    -------
    DataFrame with a ``timestamp_utc`` column on a regular ``FREQ`` grid,
    the target ``A``, the covariate columns, ``stress_risk_ims`` (when the
    IMS temperature and RH columns are present) and a ``season`` column
    holding the calendar year of each row.
    """
    # NOTE: DATA_DIR is imported here but not used in this function.
    from config.settings import DATA_DIR, SEYMOUR_DIR

    labels_path = labels_path or PROCESSED_DIR / "stage1_labels.csv"
    ims_path = ims_path or IMS_CACHE_DIR / "ims_merged_15min.csv"
    sensor_path = sensor_path or SEYMOUR_DIR / "sensors_wide.csv"

    # --- Labels ---
    labels = pd.read_csv(labels_path, parse_dates=["time"])
    labels.rename(columns={"time": "timestamp_utc"}, inplace=True)
    # Force tz-aware UTC so all three sources join on identical keys.
    labels["timestamp_utc"] = pd.to_datetime(labels["timestamp_utc"], utc=True)

    # --- IMS ---
    ims = pd.read_csv(ims_path, parse_dates=["timestamp_utc"])
    ims["timestamp_utc"] = pd.to_datetime(ims["timestamp_utc"], utc=True)

    # --- On-site sensors ---
    # Read only the raw columns that have a clean-name mapping.
    raw_cols = ["time"] + list(_SENSOR_COL_MAP.keys())
    sensors = pd.read_csv(sensor_path, usecols=raw_cols, parse_dates=["time"])
    sensors.rename(columns={"time": "timestamp_utc", **_SENSOR_COL_MAP}, inplace=True)
    sensors["timestamp_utc"] = pd.to_datetime(sensors["timestamp_utc"], utc=True)

    # --- Merge ---
    # Labels must have matching IMS rows (inner); sensors are optional (left).
    merged = labels.merge(ims, on="timestamp_utc", how="inner")
    merged = merged.merge(sensors, on="timestamp_utc", how="left")
    merged.sort_values("timestamp_utc", inplace=True)
    merged.set_index("timestamp_utc", inplace=True)

    # --- Resample to regular 15-min grid ---
    # Reindexing inserts an all-NaN row for every grid step that has no
    # merged record.
    full_idx = pd.date_range(
        merged.index.min(), merged.index.max(), freq=FREQ, tz="UTC",
    )
    resampled = merged.reindex(full_idx)
    resampled.index.name = "timestamp_utc"

    # Fill A=0 overnight, interpolate covariates.
    # Note that A is set to 0 on EVERY grid step missing from the merged
    # data, not only on night-time steps.
    resampled["A"] = resampled["A"].fillna(0.0)
    all_cov_cols = [
        c for c in IMS_COVARIATE_COLS + SENSOR_COVARIATE_COLS
        if c in resampled.columns
    ]
    for col in all_cov_cols:
        # Time-weighted interpolation, then ffill/bfill for the edges.
        resampled[col] = (
            resampled[col].interpolate(method="time").ffill().bfill()
        )
        # Radiation-type columns cannot be negative.
        if col in ("ghi_w_m2", "PAR_site"):
            resampled[col] = resampled[col].clip(lower=0)

    # Engineered time covariates (deterministic; available for future)
    # presumably adds the hour_sin/hour_cos/doy_sin/doy_cos columns —
    # confirm against src.time_features.
    resampled = add_cyclical_time_features(resampled, index_is_timestamp=True)

    # Stress risk from IMS VPD (past + future; 0–1 scale, clip VPD at 6 kPa)
    if "air_temperature_c" in resampled.columns and "rh_percent" in resampled.columns:
        vpd_ims = _vpd_from_ims_kpa(
            resampled["air_temperature_c"].values,
            resampled["rh_percent"].values,
        )
        resampled[STRESS_COVARIATE_COL] = np.clip(vpd_ims / 6.0, 0.0, 1.0)

    # Move the timestamp index back into a regular column.
    resampled.reset_index(inplace=True)

    # --- Growing-season filter ---
    if growing_season_only:
        # Temporary helper column, dropped again after filtering.
        resampled["month"] = resampled["timestamp_utc"].dt.month
        resampled = resampled[
            resampled["month"].isin(GROWING_SEASON_MONTHS)
        ].copy()
        resampled.drop(columns=["month"], inplace=True)
        resampled.reset_index(drop=True, inplace=True)

    # Add season column (year of growing season)
    resampled["season"] = resampled["timestamp_utc"].dt.year

    return resampled
def forecast_day(
    self,
    df: pd.DataFrame,
    context_end_idx: int,
    prediction_length: int = STEPS_PER_DAY,
    covariate_mode: str = "all",
) -> pd.DataFrame:
    """Forecast next prediction_length steps using predict_df API.

    Parameters
    ----------
    df : DataFrame
        Regular-grid data with ``timestamp_utc``, ``A`` and covariate columns.
    context_end_idx : int
        Positional index of the first step to forecast; the context is the
        ``self.context_steps`` rows immediately before it.
    prediction_length : int
        Number of steps to forecast.
    covariate_mode : str
        'none', 'ims', 'sensor', or 'all' (a key of ``COVARIATE_MODES``).

    Returns
    -------
    DataFrame with columns ``timestamp_utc``, ``median``, ``low_10``,
    ``high_90``.  Timestamps come from *df* where the forecast window lies
    inside the data; steps beyond the end of *df* (true day-ahead use) are
    labelled by continuing the ``FREQ`` grid.

    Raises
    ------
    ValueError
        If the context window is empty.
    """
    mode_cfg = COVARIATE_MODES[covariate_mode]
    past_cols = [c for c in mode_cfg["past"] if c in df.columns]
    future_cols = [c for c in mode_cfg["future"] if c in df.columns]

    ctx_start = max(0, context_end_idx - self.context_steps)
    ctx = df.iloc[ctx_start:context_end_idx]
    if ctx.empty:
        raise ValueError(
            f"No context rows available before index {context_end_idx}"
        )

    # Build history DataFrame
    hist = ctx[["timestamp_utc", "A"]].copy()
    hist = hist.rename(columns={"timestamp_utc": "timestamp", "A": "target"})
    hist["item_id"] = "A"
    for col in past_cols:
        hist[col] = ctx[col].values

    # Rows of df that fall inside the forecast window (may be short or empty
    # when forecasting past the end of the data).
    fwd = df.iloc[context_end_idx : context_end_idx + prediction_length]

    # Build future covariates DataFrame — only when the whole window is known
    future_df = None
    if future_cols and len(fwd) >= prediction_length:
        future_df = fwd[["timestamp_utc"]].copy()
        future_df = future_df.rename(columns={"timestamp_utc": "timestamp"})
        future_df["item_id"] = "A"
        for col in future_cols:
            future_df[col] = fwd[col].values

    result = self.pipeline.predict_df(
        df=hist,
        future_df=future_df,
        id_column="item_id",
        timestamp_column="timestamp",
        target="target",
        prediction_length=prediction_length,
        quantile_levels=[0.1, 0.5, 0.9],
    )

    # Label every predicted step.  Use the data's own timestamps where they
    # exist and extend the regular grid for the rest, so the columns below
    # always have equal length.
    n_out = len(result)
    fwd_timestamps = fwd["timestamp_utc"].values[:n_out]
    n_missing = n_out - len(fwd_timestamps)
    if n_missing > 0:
        anchor = (
            fwd_timestamps[-1] if len(fwd_timestamps)
            else ctx["timestamp_utc"].values[-1]
        )
        extension = pd.date_range(
            start=anchor, periods=n_missing + 1, freq=FREQ,
        )[1:].values
        fwd_timestamps = np.concatenate([fwd_timestamps, extension])

    out = pd.DataFrame({
        "timestamp_utc": fwd_timestamps,
        "median": result["0.5"].values,
        "low_10": result["0.1"].values,
        "high_90": result["0.9"].values,
    })
    return out
training portion of the data. + + Uses the dict API for fit() with past and future covariates. + Only the training portion (before train_ratio split) is used — + no data leakage. + """ + split_idx = int(len(df) * train_ratio) + train_df = df.iloc[:split_idx].copy() + + mode_cfg = COVARIATE_MODES[covariate_mode] + past_cols = [c for c in mode_cfg["past"] if c in df.columns] + future_cols = [c for c in mode_cfg["future"] if c in df.columns] + + # Build training inputs: sliding windows over the training data + # Each window: context_steps history + prediction_length target + min_window = self.context_steps + prediction_length + inputs = [] + + # Sample windows every prediction_length steps for diversity + stride = prediction_length + for end_idx in range(min_window, len(train_df), stride): + ctx_start = end_idx - min_window + ctx_end = end_idx - prediction_length + + target = train_df["A"].iloc[ctx_start:ctx_end].values.astype(np.float32) + entry: dict = {"target": target} + + if past_cols: + past_covs = {} + for col in past_cols: + past_covs[col] = ( + train_df[col].iloc[ctx_start:ctx_end].values.astype(np.float32) + ) + entry["past_covariates"] = past_covs + + if future_cols: + future_covs = {} + for col in future_cols: + # Use actual values from training data as future covariates + future_covs[col] = ( + train_df[col].iloc[ctx_end:end_idx].values.astype(np.float32) + ) + entry["future_covariates"] = future_covs + + inputs.append(entry) + + if not inputs: + print("Not enough training data for fine-tuning.") + return + + # Build validation inputs from last 10% of training portion + val_split = int(len(inputs) * 0.9) + train_inputs = inputs[:val_split] + val_inputs = inputs[val_split:] if val_split < len(inputs) else None + + output_dir = output_dir or str(OUTPUTS_DIR / "chronos_finetuned") + effective_batch = batch_size if batch_size is not None else min(32, len(train_inputs)) + + print(f"Fine-tuning with LoRA: {len(train_inputs)} train windows, " + 
f"{len(val_inputs) if val_inputs else 0} val windows, " + f"{num_steps} steps, batch_size={effective_batch}") + + finetuned = self.pipeline.fit( + inputs=train_inputs, + prediction_length=prediction_length, + validation_inputs=val_inputs, + finetune_mode="lora", + learning_rate=learning_rate, + num_steps=num_steps, + batch_size=effective_batch, + output_dir=output_dir, + ) + + self.pipeline = finetuned + print(f"Fine-tuning complete. Model saved → {output_dir}") + + # ------------------------------------------------------------------ + # Walk-forward benchmark + # ------------------------------------------------------------------ + + def benchmark( + self, + df: Optional[pd.DataFrame] = None, + train_ratio: float = 0.75, + prediction_length: int = STEPS_PER_DAY, + max_test_days: Optional[int] = None, + covariate_modes: Optional[list[str]] = None, + ) -> pd.DataFrame: + """Walk-forward evaluation across covariate modes. + + Predicts 96 steps (24h) on the regular grid, evaluates ONLY on + daytime steps where actual A > 0. 
+ """ + if df is None: + df = self.load_data() + + if covariate_modes is None: + covariate_modes = ["none", "ims", "sensor", "all"] + + sparse = self.load_sparse_data() + daytime_timestamps = set(sparse["timestamp_utc"]) + + split_idx = int(len(df) * train_ratio) + test_starts = list(range(split_idx, len(df) - prediction_length, prediction_length)) + if max_test_days is not None: + test_starts = test_starts[:max_test_days] + + results = {} + for mode in covariate_modes: + all_actual, all_pred = [], [] + + for start_idx in test_starts: + forecast_df = self.forecast_day( + df, start_idx, prediction_length, covariate_mode=mode, + ) + + actual_slice = df.iloc[start_idx : start_idx + prediction_length] + if len(actual_slice) < prediction_length: + continue + + daytime_mask = actual_slice["timestamp_utc"].isin(daytime_timestamps).values + daytime_mask = daytime_mask[:len(forecast_df)] + + if daytime_mask.sum() < 5: + continue + + actual_day = actual_slice["A"].values[:len(forecast_df)][daytime_mask] + pred_day = np.clip(forecast_df["median"].values[daytime_mask], 0, None) + + all_actual.append(actual_day) + all_pred.append(pred_day) + + if not all_actual: + continue + + actual_flat = np.concatenate(all_actual) + pred_flat = np.concatenate(all_pred) + + results[mode] = { + "MAE": round(float(mean_absolute_error(actual_flat, pred_flat)), 4), + "RMSE": round( + float(np.sqrt(mean_squared_error(actual_flat, pred_flat))), 4 + ), + "R2": round(float(r2_score(actual_flat, pred_flat)), 4), + "n_windows": len(all_actual), + "n_steps": len(actual_flat), + } + print(f" {mode:12s}: MAE={results[mode]['MAE']:.4f} " + f"RMSE={results[mode]['RMSE']:.4f} R²={results[mode]['R2']:.4f} " + f"({results[mode]['n_windows']} windows, " + f"{results[mode]['n_steps']} daytime steps)") + + comparison = pd.DataFrame(results).T + comparison.index.name = "mode" + comparison.reset_index(inplace=True) + + # Append ML baseline row for app comparison + ml_baseline = pd.DataFrame([{ + "mode": "ML 
def plot_sample_forecast(
    self,
    df: Optional[pd.DataFrame] = None,
    test_day_idx: int = 0,
    train_ratio: float = 0.75,
    prediction_length: int = STEPS_PER_DAY,
) -> None:
    """Generate a sample forecast plot with confidence bands.

    Forecasts one window starting ``test_day_idx`` windows after the
    ``train_ratio`` split (covariate mode ``"all"``), draws the actual
    series, the clipped median forecast and the 10-90% band, and writes the
    figure to ``OUTPUTS_DIR / "chronos_forecast_sample.png"``.

    Args:
        df: Data to plot from; loaded via ``self.load_data()`` when None.
        test_day_idx: Index of the window after the split to plot.
        train_ratio: Fraction of ``df`` treated as training data.
        prediction_length: Number of steps to forecast and plot.

    Prints a message and returns without plotting when the requested window
    does not fit inside ``df``.
    """
    import matplotlib.pyplot as plt

    if df is None:
        df = self.load_data()

    split_idx = int(len(df) * train_ratio)
    start_idx = split_idx + test_day_idx * prediction_length

    # A negative start would silently wrap around with iloc; treat it like
    # any other window that does not fit in the data.
    if start_idx < 0 or start_idx + prediction_length > len(df):
        print("Not enough data for sample forecast plot.")
        return

    forecast_df = self.forecast_day(
        df, start_idx, prediction_length, covariate_mode="all",
    )
    actual = df["A"].iloc[start_idx : start_idx + prediction_length].values

    n_steps = len(forecast_df)
    # x-axis in hours; 0.25 h per step is hard-coded here.
    hours = np.arange(n_steps) * 0.25
    out_path = OUTPUTS_DIR / "chronos_forecast_sample.png"

    fig, ax = plt.subplots(figsize=(12, 5))
    try:
        ax.plot(hours, actual[:n_steps], "k-", linewidth=1.5, label="Actual A")
        ax.plot(
            hours, np.clip(forecast_df["median"].values, 0, None),
            "b-", linewidth=1.5, label="Chronos-2 median",
        )
        ax.fill_between(
            hours,
            np.clip(forecast_df["low_10"].values, 0, None),
            forecast_df["high_90"].values,
            alpha=0.25, color="steelblue", label="10-90% CI",
        )
        ax.set_xlabel("Hours ahead")
        ax.set_ylabel("A (umol CO2 m-2 s-1)")
        ax.axhline(0, color="gray", linewidth=0.5, linestyle="--")

        ts = df["timestamp_utc"].iloc[start_idx]
        ax.set_title(f"Chronos-2 Day-Ahead Forecast — {ts:%Y-%m-%d %H:%M}")
        ax.legend()
        ax.grid(True, alpha=0.3)

        OUTPUTS_DIR.mkdir(parents=True, exist_ok=True)
        fig.savefig(out_path, dpi=150, bbox_inches="tight")
    finally:
        # Always release the figure, even if plotting or saving fails.
        plt.close(fig)
    print(f"Saved plot → {OUTPUTS_DIR / 'chronos_forecast_sample.png'}")
class PhotosynthesisPredictor:
    """Train multiple regressors on the same features and compare them.

    ``train`` fits every estimator in ``self.models``; ``evaluate`` scores
    them on a test set and caches predictions and metrics in
    ``self.results``; ``plot_results`` visualises the best ones.
    """

    def __init__(self):
        # name -> estimator; all entries are fitted together by train()
        self.models: dict = {
            "LinearRegression": LinearRegression(),
            "DecisionTree": DecisionTreeRegressor(max_depth=6, min_samples_leaf=10),
            "RandomForest": RandomForestRegressor(
                n_estimators=200, max_depth=8, min_samples_leaf=5,
                n_jobs=-1, random_state=42,
            ),
            "GradientBoosting": GradientBoostingRegressor(
                n_estimators=300, max_depth=4, learning_rate=0.05,
                min_samples_leaf=10, random_state=42,
            ),
        }
        # XGBoost is optional; it is only registered when the import worked.
        if _HAS_XGB:
            self.models["XGBoost"] = XGBRegressor(
                n_estimators=300, max_depth=4, learning_rate=0.05,
                min_child_weight=10, reg_alpha=0.1, reg_lambda=1.0,
                n_jobs=-1, random_state=42,
            )
        # name -> {"predictions", "rmse", "mae", "r2"}, filled by evaluate()
        self.results: dict[str, dict] = {}

    def train(self, X_train: pd.DataFrame, y_train: pd.Series) -> None:
        """Fit all models on (X_train, y_train)."""
        for model in self.models.values():
            model.fit(X_train, y_train)

    def evaluate(
        self,
        X_test: pd.DataFrame,
        y_test: pd.Series,
    ) -> pd.DataFrame:
        """Predict with each model and compute RMSE, MAE and R2.

        Predictions and metrics are cached in ``self.results`` under the
        model name.

        Returns:
            One row per model with columns ``model``, ``RMSE``, ``MAE``, ``R2``.
        """
        rows = []
        for name, model in self.models.items():
            pred = model.predict(X_test)
            rmse = float(np.sqrt(mean_squared_error(y_test, pred)))
            mae = float(mean_absolute_error(y_test, pred))
            r2 = float(r2_score(y_test, pred))
            self.results[name] = {"predictions": pred, "rmse": rmse, "mae": mae, "r2": r2}
            rows.append({"model": name, "RMSE": rmse, "MAE": mae, "R2": r2})
        return pd.DataFrame(rows)

    def get_feature_importance(self, model_name: str | None = None) -> pd.DataFrame:
        """Return feature importances from a tree-based model.

        When ``model_name`` is not given, the first available of
        XGBoost > GradientBoosting > RandomForest > DecisionTree is used.

        Returns:
            Frame with columns ``feature`` and ``importance`` sorted by
            descending importance, or an empty frame when no candidate
            exposes ``feature_importances_``.
        """
        if model_name:
            candidates = [model_name]
        else:
            candidates = ["XGBoost", "GradientBoosting", "RandomForest", "DecisionTree"]
        for name in candidates:
            m = self.models.get(name)
            if m is not None and hasattr(m, "feature_importances_"):
                imp = m.feature_importances_
                return pd.DataFrame({
                    # fall back to positional labels when the model was not
                    # fitted on a frame with named columns
                    "feature": getattr(m, "feature_names_in_", list(range(len(imp)))),
                    "importance": imp,
                }).sort_values("importance", ascending=False)
        return pd.DataFrame()

    def plot_results(
        self,
        y_test: pd.Series,
        predictions: Optional[dict[str, np.ndarray]] = None,
        save_path: Optional[Path] = None,
    ) -> None:
        """Plot predicted-vs-actual scatter and a time-series overlay.

        Args:
            y_test: Reference values of A.
            predictions: Mapping model name -> prediction array. When None
                (or empty), the predictions cached by ``evaluate`` are used.
            save_path: Where to write the figure; nothing is written when
                falsy. Parent directories are created as needed.

        Models are ranked by the R2 stored in ``self.results``; models
        supplied via ``predictions`` that were never evaluated rank last but
        are still eligible for plotting. Does nothing when matplotlib is
        unavailable or there is nothing to plot.
        """
        if not _HAS_PLOT:
            return
        preds = predictions or {
            n: res["predictions"] for n, res in self.results.items()
        }
        if not preds:
            return

        def _r2(model_name: str) -> float:
            # -999 keeps models without a recorded score at the bottom
            return self.results.get(model_name, {}).get("r2", -999)

        # Rank the models we can actually plot, not just the evaluated ones,
        # so explicitly supplied predictions are never silently ignored.
        ranked = sorted(preds, key=_r2, reverse=True)
        name = ranked[0]

        fig, axes = plt.subplots(1, 2, figsize=(12, 5))
        try:
            # Scatter: best available model
            ax = axes[0]
            ax.scatter(y_test, preds[name], alpha=0.5, s=10)
            mn = min(y_test.min(), preds[name].min())
            mx = max(y_test.max(), preds[name].max())
            ax.plot([mn, mx], [mn, mx], "k--", label="1:1")
            ax.set_xlabel("Approx A (µmol m⁻² s⁻¹)")
            ax.set_ylabel("Predicted A")
            ax.set_title(f"Predicted vs approx A ({name})")
            ax.legend()
            ax.set_aspect("equal")

            # Time series overlay: top two models
            ax = axes[1]
            ax.plot(y_test.values, label="Approx A", alpha=0.8)
            for n in ranked[:2]:
                if "r2" in self.results.get(n, {}):
                    label = f"{n} (R²={self.results[n]['r2']:.2f})"
                else:
                    label = n
                ax.plot(preds[n], label=label, alpha=0.7)
            ax.set_xlabel("Time index")
            ax.set_ylabel("A (umol m-2 s-1)")
            ax.set_title("Time series overlay")
            ax.legend()

            fig.tight_layout()
            if save_path:
                save_path = Path(save_path)
                save_path.parent.mkdir(parents=True, exist_ok=True)
                fig.savefig(save_path, dpi=150)
        finally:
            # Close this specific figure even if plotting or saving fails.
            plt.close(fig)
def merge_ims_with_labels(
    self,
    ims_df: pd.DataFrame,
    labels: pd.Series,
    timestamp_col_ims: str = "timestamp_utc",
    timestamp_index_labels: bool = True,
) -> pd.DataFrame:
    """Inner-join IMS weather rows with labels on timestamp.

    The labels' index is used as the join key and their values end up in a
    column named ``"A"``. The result keeps all IMS columns (including
    ``timestamp_col_ims``) followed by ``"A"``.

    Args:
        ims_df: IMS data containing ``timestamp_col_ims``.
        labels: Series whose index holds the timestamps to join on.
        timestamp_col_ims: Name of the timestamp column in ``ims_df``.
        timestamp_index_labels: Must be True; kept for interface
            compatibility.

    Returns:
        The merged frame, or an empty frame when either input is empty or
        ``timestamp_col_ims`` is missing from ``ims_df``.

    Raises:
        ValueError: If ``timestamp_index_labels`` is False.
    """
    if ims_df.empty or labels.empty:
        return pd.DataFrame()
    if timestamp_col_ims not in ims_df.columns:
        return pd.DataFrame()
    if not timestamp_index_labels:
        raise ValueError(
            "merge_ims_with_labels: labels must have a datetime index "
            "(use timestamp_index_labels=True). Position-based alignment "
            "is not supported because it silently produces incorrect joins "
            "when IMS and label row counts differ."
        )

    # Join on a reserved key name. Reusing the labels' own index name breaks
    # when it matches a non-key IMS column (e.g. "time" or "index"): pandas
    # then suffixes both as *_x / *_y and the key is never dropped.
    key = "__label_timestamp__"
    while key in ims_df.columns:
        key += "_"

    lab = labels.rename("A").rename_axis(key).reset_index()
    merged = ims_df.merge(lab, left_on=timestamp_col_ims, right_on=key, how="inner")
    return merged.drop(columns=[key])
def add_cyclical_time_features(
    df: pd.DataFrame,
    timestamp_col: str | None = None,
    index_is_timestamp: bool = False,
    day_period: float = 365.25,
) -> pd.DataFrame:
    """
    Add hour_sin/hour_cos and doy_sin/doy_cos to a DataFrame.

    Timestamps are interpreted in UTC for both sources (naive values are
    treated as UTC, tz-aware values are converted).

    Parameters
    ----------
    df : DataFrame
        Input data; never modified.
    timestamp_col : str or None
        Column containing timestamps; if None and index_is_timestamp=True,
        the index is used as the timestamp source.
    index_is_timestamp : bool
        Whether to treat the index as the timestamp source when timestamp_col
        is None (or not a column). Only honoured for a DatetimeIndex.
    day_period : float
        Period for day-of-year cycle (default 365.25).

    Returns
    -------
    DataFrame
        Copy of df with four extra columns: hour_sin, hour_cos, doy_sin,
        doy_cos. An unchanged copy is returned when no timestamp source can
        be resolved.
    """
    out = df.copy()

    # Normalise both sources to a DatetimeIndex. The `.dt` accessor exists
    # only on Series, so using it on the index (as before) raised
    # AttributeError whenever index_is_timestamp=True was used.
    if timestamp_col is not None and timestamp_col in out.columns:
        ts = pd.DatetimeIndex(pd.to_datetime(out[timestamp_col], utc=True))
    elif index_is_timestamp and isinstance(out.index, pd.DatetimeIndex):
        ts = pd.to_datetime(out.index, utc=True)
    else:
        # No-op if we cannot resolve a timestamp
        return out

    # Plain arrays are assigned positionally, which is safe even when the
    # frame's index contains duplicates.
    hour = np.asarray(ts.hour + ts.minute / 60.0, dtype=float)
    doy = np.asarray(ts.dayofyear, dtype=float)

    hour_angle = 2 * np.pi * hour / 24.0
    doy_angle = 2 * np.pi * doy / day_period

    out["hour_sin"] = np.sin(hour_angle)
    out["hour_cos"] = np.cos(hour_angle)
    out["doy_sin"] = np.sin(doy_angle)
    out["doy_cos"] = np.cos(doy_angle)
    return out
@staticmethod
def assign_season(df: pd.DataFrame, ts_col: str = "timestamp_utc") -> pd.DataFrame:
    """Return a copy of ``df`` with a ``season`` column.

    The season is the UTC calendar year of each row's ``ts_col`` timestamp.
    The input frame is left untouched.
    """
    labelled = df.copy()
    years = pd.to_datetime(labelled[ts_col], utc=True).dt.year
    labelled["season"] = years
    return labelled
@staticmethod
def create_lag_features(df: pd.DataFrame) -> pd.DataFrame:
    """Create within-session lags, rolling stats, and previous-session summary.

    Works on a copy of ``df``, which must carry a ``session_id`` column.
    For every column of ``LAG_COLS`` present in the frame it adds:

    * ``<col>_lag<k>`` for each ``k`` in ``LAG_STEPS`` (shifted within the
      session),
    * ``<col>_rmean<w>`` for each ``w`` in ``ROLLING_WINDOWS`` and, for
      ``A`` only, ``<col>_rstd<w>``. Both are computed on the series
      shifted by one row, so the current row's own value is excluded.

    When both ``A`` and ``season`` are present, ``prev_sess_mean_A`` and
    ``prev_sess_max_A`` carry the previous session's mean/max of ``A``;
    previous-session features only link sessions within the same season.

    Finally, remaining NaNs in the lag columns are filled with the last
    value of the previous session (same season when ``season`` exists).
    The same fill value is used for every lag step of a column.
    """
    out = df.copy()

    # Per-session lags and rolling stats
    for col in LAG_COLS:
        if col not in out.columns:
            continue
        for lag in LAG_STEPS:
            col_name = f"{col}_lag{lag}"
            out[col_name] = out.groupby("session_id")[col].shift(lag)
        for w in ROLLING_WINDOWS:
            # shift(1) before rolling keeps the current row out of its own
            # statistic; the lambda is consumed immediately, so capturing
            # the loop variable `w` is safe here.
            out[f"{col}_rmean{w}"] = out.groupby("session_id")[col].transform(
                lambda s: s.shift(1).rolling(w, min_periods=1).mean()
            )
            if col == "A":
                out[f"{col}_rstd{w}"] = out.groupby("session_id")[col].transform(
                    lambda s: s.shift(1).rolling(w, min_periods=1).std()
                )

    # Previous-session summary for A (within same season)
    if "A" in out.columns and "season" in out.columns:
        sess_stats = out.groupby("session_id").agg(
            season=("season", "first"),
            mean_A=("A", "mean"),
            max_A=("A", "max"),
        )
        # Shift within season so first session of each season gets NaN
        sess_stats["prev_sess_mean_A"] = sess_stats.groupby("season")["mean_A"].shift(1)
        sess_stats["prev_sess_max_A"] = sess_stats.groupby("season")["max_A"].shift(1)
        # sess_stats is indexed by session_id, hence right_index=True
        out = out.merge(
            sess_stats[["prev_sess_mean_A", "prev_sess_max_A"]],
            left_on="session_id", right_index=True, how="left",
        )

    # Fill NaN lags at session start with prev-session end values (within season)
    for col in LAG_COLS:
        if col not in df.columns:
            continue
        # Last observed value of `col` in each session, from the original data
        sess_end = df.groupby("session_id").agg(
            last_val=(col, "last"),
        )
        if "season" in df.columns:
            sess_season = df.groupby("session_id")["season"].first()
            sess_end["season"] = sess_season
            sess_end["prev_end"] = sess_end.groupby("season")["last_val"].shift(1)
        else:
            sess_end["prev_end"] = sess_end["last_val"].shift(1)
        # Temporary helper column; removed again at the end of this iteration
        out = out.merge(
            sess_end[["prev_end"]].rename(columns={"prev_end": f"_prev_end_{col}"}),
            left_on="session_id", right_index=True, how="left",
        )
        for lag in LAG_STEPS:
            lag_col = f"{col}_lag{lag}"
            if lag_col in out.columns:
                # fillna touches every NaN in the lag column, whatever its cause
                out[lag_col] = out[lag_col].fillna(out[f"_prev_end_{col}"])
        out.drop(columns=[f"_prev_end_{col}"], inplace=True)

    return out
def train_all_horizons(self, df: pd.DataFrame, train_ratio: float = 0.75) -> pd.DataFrame:
    """Train one model per horizon, treating each season independently.

    Within each season the first ``train_ratio`` rows are used for
    training and the remainder for testing. Training data from all
    seasons is pooled to fit a single model per horizon, and test
    data from all seasons is pooled for evaluation. Per-season
    metrics are also reported.

    ``df`` must contain a ``season`` column. Rows are split in their
    existing order; this method does not sort them.

    Side effects: sets ``self.feature_cols``, stores each fitted model in
    ``self.models[horizon]`` and the pooled predictions/metrics in
    ``self.results[horizon]``, and prints one line per metric row.

    Returns:
        One row per horizon with ``season == "all"`` plus one row per
        (horizon, season), with columns ``horizon``, ``season``,
        ``approach``, ``RMSE``, ``MAE``, ``R2``, ``n_train``, ``n_test``.
        Horizons without enough data in any season are skipped.
    """
    self.feature_cols = self._get_feature_cols(df)
    seasons = sorted(df["season"].unique())
    rows = []

    for horizon_name in HORIZONS:
        # Collect train/test splits per season
        all_X_train, all_y_train = [], []
        all_X_test, all_y_test = [], []
        season_train_n: dict[int, int] = {}
        season_test: dict[int, tuple] = {}

        for season in seasons:
            sdf = df[df["season"] == season].copy()
            target = self.create_horizon_target(sdf, horizon_name)
            sdf = sdf.copy()
            # Positional assignment: only the target's values are used,
            # not its index.
            sdf["target"] = target.values
            sdf = sdf.dropna(subset=self.feature_cols + ["target"])

            # Seasons with fewer than 30 usable rows are skipped
            if len(sdf) < 30:
                continue

            # Require at least 10 training and 5 test rows per season
            n = int(len(sdf) * train_ratio)
            if n < 10 or len(sdf) - n < 5:
                continue

            X_tr = sdf[self.feature_cols].iloc[:n]
            y_tr = sdf["target"].iloc[:n]
            X_te = sdf[self.feature_cols].iloc[n:]
            y_te = sdf["target"].iloc[n:]

            all_X_train.append(X_tr)
            all_y_train.append(y_tr)
            all_X_test.append(X_te)
            all_y_test.append(y_te)
            season_train_n[season] = len(X_tr)
            season_test[season] = (X_te, y_te)

        if not all_X_train or not all_X_test:
            print(f" {horizon_name}: insufficient data across seasons, skipping")
            continue

        # Pool all seasons: one model per horizon
        X_train = pd.concat(all_X_train)
        y_train = pd.concat(all_y_train)
        X_test = pd.concat(all_X_test)
        y_test = pd.concat(all_y_test)

        model = self._make_model()
        model.fit(X_train, y_train)
        self.models[horizon_name] = model

        # Overall metrics
        pred = model.predict(X_test)
        rmse = float(np.sqrt(mean_squared_error(y_test, pred)))
        mae = float(mean_absolute_error(y_test, pred))
        r2 = float(r2_score(y_test, pred))
        self.results[horizon_name] = {
            "predictions": pred, "y_test": y_test.values,
            "rmse": rmse, "mae": mae, "r2": r2,
            "n_train": len(X_train), "n_test": len(X_test),
        }
        rows.append({
            "horizon": horizon_name, "season": "all",
            "approach": "time_series",
            "RMSE": round(rmse, 4), "MAE": round(mae, 4), "R2": round(r2, 4),
            "n_train": len(X_train), "n_test": len(X_test),
        })
        print(f" {horizon_name} [all]: RMSE={rmse:.4f} MAE={mae:.4f} "
              f"R²={r2:.4f} (train={len(X_train)}, test={len(X_test)})")

        # Per-season metrics (same pooled model, season-specific test rows)
        for season, (X_te_s, y_te_s) in season_test.items():
            pred_s = model.predict(X_te_s)
            rmse_s = float(np.sqrt(mean_squared_error(y_te_s, pred_s)))
            mae_s = float(mean_absolute_error(y_te_s, pred_s))
            r2_s = float(r2_score(y_te_s, pred_s))
            rows.append({
                "horizon": horizon_name, "season": str(season),
                "approach": "time_series",
                "RMSE": round(rmse_s, 4), "MAE": round(mae_s, 4),
                "R2": round(r2_s, 4),
                "n_train": season_train_n[season],
                "n_test": len(X_te_s),
            })
            print(f" {horizon_name} [{season}]: RMSE={rmse_s:.4f} "
                  f"MAE={mae_s:.4f} R²={r2_s:.4f} (test={len(X_te_s)})")

    return pd.DataFrame(rows)
@staticmethod
def _make_model():
    """Build a fresh, unfitted regressor for one horizon.

    Returns an ``XGBRegressor`` when xgboost could be imported, otherwise a
    scikit-learn ``GradientBoostingRegressor`` with matching tree settings.
    """
    # Settings shared by both back-ends
    shared = {
        "n_estimators": 300,
        "max_depth": 4,
        "learning_rate": 0.05,
        "random_state": 42,
    }
    if not _HAS_XGB:
        return GradientBoostingRegressor(min_samples_leaf=10, **shared)
    return XGBRegressor(
        min_child_weight=10, reg_alpha=0.1, reg_lambda=1.0, n_jobs=-1, **shared,
    )
def extract_json_object(text: str) -> dict:
    """
    Extract a JSON object from raw model text.

    Strips optional markdown ``` fences and returns the first {...} block
    that parses as JSON. Text after that object (including stray braces or
    further objects) is ignored, and a leading brace that does not start
    valid JSON is skipped in favour of the next candidate.

    Raises
    ------
    ValueError
        If no JSON object can be found or parsed (``json.JSONDecodeError``,
        a ``ValueError`` subclass, when braces are present but nothing
        parses).
    """
    # Strip markdown fences like ```json ... ```
    cleaned = re.sub(r"```(?:json)?\s*", "", text).strip().rstrip("`").strip()
    start = cleaned.find("{")
    end = cleaned.rfind("}") + 1
    if start == -1 or end <= start:
        raise ValueError(f"No JSON object found in LLM response:\n{cleaned[:300]}")

    # raw_decode stops at the end of the first complete value, so trailing
    # prose or extra braces after the object no longer break parsing the way
    # slicing "first { .. last }" did.
    decoder = json.JSONDecoder()
    first_error = None
    while start != -1:
        try:
            obj, _ = decoder.raw_decode(cleaned, start)
        except json.JSONDecodeError as err:
            if first_error is None:
                first_error = err
            start = cleaned.find("{", start + 1)
        else:
            return obj
    # The loop ran at least once, so first_error is always set here.
    raise first_error
class CanopyPhotosynthesisModel:
    """Compute vine-level A by running Farquhar on each canopy zone.

    The canopy is treated as a grid of ``n_vertical`` x ``n_horizontal`` zones
    taken from the shadow model. Each zone receives its own PAR; its net
    assimilation is computed with the Farquhar model, and the zones are
    combined with LAI-derived weights.
    """

    def __init__(
        self,
        shadow_model: ShadowModel | None = None,
        farquhar_model: FarquharModel | None = None,
        lai: float = 2.5,
        shade_temp_offset: float = -1.5,
        diffuse_fraction: float = 0.15,
    ):
        """Store collaborators and pre-compute the normalised zone weights.

        Parameters
        ----------
        shadow_model, farquhar_model
            Collaborators; default-constructed when omitted.
        lai
            Multiplier applied to the weighted zone assimilation.
        shade_temp_offset
            Added to leaf temperature in shaded zones (half of it is added
            to air temperature).
        diffuse_fraction
            Passed through to ``compute_par_distribution``.
        """
        self.shadow = shadow_model or ShadowModel()
        self.farquhar = farquhar_model or FarquharModel()
        self.lai = lai
        self.shade_temp_offset = shade_temp_offset
        self.diffuse_fraction = diffuse_fraction

        # Vertical LAI weights (one per row, bottom to top); every horizontal
        # zone within a row gets an equal share.
        nh = self.shadow.n_horizontal
        vert_weights = self.shadow.lai_weights  # shape (n_vertical,)
        weights = np.outer(vert_weights, np.ones(nh) / nh)
        # Normalise so the weights sum to 1.
        self._zone_weights = weights / weights.sum()

    def compute_vine_A(
        self,
        par: float,
        Tleaf: float,
        CO2: float,
        VPD: float,
        Tair: float,
        shadow_mask: np.ndarray,
        solar_elevation: float | None = None,
        solar_azimuth: float | None = None,
        tracker_tilt: float | None = None,
    ) -> dict:
        """
        Compute vine-level A for a single timestep.

        Returns dict with:
            A_vine: weighted vine-level A (umol CO2 m-2 s-1)
            A_zones: array of A per zone (n_vertical x n_horizontal)
            sunlit_fraction: fraction of zones in sun
            par_zones: PAR per zone
            fruiting_zone_A / fruiting_zone_par: row means for the
                FRUITING_ZONE_INDEX row (0.0 if that row does not exist)
            top_canopy_A / top_canopy_par: row means for row
                min(n_vertical - 1, 2)
        """
        par_zones = self.shadow.compute_par_distribution(
            par, shadow_mask, self.diffuse_fraction,
            solar_elevation=solar_elevation, solar_azimuth=solar_azimuth,
            tracker_tilt=tracker_tilt,
        )
        A_zones = np.zeros_like(par_zones)

        for iz in range(self.shadow.n_vertical):
            for ix in range(self.shadow.n_horizontal):
                zone_par = par_zones[iz, ix]
                # Shaded zones are slightly cooler
                shaded = shadow_mask[iz, ix]
                zone_tleaf = Tleaf + (self.shade_temp_offset if shaded else 0.0)
                zone_tair = Tair + (self.shade_temp_offset * 0.5 if shaded else 0.0)

                # Zones with no light keep A = 0.
                if zone_par > 0:
                    A_zones[iz, ix] = self.farquhar.calc_photosynthesis(
                        PAR=zone_par, Tleaf=zone_tleaf, CO2=CO2,
                        VPD=VPD, Tair=zone_tair,
                    )

        A_vine = float(np.sum(A_zones * self._zone_weights)) * self.lai
        sunlit_frac = self.shadow.sunlit_fraction(shadow_mask)

        def _row_mean(grid: np.ndarray, row: int) -> float:
            """Mean of one vertical row, or 0.0 when the grid has no such row."""
            return float(grid[row, :].mean()) if grid.shape[0] > row else 0.0

        fz = FRUITING_ZONE_INDEX
        top = min(self.shadow.n_vertical - 1, 2)

        return {
            "A_vine": A_vine,
            "A_zones": A_zones,
            "sunlit_fraction": sunlit_frac,
            "par_zones": par_zones,
            "fruiting_zone_A": _row_mean(A_zones, fz),
            "fruiting_zone_par": _row_mean(par_zones, fz),
            "top_canopy_A": _row_mean(A_zones, top),
            "top_canopy_par": _row_mean(par_zones, top),
        }

    def compute_timeseries(
        self,
        df: pd.DataFrame,
        shadow_masks: np.ndarray,
        par_col: str = "Air1_PAR_ref",
        tleaf_col: str = "Air1_leafTemperature_ref",
        co2_col: str = "Air1_CO2_ref",
        vpd_col: str = "Air1_VPD_ref",
        tair_col: str = "Air1_airTemperature_ref",
    ) -> pd.DataFrame:
        """
        Compute vine-level A for each row in df using pre-computed shadow masks.
        shadow_masks: array of shape (len(df), n_vertical, n_horizontal).

        For every row the model is run twice: once with that row's mask
        ("panel") and once with an all-False mask ("ref", i.e. no shadow).
        Missing sensor values fall back to fixed defaults (PAR 0, Tleaf 25,
        CO2 400, VPD 1.5, Tair 25).
        """
        # (column, fallback used when the reading is NaN), in the positional
        # order expected by compute_vine_A.
        inputs = (
            (par_col, 0.0),
            (tleaf_col, 25.0),
            (co2_col, 400.0),
            (vpd_col, 1.5),
            (tair_col, 25.0),
        )
        records = []
        for i, (_, row) in enumerate(df.iterrows()):
            par, tleaf, co2, vpd, tair = (
                float(row[col]) if pd.notna(row[col]) else fallback
                for col, fallback in inputs
            )

            mask = shadow_masks[i]
            result = self.compute_vine_A(par, tleaf, co2, vpd, tair, mask)

            # Also compute reference (no panel = no shadow)
            no_shadow = np.zeros_like(mask, dtype=bool)
            ref_result = self.compute_vine_A(par, tleaf, co2, vpd, tair, no_shadow)

            records.append({
                "A_vine_panel": result["A_vine"],
                "A_vine_ref": ref_result["A_vine"],
                "sunlit_fraction": result["sunlit_fraction"],
                "par_mean_panel": result["par_zones"].mean(),
                "par_mean_ref": ref_result["par_zones"].mean(),
            })
        return pd.DataFrame(records, index=df.index)
def _entropy_from_topt(Ha: float, Hd: float, Topt: float) -> float:
    """Return the entropy term S implied by an optimum temperature.

    Evaluates ``Hd / Topt + R * ln(Ha / (Hd - Ha))`` (Medlyn et al. 2002,
    Eqn 9). ``Topt`` is passed in kelvin by the module-level callers.
    """
    deactivation_term = Hd / Topt
    log_ratio = np.log(Ha / (Hd - Ha))
    return deactivation_term + R * log_ratio
class FarquharModel:
    """
    Farquhar et al. (1980) leaf photosynthesis for grapevine.

    Michaelis constants and the CO2 compensation point use the Bernacchi et
    al. (2001) temperature functions; Vcmax and Jmax use the modified
    Arrhenius response with Greer & Weedon (2012) parameters. Every tunable
    value is kept in ``self.params``.
    """

    def __init__(
        self,
        k25_vcmax: float = _GW12_VCMAX["k25"],
        k25_jmax: float = _GW12_JMAX["k25"],
        Ha_vcmax: float = _GW12_VCMAX["Ha"],
        Hd_vcmax: float = _GW12_VCMAX["Hd"],
        S_vcmax: float = _GW12_VCMAX["S"],
        Ha_jmax: float = _GW12_JMAX["Ha"],
        Hd_jmax: float = _GW12_JMAX["Hd"],
        S_jmax: float = _GW12_JMAX["S"],
        alpha: float = ALPHA_DEFAULT,
        theta: float = THETA,
        rd_frac: float = RD_FRAC,
    ):
        """Collect the model parameters; defaults are the module constants."""
        self.params = dict(
            k25_vcmax=k25_vcmax,
            k25_jmax=k25_jmax,
            Ha_vcmax=Ha_vcmax,
            Hd_vcmax=Hd_vcmax,
            S_vcmax=S_vcmax,
            Ha_jmax=Ha_jmax,
            Hd_jmax=Hd_jmax,
            S_jmax=S_jmax,
            alpha=alpha,
            theta=theta,
            rd_frac=rd_frac,
        )

    @staticmethod
    def calc_Kc(Tk: float) -> float:
        """Michaelis constant for CO2 (Bernacchi et al. 2001), in ppm scale."""
        exponent = 38.05 - 79430.0 / (R * Tk)
        return np.exp(exponent)

    @staticmethod
    def calc_Ko(Tk: float) -> float:
        """Michaelis constant for O2 (Bernacchi et al. 2001)."""
        exponent = 20.30 - 36380.0 / (R * Tk)
        return np.exp(exponent) * 1000.0  # scale to match OI

    @staticmethod
    def calc_gamma_star(Tk: float) -> float:
        """CO2 compensation point (Bernacchi et al. 2001), ppm."""
        exponent = 19.02 - 37830.0 / (R * Tk)
        return np.exp(exponent)

    def _temperature_response(self, Tk: float, suffix: str) -> float:
        """Modified-Arrhenius value for the parameter set named by *suffix*."""
        p = self.params
        return _modified_arrhenius(
            Tk,
            p[f"k25_{suffix}"],
            p[f"Ha_{suffix}"],
            p[f"Hd_{suffix}"],
            p[f"S_{suffix}"],
        )

    def calc_Vcmax(self, Tk: float) -> float:
        """Vcmax at leaf temperature (modified Arrhenius, Greer & Weedon 2012)."""
        return self._temperature_response(Tk, "vcmax")

    def calc_Jmax(self, Tk: float) -> float:
        """Jmax at leaf temperature (modified Arrhenius, Greer & Weedon 2012)."""
        return self._temperature_response(Tk, "jmax")

    def calc_electron_transport(self, PAR: float, Jmax: float) -> float:
        """Solve theta*J^2 - (alpha*PFD + Jmax)*J + alpha*PFD*Jmax = 0 for J.

        Returns the smaller root clipped to [0, Jmax]; 0.0 when there is no
        light or no capacity.
        """
        alpha = self.params["alpha"]
        theta = self.params["theta"]
        if PAR <= 0 or Jmax <= 0:
            return 0.0

        absorbed = alpha * PAR  # PAR is the photon flux, umol photons m-2 s-1
        b = absorbed + Jmax
        c = absorbed * Jmax
        disc = b * b - 4 * theta * c
        if disc < 0:
            # No real root: fall back to the tighter of the two limits.
            return min(absorbed, Jmax)
        root = (b - np.sqrt(disc)) / (2 * theta)
        return float(np.clip(root, 0, Jmax))

    def calc_CWSI(
        self,
        Tleaf: float,
        Tair: float,
        VPD: float,
        dTmin: Optional[float] = None,
        dTmax: Optional[float] = None,
    ) -> float:
        """Crop Water Stress Index. dTmin/dTmax from data or defaults.

        The leaf-air temperature difference is rescaled between the lower
        bound (default -2.0) and upper bound (default 8.0) and clipped to
        [0, 1]. ``VPD`` is accepted but not used in the calculation.
        """
        lower = -2.0 if dTmin is None else dTmin
        upper = 8.0 if dTmax is None else dTmax
        if upper <= lower:
            return 0.0
        dT = Tleaf - Tair
        return float(np.clip((dT - lower) / (upper - lower), 0.0, 1.0))

    def _ci_from_ca(self, ca: float, VPD: float, CWSI: float = 0.0) -> float:
        """Intercellular CO2 from ambient; gs reduced by VPD and CWSI.

        Calibrated so ci/ca ~ 0.7 at low VPD (Greer & Weedon 2012 Fig 2c),
        declining with increasing VPD and water stress. The result is kept
        within [0.3 * ca, ca].
        """
        if VPD is not None:
            vpd_scale = np.exp(-0.3 * max(0, VPD - 1.0))
        else:
            vpd_scale = 1.0
        stress_level = CWSI if CWSI is not None else 0.0
        stress = 1.0 - 0.5 * stress_level
        gs_factor = 2.1 * vpd_scale * stress
        floor = ca * 0.3
        if gs_factor <= 0:
            return floor
        ci = ca * (1.0 - 1.0 / (1.6 * gs_factor))
        return float(np.clip(ci, floor, ca))

    def _compute_rates(
        self,
        PAR: float,
        Tleaf: float,
        CO2: float,
        VPD: float,
        CWSI: Optional[float] = None,
    ) -> tuple[float, float, float]:
        """Shared FvCB core: compute Rubisco-limited (Ac), RuBP-limited (Aj), and dark respiration (Rd).

        Returns (Ac, Aj, Rd) — all in umol CO2 m-2 s-1.
        """
        Tk = Tleaf + 273.15
        # Temperature-dependent kinetics.
        Kc = self.calc_Kc(Tk)
        Ko = self.calc_Ko(Tk)
        gamma_star = self.calc_gamma_star(Tk)
        Vcmax = self.calc_Vcmax(Tk)
        J = self.calc_electron_transport(PAR, self.calc_Jmax(Tk))
        ci = self._ci_from_ca(CO2, VPD, CWSI)

        Rd = self.params["rd_frac"] * Vcmax
        Ac = Vcmax * (ci - gamma_star) / (ci + Kc * (1.0 + OI / Ko))
        Aj = J * (ci - gamma_star) / (4.0 * ci + 8.0 * gamma_star)
        return Ac, Aj, Rd

    def calc_photosynthesis(
        self,
        PAR: float,
        Tleaf: float,
        CO2: float,
        VPD: float,
        Tair: float,
        CWSI: Optional[float] = None,
    ) -> float:
        """
        Net assimilation A (umol CO2 m-2 s-1). PAR in umol m-2 s-1, T in degC,
        CO2 in ppm, VPD in kPa. The result is clipped to >= 0.
        """
        Ac, Aj, Rd = self._compute_rates(PAR, Tleaf, CO2, VPD, CWSI)
        net = min(Ac, Aj) - Rd
        return float(max(0.0, net))

    def calc_photosynthesis_semillon(
        self,
        PAR: float,
        Tleaf: float,
        CO2: float,
        VPD: float,
        Tair: float,
        CWSI: Optional[float] = None,
        transition_temp: Optional[float] = None,
    ) -> tuple[float, str, bool]:
        """
        FvCB with explicit Semillon state transition.

        Returns (A, limiting_state, shading_helps):
        - A: net assimilation (umol CO2 m-2 s-1), clipped to >= 0
        - limiting_state: "RuBP_Limited" or "Rubisco_Limited"
        - shading_helps: True ONLY when the vine is Rubisco-limited AND
          light is abundant relative to Vcmax capacity (Aj > Ac), meaning
          reducing PAR would lower Aj toward Ac without reducing A.
          When False, shading would reduce A — keep panels tracking.

        Parameters
        ----------
        PAR : float
            Photosynthetically active radiation (umol photons m-2 s-1).
        Tleaf : float
            Leaf temperature (°C).
        CO2 : float
            Ambient CO2 (ppm).
        VPD : float
            Vapour pressure deficit (kPa).
        Tair : float
            Air temperature (°C).
        CWSI : float, optional
            Crop Water Stress Index (0–1). Default 0 (no stress).
        transition_temp : float, optional
            Semillon RuBP→Rubisco transition temperature (°C).
            Default from config: SEMILLON_TRANSITION_TEMP_C.
        """
        if transition_temp is None:
            # Imported lazily so the config is only needed for the default.
            from config.settings import SEMILLON_TRANSITION_TEMP_C
            transition_temp = SEMILLON_TRANSITION_TEMP_C

        Ac, Aj, Rd = self._compute_rates(PAR, Tleaf, CO2, VPD, CWSI or 0.0)
        net = float(max(0.0, min(Ac, Aj) - Rd))

        if Tleaf < transition_temp:
            return net, "RuBP_Limited", False
        return net, "Rubisco_Limited", (Aj > Ac)

    def compute_all(
        self,
        df: pd.DataFrame,
        par_col: str = "Air1_PAR_ref",
        tleaf_col: str = "Air1_leafTemperature_ref",
        co2_col: str = "Air1_CO2_ref",
        vpd_col: str = "Air1_VPD_ref",
        tair_col: str = "Air1_airTemperature_ref",
        humidity_col: Optional[str] = "Air1_airHumidity_ref",
    ) -> pd.Series:
        """
        Compute A for each row. Uses CWSI from Tleaf/Tair/VPD with empirical bounds.
        Returns Series of A (umol CO2 m-2 s-1), index aligned to df.

        If any required column is missing the whole Series is NaN; rows with
        a missing or unconvertible reading yield NaN individually.
        """
        required = [par_col, tleaf_col, co2_col, vpd_col, tair_col]
        if any(col not in df.columns for col in required):
            return pd.Series(np.nan, index=df.index)

        # Empirical CWSI bounds from (Tleaf - Tair) percentiles if enough data
        dT = df[tleaf_col] - df[tair_col]
        enough_data = len(dT.dropna()) > 10
        dTmin = float(dT.quantile(0.05)) if enough_data else -2.0
        dTmax = float(dT.quantile(0.95)) if enough_data else 8.0

        values = []
        for _, row in df.iterrows():
            try:
                readings = [row[col] for col in required]
                if pd.isna(readings).any():
                    values.append(np.nan)
                    continue
                par, tleaf, co2, vpd, tair = (float(v) for v in readings)
                cwsi = self.calc_CWSI(tleaf, tair, vpd, dTmin, dTmax)
                values.append(
                    self.calc_photosynthesis(par, tleaf, co2, vpd, tair, CWSI=cwsi)
                )
            except (TypeError, ZeroDivisionError, ValueError):
                values.append(np.nan)
        return pd.Series(values, index=df.index)
+""" + +from __future__ import annotations + +import logging +from dataclasses import dataclass, field +from datetime import date, datetime +from pathlib import Path +from typing import List, Optional, Tuple + +import numpy as np +import pandas as pd + +from config.settings import PHENOLOGY_GDD_THRESHOLDS + +logger = logging.getLogger(__name__) + +# Base temperature for GDD accumulation (Semillon at Sde Boker) +GDD_BASE_TEMP_C = 10.0 + +# Camera URL for live vineyard feed +CAMERA_URL = "https://app.solarwine.ai/images/yerucham/last_view.jpg" + + +# --------------------------------------------------------------------------- +# Data containers +# --------------------------------------------------------------------------- + +@dataclass(frozen=True) +class PhenologyStage: + """Simple container for phenological stage metadata.""" + + id: str + label: str + description: str + + +@dataclass +class GDDState: + """Cumulative GDD tracking state.""" + + cumulative_gdd: float + stage_id: str + stage_label: str + days_computed: int + season_start: str # ISO date + last_date: str # ISO date + daily_gdd: List[float] = field(default_factory=list) + + +@dataclass +class CameraDetection: + """Result of camera-based phenological stage detection.""" + + detected_stage_id: str + confidence: str # "high", "medium", "low" + observations: str # what Gemini saw + matches_gdd: Optional[bool] = None # whether it agrees with GDD estimate + + +# --------------------------------------------------------------------------- +# Stage definitions (shared across all estimation methods) +# --------------------------------------------------------------------------- + +_STAGE_DEFINITIONS = { + "winter_dormancy": PhenologyStage( + id="winter_dormancy", + label="Winter dormancy", + description=( + "No active canopy or fruit. Vines are resting and rebuilding " + "reserves in trunk and roots. Panels track the sun at full " + "astronomical tracking — maximum energy generation." 
+ ), + ), + "budburst_vegetative": PhenologyStage( + id="budburst_vegetative", + label="Budburst / early vegetative", + description=( + "New shoots and leaves are expanding. Vine is light-limited " + "(RuBP regime) — every photon drives canopy growth. Panels at " + "full tracking; energy and vine interests are fully aligned." + ), + ), + "flowering_fruit_set": PhenologyStage( + id="flowering_fruit_set", + label="Flowering / fruit set", + description=( + "Yield formation is highly sensitive — cluster number and berry " + "set are determined now. Light demand is at its peak. Panels at " + "full tracking; energy generation and vine needs are aligned." + ), + ), + "berry_growth": PhenologyStage( + id="berry_growth", + label="Berry growth", + description=( + "Canopy is substantial and berries are expanding rapidly. " + "Water management is critical. Heat stress may begin to " + "limit photosynthesis on the hottest afternoons (Rubisco regime)." + ), + ), + "veraison_ripening": PhenologyStage( + id="veraison_ripening", + label="Veraison / ripening", + description=( + "Berry ripening and flavour development dominate. Sugar loading " + "depends on upper-canopy photosynthesis. Fruiting zone is most " + "vulnerable to sunburn on hot afternoons (>35C)." + ), + ), + "post_harvest_reserves": PhenologyStage( + id="post_harvest_reserves", + label="Post-harvest reserve building", + description=( + "Berries mostly harvested. Canopy refills carbohydrate reserves " + "for next season. Healthy leaves are essential for reserve " + "accumulation. Energy generation is the primary output." + ), + ), +} + + +# --------------------------------------------------------------------------- +# 1. 
def _daily_mean_temperature(df_season: pd.DataFrame) -> Optional[pd.Series]:
    """Return per-day (max + min) / 2 temperature, or None if no column has usable data."""
    if "tdmax_c" in df_season.columns and "tdmin_c" in df_season.columns:
        daily = df_season.resample("D").agg({
            "tdmax_c": "max",
            "tdmin_c": "min",
        }).dropna()
        if not daily.empty:
            return (daily["tdmax_c"] + daily["tdmin_c"]) / 2.0
    # Fallback: derive the daily extremes from the air-temperature readings.
    if "air_temperature_c" in df_season.columns:
        daily = df_season.resample("D").agg({
            "air_temperature_c": ["max", "min"],
        }).dropna()
        daily.columns = ["tmax", "tmin"]
        if not daily.empty:
            return (daily["tmax"] + daily["tmin"]) / 2.0
    return None


def compute_gdd_from_ims(
    ims_df: pd.DataFrame,
    season_start_month: int = 3,
    base_temp: float = GDD_BASE_TEMP_C,
) -> GDDState:
    """Accumulate Growing Degree Days from IMS 15-min temperature data.

    The season runs from the 1st of ``season_start_month`` (of the latest
    year for which that date is not after the newest timestamp) to the newest
    timestamp in the data. Daily GDD is ``max(0, (tmax + tmin) / 2 - base_temp)``.

    Parameters
    ----------
    ims_df : DataFrame
        IMS data with columns: timestamp_utc, tdmax_c, tdmin_c
        (or air_temperature_c for fallback). If ``timestamp_utc`` is absent
        the existing index is used as the time axis.
    season_start_month : int
        Month when GDD accumulation starts (default: March).
    base_temp : float
        Base temperature for GDD calculation (default: 10°C).

    Returns
    -------
    GDDState with cumulative GDD and estimated stage. When the season has no
    rows yet the state is "winter_dormancy" with 0 GDD; when no temperature
    column yields a complete day the state is "unknown" with 0 GDD.
    """
    df = ims_df.copy()
    if "timestamp_utc" in df.columns:
        df["timestamp_utc"] = pd.to_datetime(df["timestamp_utc"], utc=True)
        df = df.set_index("timestamp_utc")

    # Determine current year's season start
    now = df.index.max()
    year = now.year if now.month >= season_start_month else now.year - 1
    # Match the index's tz-awareness: comparing a tz-naive index with a
    # tz-aware timestamp raises TypeError.
    season_tz = "UTC" if now.tzinfo is not None else None
    season_start = pd.Timestamp(f"{year}-{season_start_month:02d}-01", tz=season_tz)
    df_season = df[df.index >= season_start]

    def _zero_state(stage_id: str, stage_label: str) -> GDDState:
        """GDDState for the cases where nothing could be accumulated."""
        return GDDState(
            cumulative_gdd=0.0, stage_id=stage_id,
            stage_label=stage_label, days_computed=0,
            season_start=str(season_start.date()), last_date=str(now.date()),
        )

    if df_season.empty:
        return _zero_state("winter_dormancy", "Winter dormancy")

    # Daily aggregation: use tdmax/tdmin if available, else air_temperature_c
    tavg = _daily_mean_temperature(df_season)
    if tavg is None:
        # Either no temperature column exists or every day was dropped as
        # incomplete; previously the latter crashed on ``.iloc[-1]``.
        return _zero_state("unknown", "Unknown")

    # GDD per day: max(0, tavg - base_temp)
    daily_gdd = np.maximum(0.0, tavg - base_temp)
    cumulative = float(daily_gdd.cumsum().iloc[-1])
    stage = estimate_stage_by_gdd(cumulative)

    return GDDState(
        cumulative_gdd=round(cumulative, 1),
        stage_id=stage.id,
        stage_label=stage.label,
        days_computed=len(tavg),
        season_start=str(season_start.date()),
        last_date=str(tavg.index[-1].date()),
        daily_gdd=[round(g, 2) for g in daily_gdd.tolist()],
    )
def detect_stage_from_camera(
    camera_url: str = CAMERA_URL,
    gdd_stage_hint: Optional[str] = None,
    api_key: Optional[str] = None,
    timeout: float = 15.0,
) -> Optional[CameraDetection]:
    """Analyze live vineyard camera image for phenological stage using Gemini Vision.

    Best-effort: every failure (image fetch, client setup, model call,
    response parsing) is logged as a warning and reported as ``None`` rather
    than raised.

    Parameters
    ----------
    camera_url : str
        URL of the live camera image.
    gdd_stage_hint : str, optional
        Current GDD-based stage estimate (provides context to Gemini).
    api_key : str, optional
        Google API key. If None, resolved from environment/Streamlit secrets.
    timeout : float
        HTTP timeout for fetching the camera image.

    Returns
    -------
    CameraDetection or None if the image cannot be fetched or analyzed.
    ``confidence`` is lower-cased and stripped so that callers comparing
    against "high"/"medium"/"low" are not defeated by model capitalisation.
    """
    import requests

    # Fetch camera image
    try:
        resp = requests.get(camera_url, timeout=timeout)
        resp.raise_for_status()
        image_bytes = resp.content
    except Exception as exc:
        logger.warning("Camera image fetch failed: %s", exc)
        return None

    # Initialize Gemini client
    try:
        from src.genai.utils import get_genai_client
        from google.genai import types

        client = get_genai_client(api_key)
    except Exception as exc:
        logger.warning("Gemini client init failed: %s", exc)
        return None

    # Build prompt
    hint_text = ""
    if gdd_stage_hint:
        hint_text = (
            f"\nCurrent GDD-based estimate: {gdd_stage_hint}. "
            "Does the visual evidence match this estimate?"
        )

    # NOTE(review): the prompt names Chenin Blanc while this module's header
    # describes Semillon — confirm which variety the camera actually shows.
    prompt_text = (
        "You are a viticulture expert analyzing a live camera image from an "
        "agrivoltaic vineyard in Yeruham, Negev desert, Israel. "
        "The grape variety is Chenin Blanc trained on a VSP trellis under solar panels.\n\n"
        "Analyze the image and determine the current phenological (growth) stage. "
        "Look for:\n"
        "- Bare canes with no leaves → winter dormancy\n"
        "- Small green shoots emerging from buds → budburst\n"
        "- Tiny flower clusters (inflorescences) visible → flowering\n"
        "- Small green berries visible on clusters → fruit set / berry growth\n"
        "- Berries changing color (green to yellow/translucent) → veraison\n"
        "- Ripe colored berries, some leaf senescence → harvest / post-harvest\n"
        "- Full canopy with large green leaves but no visible fruit → vegetative growth\n"
        f"{hint_text}\n\n"
        "Respond in exactly this JSON format (no other text):\n"
        '{\n'
        ' "detected_stage": "one of: winter_dormancy, budburst_vegetative, '
        'flowering_fruit_set, berry_growth, veraison_ripening, post_harvest_reserves",\n'
        ' "confidence": "high, medium, or low",\n'
        ' "observations": "brief description of what you see in the image"\n'
        '}'
    )

    # Call Gemini with image
    try:
        image_part = types.Part.from_bytes(data=image_bytes, mime_type="image/jpeg")
        response = client.models.generate_content(
            model="gemini-2.5-flash",
            contents=[prompt_text, image_part],
        )
        # ``text`` can be None (e.g. an empty or blocked response); normalise
        # it so the parse step and its log message never index into None.
        text = response.text or ""
    except Exception as exc:
        logger.warning("Gemini vision call failed: %s", exc)
        return None

    # Parse JSON response
    try:
        from src.genai.utils import extract_json_object
        result = extract_json_object(text)
    except Exception:
        logger.warning("Could not parse Gemini vision response: %s", text[:200])
        return None

    # Missing or null fields fall back to the most conservative value.
    detected_id = str(result.get("detected_stage") or "unknown").strip()
    confidence = str(result.get("confidence") or "low").strip().lower()
    observations = str(result.get("observations") or "")

    matches_gdd = None
    if gdd_stage_hint:
        matches_gdd = detected_id == gdd_stage_hint

    return CameraDetection(
        detected_stage_id=detected_id,
        confidence=confidence,
        observations=observations,
        matches_gdd=matches_gdd,
    )
def next_stage_for_date(d: date) -> Tuple[str, str, int]:
    """Return (next_stage_label, next_stage_id, days_until) for a given date.

    Walks the calendar forward from the block of ``_STAGE_MONTH_RANGES`` that
    contains ``d.month`` to the first block whose stage differs from the
    current one. Blocks that repeat the current stage are skipped, so in
    November/December (winter dormancy) the next stage is budburst on
    1 March of the following year, not "winter dormancy" on 1 January.

    ``days_until`` counts days from ``d`` to the 1st of the next stage's
    first month. Returns ("Unknown", "unknown", 0) if no block matches or
    every block carries the same stage.
    """
    n_blocks = len(_STAGE_MONTH_RANGES)
    for i, (m_start, m_end, current_id, _label) in enumerate(_STAGE_MONTH_RANGES):
        if not m_start <= d.month <= m_end:
            continue

        # Step forward (wrapping around the year) to the first block whose
        # stage id is different from the block we are in.
        for step in range(1, n_blocks):
            nxt_start, _nxt_end, nxt_id, nxt_label = _STAGE_MONTH_RANGES[
                (i + step) % n_blocks
            ]
            if nxt_id != current_id:
                break
        else:
            return "Unknown", "unknown", 0

        # A start month at or before the current month means we wrapped
        # into the next calendar year.
        year = d.year if nxt_start > d.month else d.year + 1
        days_until = (date(year, nxt_start, 1) - d).days
        return nxt_label, nxt_id, days_until

    # Fallback
    return "Unknown", "unknown", 0
+ """ + metadata: dict = {"method": "calendar"} + + # Calendar (always computed as baseline) + calendar_stage = _estimate_stage_by_month(d.month) + metadata["calendar_stage"] = calendar_stage.id + best_stage = calendar_stage + + # GDD (if IMS data available) + gdd_state: Optional[GDDState] = None + if ims_df is not None and not ims_df.empty: + try: + gdd_state = compute_gdd_from_ims(ims_df) + metadata["gdd_state"] = gdd_state + if gdd_state.cumulative_gdd > 0: + best_stage = estimate_stage_by_gdd(gdd_state.cumulative_gdd) + metadata["method"] = "gdd" + except Exception as exc: + logger.warning("GDD estimation failed: %s", exc) + + # Camera (if requested and Gemini available) + if use_camera: + gdd_hint = gdd_state.stage_id if gdd_state else None + detection = detect_stage_from_camera( + gdd_stage_hint=gdd_hint, api_key=api_key, + ) + if detection: + metadata["camera_detection"] = detection + # Camera overrides GDD/calendar only if confidence is high + if detection.confidence == "high" and detection.detected_stage_id in _STAGE_DEFINITIONS: + best_stage = _STAGE_DEFINITIONS[detection.detected_stage_id] + metadata["method"] = "camera" + + return best_stage, metadata diff --git a/src/operational_modes.py b/src/operational_modes.py new file mode 100644 index 0000000000000000000000000000000000000000..05a8c8a5a8825552bae7f813267050a425438f0b --- /dev/null +++ b/src/operational_modes.py @@ -0,0 +1,234 @@ +""" +OperationalModes: weather protection, heat shield, and harvest mode. + +These override normal engine output at the P1/P2 priority level +in the CommandArbiter. Each mode returns a dict that the arbiter +recognises as ``weather_override`` or ``harvest_active``. + +Modes +----- +- **WindStow**: panels go flat (0°) when wind exceeds threshold. +- **HailStow**: panels go flat (0°) during hail events. +- **HeatShield**: maximum shading offset regardless of budget + when air temperature AND CWSI exceed emergency thresholds. 
@dataclass
class ModeOverride:
    """Outcome of evaluating a single operational mode.

    Carries whether the mode fired, which angle it wants commanded, a
    human-readable reason, and two flags describing which downstream
    filters the override is allowed to skip.
    """

    active: bool  # True when the mode is currently triggered
    mode: str  # "wind_stow" | "hail_stow" | "heat_shield" | "harvest"
    target_angle: float  # angle to command
    reason: str = ""  # explanation text; empty when inactive
    bypass_budget: bool = False  # True → ignore energy budget
    bypass_hysteresis: bool = False  # True → skip hysteresis filter

    def to_weather_override(self) -> Optional[dict]:
        """Return the override as a ``{"target_angle", "reason"}`` dict.

        Yields ``None`` when the mode is inactive.  The original author
        notes this is the format ``CommandArbiter.arbitrate()`` expects.
        """
        payload = None
        if self.active:
            payload = dict(target_angle=self.target_angle, reason=self.reason)
        return payload
def check_heat_shield(
    air_temp_c: float,
    cwsi: Optional[float] = None,
    temp_threshold: float = HEAT_SHIELD_TEMP_C,
    cwsi_threshold: float = HEAT_SHIELD_CWSI,
    max_offset_deg: float = 20.0,
    theta_astro: float = 0.0,
) -> ModeOverride:
    """Emergency heat shield: maximum shade offset regardless of budget.

    Activates when BOTH air temperature AND CWSI reach their thresholds.
    If CWSI is unavailable (``None`` or NaN), activates on temperature alone
    at +2°C above the threshold.  A NaN reading is treated as unavailable
    because every comparison against NaN is false, which would otherwise
    keep the shield off no matter how hot it gets.

    Parameters
    ----------
    air_temp_c : float
        Current air temperature in °C.
    cwsi : float, optional
        Crop water stress index reading, if one is available.
    temp_threshold, cwsi_threshold : float
        Activation thresholds (inclusive).
    max_offset_deg : float
        Offset added to ``theta_astro`` when the shield is active.
    theta_astro : float
        Angle the offset is applied to.

    Returns
    -------
    ModeOverride
        Active override targeting ``theta_astro + max_offset_deg`` with
        ``bypass_budget=True``, or an inactive ``heat_shield`` override.
    """
    import math  # local import: only needed for the NaN guard

    cwsi_available = cwsi is not None and not math.isnan(cwsi)

    if cwsi_available:
        activate = air_temp_c >= temp_threshold and cwsi >= cwsi_threshold
        reason = (f"heat shield: T={air_temp_c:.1f}°C >= {temp_threshold:.0f}°C, "
                  f"CWSI={cwsi:.2f} >= {cwsi_threshold:.2f}")
    else:
        # Without CWSI, require a higher temperature
        activate = air_temp_c >= temp_threshold + 2.0
        reason = (f"heat shield (no CWSI): T={air_temp_c:.1f}°C >= "
                  f"{temp_threshold + 2.0:.0f}°C")

    if not activate:
        return ModeOverride(active=False, mode="heat_shield", target_angle=0.0)

    return ModeOverride(
        active=True,
        mode="heat_shield",
        target_angle=theta_astro + max_offset_deg,
        reason=reason,
        bypass_budget=True,
        bypass_hysteresis=False,
    )
class OperationalModeChecker:
    """Evaluate every operational mode and report the winning override.

    Modes are tried from most to least urgent and the first active one is
    returned; ``None`` means no mode is active.

    Priority: wind_stow > hail_stow > harvest > heat_shield
    """

    def __init__(self):
        # Harvest state lives on the checker so operators can toggle or
        # schedule it between calls to check_all().
        self.harvest = HarvestMode()

    def check_all(
        self,
        wind_speed_ms: Optional[float] = None,
        hail_detected: bool = False,
        air_temp_c: Optional[float] = None,
        cwsi: Optional[float] = None,
        theta_astro: float = 0.0,
        current_date: Optional[date] = None,
    ) -> Optional[ModeOverride]:
        """Return the highest-priority active override, or None.

        Checks whose input is missing (``wind_speed_ms`` or ``air_temp_c``
        is None, ``hail_detected`` is falsy) are skipped.  Lower-priority
        checks are not evaluated once a higher one has fired.
        """
        # P1a: Wind stow
        if wind_speed_ms is not None and (wind := check_wind_stow(wind_speed_ms)).active:
            logger.warning("Mode override: %s", wind.reason)
            return wind

        # P1b: Hail stow
        if hail_detected and (hail := check_hail_stow(True)).active:
            logger.warning("Mode override: %s", hail.reason)
            return hail

        # P2: Harvest
        parked = self.harvest.check(current_date)
        if parked.active:
            logger.info("Mode override: %s", parked.reason)
            return parked

        # Heat shield (lower priority than harvest — don't shade
        # while machines are in the vineyard)
        if air_temp_c is None:
            return None
        shield = check_heat_shield(
            air_temp_c=air_temp_c,
            cwsi=cwsi,
            theta_astro=theta_astro,
        )
        if not shield.active:
            return None
        logger.warning("Mode override: %s", shield.reason)
        return shield
def axis_azimuth_from_gps(
    head: tuple[float, float],
    tail: tuple[float, float],
) -> float:
    """Return the compass bearing of the tracker rail, from head to tail.

    Evaluates the initial great-circle bearing between the two points
    (``atan2`` of the east and north components) and wraps the result
    into the 0–360 range.

    Parameters
    ----------
    head : (lat, lon)
        GPS coordinates, in degrees, of the tracker head (north end).
    tail : (lat, lon)
        GPS coordinates, in degrees, of the tracker tail (south end).

    Returns
    -------
    float
        Axis azimuth in degrees (0–360, clockwise from north).
    """
    phi_head, lam_head = np.radians(head)
    phi_tail, lam_tail = np.radians(tail)
    delta_lam = lam_tail - lam_head

    # East and north components of the departure direction at the head.
    east_component = np.sin(delta_lam) * np.cos(phi_tail)
    north_component = (
        np.cos(phi_head) * np.sin(phi_tail)
        - np.sin(phi_head) * np.cos(phi_tail) * np.cos(delta_lam)
    )

    raw_bearing = np.degrees(np.arctan2(east_component, north_component))
    # arctan2 yields (-180, 180]; shift into [0, 360).
    return (raw_bearing + 360) % 360
+ axis_azimuth : float, optional + Tracker axis direction (degrees CW from north). + If None, computed from ``head_gps`` / ``tail_gps``. + head_gps, tail_gps : tuple[float, float], optional + GPS coordinates (lat, lon) of tracker endpoints. + Used to compute axis_azimuth if not given explicitly. + max_angle : float + Maximum tracker tilt angle (degrees). + gcr : float + Ground coverage ratio (panel width / row spacing). + backtrack : bool + Enable backtracking to avoid inter-row shading. + """ + + # Yeruham vineyard reference GPS coordinates for row axis direction. + # All rows share the same NW-SE orientation; individual row offsets + # are cross-row only and don't affect the axis azimuth. + REFERENCE_GPS = { + "head": (30.980222, 34.908192), + "tail": (30.979471, 34.909118), + } + + def __init__( + self, + latitude: float = SITE_LATITUDE, + longitude: float = SITE_LONGITUDE, + altitude: float = SITE_ALTITUDE, + timezone: str = "Asia/Jerusalem", + axis_azimuth: Optional[float] = None, + head_gps: Optional[tuple[float, float]] = None, + tail_gps: Optional[tuple[float, float]] = None, + max_angle: float = TRACKER_MAX_ANGLE, + gcr: float = TRACKER_GCR, + backtrack: bool = True, + ): + self.site = location.Location( + latitude, longitude, timezone, altitude, "Yeruham Vineyard", + ) + self.timezone = timezone + self.max_angle = max_angle + self.gcr = gcr + self.backtrack = backtrack + + if axis_azimuth is not None: + self.axis_azimuth = axis_azimuth + elif head_gps and tail_gps: + self.axis_azimuth = axis_azimuth_from_gps(head_gps, tail_gps) + else: + # Default: use the configured row azimuth (consistent with ShadowModel) + self.axis_azimuth = ROW_AZIMUTH + + def get_solar_position(self, timestamp: datetime) -> dict: + """Get solar position for a single timestamp. + + Returns dict with ``solar_elevation``, ``solar_azimuth``, + ``apparent_zenith``. 
+ """ + ts = pd.Timestamp(timestamp) + if ts.tzinfo is None: + ts = ts.tz_localize(self.timezone) + times = pd.DatetimeIndex([ts]) + sp = self.site.get_solarposition(times) + return { + "solar_elevation": float(sp["apparent_elevation"].iloc[0]), + "solar_azimuth": float(sp["azimuth"].iloc[0]), + "apparent_zenith": float(sp["apparent_zenith"].iloc[0]), + } + + def get_tracking_angle(self, timestamp: datetime) -> float: + """Compute the optimal single-axis tracker tilt for a timestamp. + + Returns the tracker theta in degrees. + Returns 0.0 when sun is below the horizon. + """ + sp = self.get_solar_position(timestamp) + if sp["solar_elevation"] <= 0: + return 0.0 + + result = tracking.singleaxis( + sp["apparent_zenith"], + sp["solar_azimuth"], + axis_tilt=0, + axis_azimuth=self.axis_azimuth, + max_angle=self.max_angle, + backtrack=self.backtrack, + gcr=self.gcr, + ) + theta = result["tracker_theta"] + if pd.isna(theta): + return 0.0 + return float(theta) + + def get_day_profile( + self, + target_date: datetime | None = None, + freq: str = "15min", + ) -> pd.DataFrame: + """Compute tracker angles for an entire day. + + Returns DataFrame with columns: ``tracker_theta``, ``solar_elevation``, + ``solar_azimuth``, indexed by timestamp. 
+ """ + if target_date is None: + target_date = pd.Timestamp.now(tz=self.timezone).normalize() + else: + target_date = pd.Timestamp(target_date) + if target_date.tzinfo is None: + target_date = target_date.tz_localize(self.timezone) + target_date = target_date.normalize() + + end = target_date + pd.Timedelta(days=1) + times = pd.date_range(target_date, end, freq=freq, tz=self.timezone) + + sp = self.site.get_solarposition(times) + tr = tracking.singleaxis( + sp["apparent_zenith"], + sp["azimuth"], + axis_tilt=0, + axis_azimuth=self.axis_azimuth, + max_angle=self.max_angle, + backtrack=self.backtrack, + gcr=self.gcr, + ) + + return pd.DataFrame({ + "tracker_theta": tr["tracker_theta"], + "solar_elevation": sp["apparent_elevation"], + "solar_azimuth": sp["azimuth"], + }, index=times) diff --git a/src/roi_service.py b/src/roi_service.py new file mode 100644 index 0000000000000000000000000000000000000000..8c8c8e02224ae8bc0a3676d259de76af83672487 --- /dev/null +++ b/src/roi_service.py @@ -0,0 +1,283 @@ +""" +ROIService: budget utilisation, intervention statistics, and LER computation. + +Provides the data layer for the ROI Dashboard tab. 
@dataclass
class InterventionStats:
    """Aggregated intervention figures for one reporting period.

    The period (day/week/month) is identified only by ``period_label``;
    all counters start at zero and are filled in by the caller.
    """

    period_label: str
    total_slots: int = 0  # number of 15-min slots
    intervention_slots: int = 0  # slots where offset > 0
    avg_offset_deg: float = 0.0  # mean offset across intervention slots
    max_offset_deg: float = 0.0  # largest offset seen in the period
    energy_sacrificed_kwh: float = 0.0
    budget_allocated_kwh: float = 0.0
    budget_utilisation_pct: float = 0.0  # sacrificed / allocated * 100

    @property
    def intervention_rate_pct(self) -> float:
        """Share of slots with an intervention, as a percentage.

        Returns 0.0 when no slots were recorded, avoiding a division by zero.
        """
        if self.total_slots != 0:
            return self.intervention_slots / self.total_slots * 100
        return 0.0
class ROIService:
    """Compute budget utilisation, intervention stats, and LER from tick logs.

    Parameters
    ----------
    annual_generation_kwh : float
        Expected total annual PV generation (kWh).
        Default: 48 kW × 1800 peak-sun-hours ≈ 86,400 kWh.
    max_reduction_pct : float
        Maximum energy sacrifice ceiling (%).
    """

    def __init__(
        self,
        annual_generation_kwh: float = 86_400.0,
        max_reduction_pct: float = MAX_ENERGY_REDUCTION_PCT,
    ):
        self.annual_gen = annual_generation_kwh
        # Energy (kWh) that may be sacrificed per year for shading.
        self.annual_budget = annual_generation_kwh * max_reduction_pct / 100.0
        self._tick_log: List[dict] = []

    def load_tick_log(self, tick_log: List[dict]) -> None:
        """Load a tick log (list of TickResult.to_dict() entries).

        The list is shallow-copied, so later mutation of the caller's list
        does not affect the service.
        """
        self._tick_log = list(tick_log)
        logger.info("Loaded %d tick entries", len(self._tick_log))

    @staticmethod
    def _tick_date(tick: dict) -> Optional[date]:
        """Return the calendar date of a tick, or None if it cannot be read.

        Accepts a ``datetime`` or an ISO-8601 string under ``"timestamp"``;
        any other type, or an unparseable string, yields None.
        """
        ts = tick.get("timestamp", "")
        if isinstance(ts, datetime):
            return ts.date()
        if isinstance(ts, str):
            try:
                return datetime.fromisoformat(ts).date()
            except ValueError:
                return None
        return None

    # ------------------------------------------------------------------
    # Budget status
    # ------------------------------------------------------------------

    def get_budget_status(
        self,
        target_date: Optional[date] = None,
    ) -> BudgetStatus:
        """Compute budget utilisation from the tick log.

        Sums ``energy_cost_kwh`` over the calendar year, month, Monday-based
        week and day that contain ``target_date`` (default: today).  Ticks
        without a readable timestamp are ignored.
        """
        today = target_date or date.today()

        # Week window is [Monday, next Monday) so that, like the year and
        # month totals, it covers only the period containing ``today``.
        week_start = today - timedelta(days=today.weekday())
        week_end = week_start + timedelta(days=7)

        annual_spent = 0.0
        monthly_spent = 0.0
        weekly_spent = 0.0
        daily_spent = 0.0

        for tick in self._tick_log:
            tick_date = self._tick_date(tick)
            if tick_date is None:
                continue
            cost = tick.get("energy_cost_kwh", 0.0) or 0.0

            if tick_date.year == today.year:
                annual_spent += cost
                if tick_date.month == today.month:
                    monthly_spent += cost
            if week_start <= tick_date < week_end:
                weekly_spent += cost
            if tick_date == today:
                daily_spent += cost

        return BudgetStatus(
            annual_budget_kwh=self.annual_budget,
            annual_spent_kwh=annual_spent,
            annual_remaining_kwh=max(0.0, self.annual_budget - annual_spent),
            monthly_spent_kwh=monthly_spent,
            weekly_spent_kwh=weekly_spent,
            daily_spent_kwh=daily_spent,
        )

    # ------------------------------------------------------------------
    # Intervention statistics
    # ------------------------------------------------------------------

    def compute_intervention_stats(
        self,
        start_date: Optional[date] = None,
        end_date: Optional[date] = None,
        label: str = "period",
    ) -> InterventionStats:
        """Compute intervention statistics for a date range.

        Both bounds are inclusive and optional.  A tick counts as an
        intervention when its ``plan_offset_deg`` is positive.  The average
        offset is taken over the intervention ticks inside the range only.
        """
        stats = InterventionStats(period_label=label)
        offset_sum = 0.0

        for tick in self._tick_log:
            tick_date = self._tick_date(tick)
            if tick_date is None:
                continue
            if start_date and tick_date < start_date:
                continue
            if end_date and tick_date > end_date:
                continue

            stats.total_slots += 1
            offset = tick.get("plan_offset_deg", 0.0) or 0.0
            if offset > 0:
                stats.intervention_slots += 1
                offset_sum += offset
                stats.max_offset_deg = max(stats.max_offset_deg, offset)

            stats.energy_sacrificed_kwh += tick.get("energy_cost_kwh", 0.0) or 0.0

        if stats.intervention_slots > 0:
            stats.avg_offset_deg = offset_sum / stats.intervention_slots

        return stats

    # ------------------------------------------------------------------
    # LER computation
    # ------------------------------------------------------------------

    def compute_ler(
        self,
        actual_energy_kwh: float,
        mono_energy_kwh: Optional[float] = None,
        actual_crop_yield: float = 1.0,
        mono_crop_yield: float = 1.0,
    ) -> LERResult:
        """Compute Land Equivalent Ratio.

        LER = (E_agri / E_mono) + (Y_agri / Y_mono)

        LER > 1.0 means the combined system is more productive
        than growing either crop alone.

        Parameters
        ----------
        actual_energy_kwh : float
            Actual PV generation under agrivoltaic operation.
        mono_energy_kwh : float, optional
            Theoretical generation without any shading interventions.
            Defaults to annual_generation_kwh (also used when 0 is passed).
        actual_crop_yield : float
            Agrivoltaic crop yield (any unit, must match mono).
        mono_crop_yield : float
            Monoculture crop yield (same unit).

        Returns
        -------
        LERResult
            Both fractions, their sum, and whether the sum reaches
            ``TARGET_LER``.  A non-positive denominator gives a 0.0 fraction.
        """
        e_mono = mono_energy_kwh or self.annual_gen
        e_frac = actual_energy_kwh / e_mono if e_mono > 0 else 0.0
        c_frac = actual_crop_yield / mono_crop_yield if mono_crop_yield > 0 else 0.0
        ler = e_frac + c_frac

        return LERResult(
            energy_fraction=e_frac,
            crop_fraction=c_frac,
            ler=ler,
            meets_target=ler >= TARGET_LER,
        )

    # ------------------------------------------------------------------
    # Summary
    # ------------------------------------------------------------------

    def summary(self, target_date: Optional[date] = None) -> dict:
        """Return a combined summary dict for the dashboard.

        ``"budget"`` reflects the year containing ``target_date``;
        ``"interventions"`` covers the whole loaded tick log.
        """
        budget = self.get_budget_status(target_date)
        stats = self.compute_intervention_stats(label="all_time")

        return {
            "budget": {
                "annual_budget_kwh": budget.annual_budget_kwh,
                "annual_spent_kwh": budget.annual_spent_kwh,
                "annual_remaining_kwh": budget.annual_remaining_kwh,
                "utilisation_pct": budget.annual_utilisation_pct,
                "over_budget": budget.is_over_budget(),
            },
            "interventions": {
                "total_slots": stats.total_slots,
                "intervention_slots": stats.intervention_slots,
                "intervention_rate_pct": stats.intervention_rate_pct,
                "avg_offset_deg": stats.avg_offset_deg,
                "max_offset_deg": stats.max_offset_deg,
                "energy_sacrificed_kwh": stats.energy_sacrificed_kwh,
            },
        }
tilt, and shadow projection +from a solar panel onto a vine canopy grid. + +Coordinate system: +- Row-local: u = along-row direction (azimuth 315°), v = cross-row (perpendicular), z = up +- World: x = East, y = North, z = up +""" + +from __future__ import annotations + +import numpy as np +import pandas as pd +import pvlib + +from config.settings import ( + CANOPY_HEIGHT, + CANOPY_WIDTH, + PANEL_HEIGHT, + PANEL_WIDTH, + ROW_AZIMUTH, + ROW_SPACING, + SITE_ALTITUDE, + SITE_LATITUDE, + SITE_LONGITUDE, + TRACKER_GCR, + TRACKER_MAX_ANGLE, +) + + +class ShadowModel: + """3D shadow projection from a single-axis tracker onto vine canopy rows.""" + + def __init__( + self, + panel_width: float = PANEL_WIDTH, + panel_height: float = PANEL_HEIGHT, + row_spacing: float = ROW_SPACING, + canopy_height: float = CANOPY_HEIGHT, + canopy_width: float = CANOPY_WIDTH, + row_azimuth: float = ROW_AZIMUTH, + lat: float = SITE_LATITUDE, + lon: float = SITE_LONGITUDE, + altitude: float = SITE_ALTITUDE, + n_vertical: int = 3, + n_horizontal: int = 10, + n_rows: int = 5, + ): + self.panel_width = panel_width + self.panel_height = panel_height + self.row_spacing = row_spacing + self.canopy_height = canopy_height + self.canopy_width = canopy_width + self.row_azimuth = row_azimuth + self.lat = lat + self.lon = lon + self.altitude = altitude + self.n_vertical = n_vertical + self.n_horizontal = n_horizontal + self.n_rows = n_rows + + # Row direction unit vectors in world coords (x=East, y=North) + az_rad = np.radians(row_azimuth) + self._row_u = np.array([np.sin(az_rad), np.cos(az_rad)]) # along row + self._row_v = np.array([np.cos(az_rad), -np.sin(az_rad)]) # cross-row (perpendicular, right-hand) + + # Canopy grid in cross-row direction + self._grid_v = np.linspace( + -canopy_width / 2, canopy_width / 2, n_horizontal, + ) + self._grid_z = np.linspace( + canopy_height / n_vertical / 2, + canopy_height - canopy_height / n_vertical / 2, + n_vertical, + ) + # LAI weight per vertical zone: top 50%, 
mid 35%, bottom 15% + self.lai_weights = np.array([0.15, 0.35, 0.50]) # bottom to top + + def get_solar_position(self, times: pd.DatetimeIndex) -> pd.DataFrame: + """Compute solar elevation and azimuth for site location.""" + loc = pvlib.location.Location(self.lat, self.lon, altitude=self.altitude) + sp = loc.get_solarposition(times) + return sp[["apparent_elevation", "azimuth"]].rename( + columns={"apparent_elevation": "solar_elevation", "azimuth": "solar_azimuth"}, + ) + + def compute_tracker_tilt(self, solar_azimuth: float, solar_elevation: float) -> dict: + """ + Single-axis tracker angle using pvlib.tracking.singleaxis. + Tracker axis is along row_azimuth, maximizing energy (normal tracking). + Returns dict with tracker_theta, aoi, surface_tilt, surface_azimuth. + """ + if solar_elevation <= 0: + return {"tracker_theta": 0.0, "aoi": 90.0, "surface_tilt": 0.0, "surface_azimuth": 0.0} + + zenith = 90.0 - solar_elevation + + # Use positional args for zenith and azimuth so we work with both + # pvlib < 0.13.1 (apparent_azimuth) and >= 0.13.1 (solar_azimuth) + result = pvlib.tracking.singleaxis( + zenith, + solar_azimuth, + axis_tilt=0, + axis_azimuth=self.row_azimuth, + max_angle=TRACKER_MAX_ANGLE, + backtrack=True, + gcr=TRACKER_GCR, + ) + + # pvlib can return arrays; extract scalar for float() and np.isnan() + def _scalar(x): + a = np.asarray(x) + return a.flat[0] if a.size else np.nan + + theta = float(_scalar(result["tracker_theta"])) + if np.isnan(theta): + theta = 0.0 + aoi = _scalar(result["aoi"]) + surf_tilt = _scalar(result["surface_tilt"]) + surf_az = _scalar(result["surface_azimuth"]) + return { + "tracker_theta": theta, + "aoi": 90.0 if np.isnan(aoi) else float(aoi), + "surface_tilt": 0.0 if np.isnan(surf_tilt) else float(surf_tilt), + "surface_azimuth": 0.0 if np.isnan(surf_az) else float(surf_az), + } + + def _panel_corners_local(self, tracker_tilt: float) -> np.ndarray: + """ + 4 panel corners in row-local frame (v=cross-row, u=along-row, z=up). 
+ Returns shape (4, 3) as [v, u, z]. + pvlib convention: positive theta = panel faces +v (NE for axis 315°). + Negate theta so that positive theta lowers the +v edge (panel faces +v). + """ + half_w = self.panel_width / 2 + tilt_rad = np.radians(-tracker_tilt) # negate to match pvlib convention + cos_t, sin_t = np.cos(tilt_rad), np.sin(tilt_rad) + half_len = 1.0 # panel length along row (for visualization) + + # Corners: [v, u, z] — panel tilts in the v-z plane + return np.array([ + [-half_w * cos_t, -half_len, self.panel_height - half_w * sin_t], + [half_w * cos_t, -half_len, self.panel_height + half_w * sin_t], + [half_w * cos_t, half_len, self.panel_height + half_w * sin_t], + [-half_w * cos_t, half_len, self.panel_height - half_w * sin_t], + ]) + + def panel_corners_world(self, tracker_tilt: float, row_offset: float = 0.0) -> np.ndarray: + """ + Panel corners in world coordinates for a row at cross-row offset. + Returns shape (4, 3) as [x, y, z]. + """ + local = self._panel_corners_local(tracker_tilt) + world = np.zeros_like(local) + for i in range(4): + # v -> cross-row, u -> along-row + world[i, 0] = local[i, 0] * self._row_v[0] + local[i, 1] * self._row_u[0] + row_offset * self._row_v[0] + world[i, 1] = local[i, 0] * self._row_v[1] + local[i, 1] * self._row_u[1] + row_offset * self._row_v[1] + world[i, 2] = local[i, 2] + return world + + def vine_box_world(self, row_offset: float = 0.0) -> np.ndarray: + """ + 8 vine canopy box corners in world coordinates. + Returns shape (8, 3) as [x, y, z]. 
def project_shadow_on_row(
    self,
    solar_elevation: float,
    solar_azimuth: float,
    tracker_tilt: float,
    source_row_offset: float,
    target_row_offset: float,
) -> np.ndarray:
    """
    Shadow cast by the panel at source_row_offset on the canopy at target_row_offset.

    Works in the cross-section (v-z plane). From every canopy grid point
    (taken from ``self._grid_v`` x ``self._grid_z``) a ray is sent toward the
    sun and intersected with the panel segment. A point is shaded when the
    hit lies ahead of the point along the ray and within the panel's extent.

    Returns a boolean mask of shape (n_vertical, n_horizontal), True = shaded.
    The mask is all False when the sun is at or below 2 degrees elevation,
    or when the sun ray is parallel to the panel.
    """
    shaded = np.zeros((self.n_vertical, self.n_horizontal), dtype=bool)
    if solar_elevation <= 2.0:
        return shaded

    # Sun direction: horizontal part projected on the cross-row axis, plus height.
    elevation = np.radians(solar_elevation)
    azimuth = np.radians(solar_azimuth)
    horizontal = np.array([
        np.cos(elevation) * np.sin(azimuth),
        np.cos(elevation) * np.cos(azimuth),
    ])
    ray_v = float(np.dot(horizontal, self._row_v))
    ray_z = np.sin(elevation)
    if ray_z <= 0.01:
        return shaded

    # Panel edge points expressed in the target canopy's frame.
    shift = target_row_offset - source_row_offset
    corners = self._panel_corners_local(tracker_tilt)
    start_v, start_z = corners[0, 0] - shift, corners[0, 2]
    end_v, end_z = corners[1, 0] - shift, corners[1, 2]
    seg_v = end_v - start_v
    seg_z = end_z - start_z

    # Solve  point + t * ray = start + s * segment  for (t, s).
    det = ray_v * seg_z - ray_z * seg_v
    if abs(det) < 1e-10:
        return shaded  # sun ray parallel to panel

    to_start_v = start_v - np.asarray(self._grid_v, dtype=float)[np.newaxis, :]
    to_start_z = start_z - np.asarray(self._grid_z, dtype=float)[:, np.newaxis]
    ray_t = (to_start_v * seg_z - to_start_z * seg_v) / det
    seg_s = (to_start_v * ray_z - to_start_z * ray_v) / det

    # ray_t > 0: panel lies toward the sun; 0 <= seg_s <= 1: hit is on the panel.
    hits = (ray_t > 0) & (seg_s >= 0.0) & (seg_s <= 1.0)
    shaded[: hits.shape[0], : hits.shape[1]] = hits
    return shaded
solar_azimuth: float, + tracker_tilt: float, + row_offset: float = 0.0, + ) -> np.ndarray | None: + """ + Shadow polygon on canopy top in world coords for visualization. + Returns (4, 3) array or None if sun below horizon. + """ + if solar_elevation <= 2.0: + return None + + elev_rad = np.radians(solar_elevation) + az_rad = np.radians(solar_azimuth) + sun_x = np.cos(elev_rad) * np.sin(az_rad) + sun_y = np.cos(elev_rad) * np.cos(az_rad) + sun_z = np.sin(elev_rad) + + if sun_z <= 0.01: + return None + + corners = self.panel_corners_world(tracker_tilt, row_offset) + ch = self.canopy_height + shadow = np.zeros((4, 3)) + for i in range(4): + dz = corners[i, 2] - ch + shadow[i, 0] = corners[i, 0] - (sun_x / sun_z) * dz + shadow[i, 1] = corners[i, 1] - (sun_y / sun_z) * dz + shadow[i, 2] = ch + 0.01 + return shadow + + def compute_par_distribution( + self, + total_par: float, + shadow_mask: np.ndarray, + diffuse_fraction: float = 0.15, + solar_elevation: float | None = None, + solar_azimuth: float | None = None, + tracker_tilt: float | None = None, + ) -> np.ndarray: + """PAR per zone accounting for overhead shadow, side light, and diffuse. + + When solar position is provided, zones near the row edge on the + sun-facing side receive additional side light that enters between + panel rows. Vertical variation: top zones get more sky-view diffuse, + bottom zones get less. + + Falls back to the simple sunlit/shaded model when solar position is + not provided. 
+ """ + n_v, n_h = shadow_mask.shape + par = np.full((n_v, n_h), total_par * diffuse_fraction, dtype=float) + + # --- Overhead direct beam: only where NOT shaded --- + par[~shadow_mask] = total_par + + # --- Side light from row gaps (requires solar geometry) --- + if solar_elevation is not None and solar_azimuth is not None and solar_elevation > 2: + sun_cross, sun_z = self._sun_cross_component(solar_elevation, solar_azimuth) + + # Normalise horizontal positions to [-1, +1] across canopy width + v_norm = np.linspace(-1.0, 1.0, n_h) + + # Side-light fraction: zones on the sun-facing edge of the canopy + # receive direct light between panel rows. The fraction decays + # linearly from the edge toward the interior. + # sun_cross > 0 → sun from +v side → right edge lit + # sun_cross < 0 → sun from -v side → left edge lit + if abs(sun_cross) > 0.01: + # How far each horizontal position is from the sun-facing edge (0-1) + if sun_cross > 0: + edge_proximity = np.clip((v_norm + 1) / 2, 0, 1) # 1 at +v edge + else: + edge_proximity = np.clip((1 - v_norm) / 2, 0, 1) # 1 at -v edge + + # Side light penetration depends on sun elevation: + # low sun → more horizontal light between rows + # high sun → less side penetration + horiz_factor = abs(sun_cross) / (abs(sun_cross) + sun_z) + + # Row-gap openness: fraction of sky visible from the side + gap_fraction = 1.0 - self.panel_width / self.row_spacing # ~0.62 + + # Side PAR contribution for each horizontal position + side_par = total_par * horiz_factor * gap_fraction * edge_proximity + + # Apply to shaded zones only (sunlit zones already have full PAR) + for iz in range(n_v): + for ih in range(n_h): + if shadow_mask[iz, ih]: + par[iz, ih] += side_par[ih] + + # --- Vertical variation in diffuse sky view --- + # Top zones see more open sky, bottom zones are self-shaded by + # upper canopy. Sky-view factor from 0.4 (bottom) to 1.0 (top). 
def compute_face_par_zones(
    self,
    total_par: float,
    solar_elevation: float,
    solar_azimuth: float,
    tracker_tilt: float,
    diffuse_fraction: float = 0.15,
    include_panels: bool = True,
) -> dict:
    """Compute PAR reaching each canopy face at zone level.

    Returns dict with:
        east: array(n_vertical,) PAR per height zone on the +v face, bottom to top
        west: array(n_vertical,) PAR per height zone on the -v face, bottom to top
        top:  array(3,) PAR at three positions across the top face (-v edge, centre, +v edge)

    A vertical face whose normal points away from the sun gets 30% of its
    zone's diffuse PAR. A sun-facing zone gets side light plus diffuse unless
    a panel (own row or either neighbour) blocks the ray, in which case it
    keeps diffuse only. With the sun at or below 2 degrees elevation, or
    non-positive total_par, only diffuse values are returned.
    """
    zone_count = self.n_vertical
    canopy_top = self.canopy_height
    half_width = self.canopy_width / 2

    # Zone centres along the canopy height.
    half_zone = canopy_top / zone_count / 2
    zone_heights = np.linspace(half_zone, canopy_top - half_zone, zone_count)

    # Diffuse PAR scaled by a sky-view factor rising from 0.4 (bottom) to 1.0 (top).
    diffuse_by_zone = total_par * diffuse_fraction * (0.4 + 0.6 * np.linspace(0.0, 1.0, zone_count))

    def _diffuse_only():
        return {
            "east": diffuse_by_zone.copy(),
            "west": diffuse_by_zone.copy(),
            "top": np.full(3, total_par * diffuse_fraction),
        }

    if solar_elevation <= 2.0 or total_par <= 0:
        return _diffuse_only()

    sun_cross, sun_z = self._sun_cross_component(solar_elevation, solar_azimuth)
    if sun_z <= 0.01:
        return _diffuse_only()

    # Panel cross-sections (own row and both neighbours); empty when panels are ignored.
    segments = []
    if include_panels:
        corners = self._panel_corners_local(tracker_tilt)
        for neighbour in (-1, 0, 1):
            shift = -neighbour * self.row_spacing
            segments.append((corners[0, 0] - shift, corners[0, 2],
                             corners[1, 0] - shift, corners[1, 2]))

    def _blocked(v_pt, z_pt):
        """True when the ray from (v_pt, z_pt) toward the sun hits any panel segment."""
        for a_v, a_z, b_v, b_z in segments:
            seg_v, seg_z = b_v - a_v, b_z - a_z
            det = sun_cross * seg_z - sun_z * seg_v
            if abs(det) < 1e-10:
                continue  # ray parallel to this panel
            to_v, to_z = a_v - v_pt, a_z - z_pt
            ray_t = (to_v * seg_z - to_z * seg_v) / det
            seg_s = (to_v * sun_z - to_z * sun_cross) / det
            if ray_t > 0 and 0.0 <= seg_s <= 1.0:
                return True
        return False

    # Direct light entering sideways through the gap between panel rows.
    openness = 1.0 - self.panel_width / self.row_spacing
    lateral = abs(sun_cross) / (abs(sun_cross) + sun_z)
    side_direct = total_par * lateral * openness

    def _face_par(face_v, faces_away):
        values = []
        for height, diffuse in zip(zone_heights, diffuse_by_zone):
            if faces_away:
                values.append(diffuse * 0.3)  # back face
            elif _blocked(face_v, height):
                values.append(diffuse)  # panel-shaded: diffuse only
            else:
                values.append(side_direct + diffuse)
        return np.clip(np.array(values, dtype=float), 0, None)

    east_par = _face_par(half_width, sun_cross <= 0)
    west_par = _face_par(-half_width, sun_cross >= 0)

    # Top face: sunlit share of five probes around each of three positions.
    top_values = []
    for centre in np.linspace(-half_width, half_width, 3):
        probes = np.clip(
            np.linspace(centre - half_width / 3, centre + half_width / 3, 5),
            -half_width, half_width,
        )
        lit = sum(1 for v in probes if not _blocked(v, canopy_top))
        frac = lit / len(probes)
        top_values.append(total_par * frac + total_par * diffuse_fraction * (1 - frac))

    return {"east": east_par, "west": west_par, "top": np.array(top_values, dtype=float)}
def evaluate_candidate_offsets(
    self,
    solar_elevation: float,
    solar_azimuth: float,
    theta_astro: float,
    offsets: list[int | float] | None = None,
    total_par: float = 1500.0,
    fruiting_zone_idx: int | None = None,
) -> dict:
    """Evaluate shadow at astronomical angle + each candidate offset.

    For each offset, computes the shadow mask at tilt = theta_astro + offset,
    then derives the PAR distribution, the overall sunlit fraction and the
    fruiting-zone metrics.

    Parameters
    ----------
    solar_elevation, solar_azimuth : float
        Current sun position (degrees).
    theta_astro : float
        Astronomical (energy-maximizing) tracker tilt (degrees).
    offsets : list of int/float, optional
        Candidate offsets to evaluate. Default from settings.CANDIDATE_OFFSETS.
    total_par : float
        Incoming PAR (umol m-2 s-1).
    fruiting_zone_idx : int, optional
        Vertical zone index for the fruiting zone. Default from
        settings.FRUITING_ZONE_INDEX; the same index is used for both the
        fruiting-zone shading report and the fruiting-zone mean PAR.

    Returns
    -------
    dict keyed by offset value, each containing:
        shadow_mask, par_distribution, sunlit_fraction, fruiting_zone,
        top_canopy_sunlit_pct, fruiting_zone_mean_par.
    """
    if offsets is None:
        from config.settings import CANDIDATE_OFFSETS
        offsets = CANDIDATE_OFFSETS

    # Resolve the fruiting zone once so every metric below describes the same
    # canopy row (previously the mean-PAR row silently defaulted to index 1).
    if fruiting_zone_idx is None:
        from config.settings import FRUITING_ZONE_INDEX
        fruiting_zone_idx = FRUITING_ZONE_INDEX

    # The highest vertical zone is the top of the canopy.
    top_zone_idx = self.n_vertical - 1

    results = {}
    for offset in offsets:
        theta = theta_astro + offset
        mask = self.project_shadow(solar_elevation, solar_azimuth, theta)
        par_dist = self.compute_par_distribution(
            total_par, mask,
            solar_elevation=solar_elevation,
            solar_azimuth=solar_azimuth,
            tracker_tilt=theta,
        )
        results[offset] = {
            "shadow_mask": mask,
            "par_distribution": par_dist,
            "sunlit_fraction": round(self.sunlit_fraction(mask), 3),
            "fruiting_zone": self.fruiting_zone_shadow(mask, fruiting_zone_idx),
            "top_canopy_sunlit_pct": round(float(1.0 - mask[top_zone_idx, :].mean()) * 100, 1),
            "fruiting_zone_mean_par": round(float(par_dist[fruiting_zone_idx, :].mean()), 1),
        }

    return results
int = 20, + include_panels: bool = True, + ) -> dict: + """ + Compute shading on top, east, and west faces of the canopy box. + Includes self-shading: faces whose outward normal points away from the + sun receive no direct light (the vine box is opaque). + + When include_panels=False, only self-shading is computed (reference case). + + Faces: + - Top: z = canopy_height, width = canopy_width (normal = +z) + - East: v = +canopy_width/2, height = canopy_height (normal = +v, NE-facing) + - West: v = -canopy_width/2, height = canopy_height (normal = -v, SW-facing) + """ + cw2 = self.canopy_width / 2 + ch = self.canopy_height + top_area = self.canopy_width # 0.6 m + east_area = ch # 1.2 m + west_area = ch # 1.2 m + total_area = top_area + east_area + west_area + + _zero = { + "top_sunlit": 0.0, "east_sunlit": 0.0, "west_sunlit": 0.0, + "top_area": top_area, "east_area": east_area, "west_area": west_area, + "total_sunlit_area": 0.0, "total_area": total_area, + "sunlit_fraction": 0.0, + } + + if solar_elevation <= 2.0: + return _zero + + sun_cross, sun_z = self._sun_cross_component(solar_elevation, solar_azimuth) + + if sun_z <= 0.01: + return _zero + + # --- Self-shading: faces whose normal faces away from the sun --- + # East face normal = +v → sunlit only when sun_cross > 0 + # West face normal = -v → sunlit only when sun_cross < 0 + # Top face normal = +z → always sunlit during daytime + east_self_shaded = (sun_cross <= 0) + west_self_shaded = (sun_cross >= 0) + + # If a face is self-shaded, it's 0% sunlit regardless of panels + if east_self_shaded: + east_sunlit = 0.0 + if west_self_shaded: + west_sunlit = 0.0 + + if include_panels: + # Panel segments from 3 rows (own + neighbors) in target frame + panels = [] + for off_idx in (-1, 0, 1): + row_shift = -off_idx * self.row_spacing + corners = self._panel_corners_local(tracker_tilt) + p1_v = corners[0, 0] - row_shift + p1_z = corners[0, 2] + p2_v = corners[1, 0] - row_shift + p2_z = corners[1, 2] + panels.append((p1_v, 
p1_z, p2_v, p2_z)) + + def _is_panel_shaded(v_c, z_c): + """Check if a point is shaded by any panel.""" + for p1_v, p1_z, p2_v, p2_z in panels: + dv_p = p2_v - p1_v + dz_p = p2_z - p1_z + det = sun_cross * dz_p - sun_z * dv_p + if abs(det) < 1e-10: + continue + dv = p1_v - v_c + dz = p1_z - z_c + t = (dv * dz_p - dz * dv_p) / det + s = (dv * sun_z - dz * sun_cross) / det + if t > 0 and 0.0 <= s <= 1.0: + return True + return False + else: + def _is_panel_shaded(v_c, z_c): + return False + + # Sample top face: z = ch, v from -cw2 to +cw2 (never self-shaded) + top_v = np.linspace(-cw2, cw2, n_samples) + top_sunlit_count = sum(1 for v in top_v if not _is_panel_shaded(v, ch)) + top_sunlit = top_sunlit_count / n_samples + + # East face: skip sampling if self-shaded + if not east_self_shaded: + east_z = np.linspace(0, ch, n_samples) + east_sunlit_count = sum(1 for z in east_z if not _is_panel_shaded(cw2, z)) + east_sunlit = east_sunlit_count / n_samples + + # West face: skip sampling if self-shaded + if not west_self_shaded: + west_z = np.linspace(0, ch, n_samples) + west_sunlit_count = sum(1 for z in west_z if not _is_panel_shaded(-cw2, z)) + west_sunlit = west_sunlit_count / n_samples + + total_sunlit_area = top_sunlit * top_area + east_sunlit * east_area + west_sunlit * west_area + + return { + "top_sunlit": top_sunlit, + "east_sunlit": east_sunlit, + "west_sunlit": west_sunlit, + "top_area": top_area, + "east_area": east_area, + "west_area": west_area, + "total_sunlit_area": total_sunlit_area, + "total_area": total_area, + "sunlit_fraction": total_sunlit_area / total_area, + } + + def compute_face_shadow_bounds( + self, + solar_elevation: float, + solar_azimuth: float, + tracker_tilt: float, + n_samples: int = 30, + include_panels: bool = True, + ) -> dict: + """ + Compute shadow boundaries on each face for 3D visualization. + Includes self-shading: faces whose normal faces away from the sun + are entirely shaded. When include_panels=False, only self-shading. 
+ Returns dict with shaded z-ranges on east/west faces and v-range on top face. + """ + cw2 = self.canopy_width / 2 + ch = self.canopy_height + + _all_shaded = {"top_shaded": (-cw2, cw2), "east_shaded": (0, ch), "west_shaded": (0, ch)} + + if solar_elevation <= 2.0: + return _all_shaded + + sun_cross, sun_z = self._sun_cross_component(solar_elevation, solar_azimuth) + + if sun_z <= 0.01: + return _all_shaded + + # Self-shading: faces whose normal faces away from the sun + east_self_shaded = (sun_cross <= 0) + west_self_shaded = (sun_cross >= 0) + + if include_panels: + panels = [] + for off_idx in (-1, 0, 1): + row_shift = -off_idx * self.row_spacing + corners = self._panel_corners_local(tracker_tilt) + panels.append((corners[0, 0] - row_shift, corners[0, 2], + corners[1, 0] - row_shift, corners[1, 2])) + + def _is_panel_shaded(v_c, z_c): + for p1_v, p1_z, p2_v, p2_z in panels: + dv_p, dz_p = p2_v - p1_v, p2_z - p1_z + det = sun_cross * dz_p - sun_z * dv_p + if abs(det) < 1e-10: + continue + dv, dz = p1_v - v_c, p1_z - z_c + t = (dv * dz_p - dz * dv_p) / det + s = (dv * sun_z - dz * sun_cross) / det + if t > 0 and 0.0 <= s <= 1.0: + return True + return False + else: + def _is_panel_shaded(v_c, z_c): + return False + + # Top face: find shaded v-range (never self-shaded) + top_v = np.linspace(-cw2, cw2, n_samples) + top_shaded = [v for v in top_v if _is_panel_shaded(v, ch)] + top_bounds = (min(top_shaded), max(top_shaded)) if top_shaded else None + + # East face: entirely self-shaded when sun_cross <= 0 + if east_self_shaded: + east_bounds = (0, ch) + else: + east_z = np.linspace(0, ch, n_samples) + east_shaded = [z for z in east_z if _is_panel_shaded(cw2, z)] + east_bounds = (min(east_shaded), max(east_shaded)) if east_shaded else None + + # West face: entirely self-shaded when sun_cross >= 0 + if west_self_shaded: + west_bounds = (0, ch) + else: + west_z = np.linspace(0, ch, n_samples) + west_shaded = [z for z in west_z if _is_panel_shaded(-cw2, z)] + west_bounds 
# This module lives at <repo>/src/shading/tracker_optimizer.py, so the
# repository root (the directory that contains the ``src`` package) is two
# levels above this file's directory. ``.parent.parent`` only reaches
# <repo>/src, which does not make ``from src...`` imports resolvable.
PROJECT_ROOT = Path(__file__).resolve().parents[2]
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))
+ Restricted to daytime hours (5:00-19:00 UTC) — no stress at night.""" + tmp = df.copy() + tmp["hour_int"] = tmp["time"].dt.hour + # Keep only daytime hours (sunrise ~5 UTC, sunset ~19 UTC for Sde Boker) + tmp = tmp[(tmp["hour_int"] >= 5) & (tmp["hour_int"] <= 19)] + pivot = tmp.pivot_table( + values="delta_t", index="hour_int", columns="date", aggfunc="mean", + ) + # Ensure all daytime hours are represented even if some have no data + full_hours = list(range(5, 20)) + pivot = pivot.reindex(full_hours) + return pivot + + +def _compute_A_at_par(row: pd.Series, par_factor: float) -> float: + """Compute A for a single row with PAR scaled by par_factor.""" + par = float(row["Air1_PAR_ref"]) * par_factor + if par <= 0: + return 0.0 + return _model.calc_photosynthesis( + PAR=par, + Tleaf=float(row["Air1_leafTemperature_ref"]), + CO2=float(row["Air1_CO2_ref"]), + VPD=float(row["Air1_VPD_ref"]), + Tair=float(row["Air1_airTemperature_ref"]), + ) + + +def _compute_A_at_par_value(row: pd.Series, par_value: float) -> float: + """Compute A for a single row with an absolute PAR value.""" + if par_value <= 0: + return 0.0 + return _model.calc_photosynthesis( + PAR=par_value, + Tleaf=float(row["Air1_leafTemperature_ref"]), + CO2=float(row["Air1_CO2_ref"]), + VPD=float(row["Air1_VPD_ref"]), + Tair=float(row["Air1_airTemperature_ref"]), + ) + + +def simulate_tilt_angles( + df: pd.DataFrame, + angles: list[int] | None = None, +) -> pd.DataFrame: + """ + For each tilt angle offset from astronomical, compute mean A and energy + fraction across the dataset using the shadow model. + Returns DataFrame with columns: angle, energy_pct, mean_A, A_pct. 
+ """ + if angles is None: + angles = [0, 5, 10, 15, 20, 25, 30, 35, 40, 45] + + # Precompute solar positions for all timestamps + times = pd.DatetimeIndex(df["time"]) + solar_pos = _shadow.get_solar_position(times) + + # Baseline A at astronomical tracking (offset=0) + baseline_A_values = [] + for idx, (_, row) in enumerate(df.iterrows()): + elev = solar_pos["solar_elevation"].iloc[idx] + azim = solar_pos["solar_azimuth"].iloc[idx] + if elev <= 2: + baseline_A_values.append(0.0) + continue + tracker = _shadow.compute_tracker_tilt(azim, elev) + theta_astro = tracker["tracker_theta"] + mask = _shadow.project_shadow(elev, azim, theta_astro) + par_dist = _shadow.compute_par_distribution( + float(row["Air1_PAR_ref"]), mask, + solar_elevation=elev, solar_azimuth=azim, tracker_tilt=theta_astro) + # Canopy-average PAR (weighted by LAI) + par_avg = float(np.average(par_dist, weights=_shadow.lai_weights, axis=0).mean()) + baseline_A_values.append(_compute_A_at_par_value(row, par_avg)) + baseline_A = np.mean(baseline_A_values) + + results = [] + for angle_offset in angles: + A_values = [] + energy_factors = [] + for idx, (_, row) in enumerate(df.iterrows()): + elev = solar_pos["solar_elevation"].iloc[idx] + azim = solar_pos["solar_azimuth"].iloc[idx] + if elev <= 2: + A_values.append(0.0) + energy_factors.append(1.0) + continue + tracker = _shadow.compute_tracker_tilt(azim, elev) + theta_astro = tracker["tracker_theta"] + aoi_astro = tracker["aoi"] + + # Apply offset + theta_shade = theta_astro + angle_offset + mask = _shadow.project_shadow(elev, azim, theta_shade) + par_dist = _shadow.compute_par_distribution( + float(row["Air1_PAR_ref"]), mask, + solar_elevation=elev, solar_azimuth=azim, tracker_tilt=theta_shade) + par_avg = float(np.average(par_dist, weights=_shadow.lai_weights, axis=0).mean()) + A_values.append(_compute_A_at_par_value(row, par_avg)) + + # Energy: cos(aoi) ratio between offset and astronomical + cos_astro = max(0.0, np.cos(np.radians(aoi_astro))) + 
cos_offset = max(0.0, np.cos(np.radians(aoi_astro + angle_offset))) + energy_factors.append( + cos_offset / cos_astro if cos_astro > 0.01 else 1.0) + + mean_A = np.mean(A_values) + energy_pct = np.mean(energy_factors) * 100 + A_pct = (mean_A / baseline_A * 100) if baseline_A > 0 else 0 + results.append({ + "angle": angle_offset, + "energy_pct": energy_pct, + "mean_A": mean_A, + "A_pct": A_pct, + }) + return pd.DataFrame(results) + + +def compute_daily_schedule( + df: pd.DataFrame, + stress_threshold: float = 2.0, + shade_angle: int = 20, +) -> pd.DataFrame: + """ + For each 15-min slot: compute astronomical tracking angle (pvlib), + and if deltaT > threshold, offset by shade_angle to shade the vine. + Computes A for both strategies using the shadow model. + """ + times = pd.DatetimeIndex(df["time"]) + solar_pos = _shadow.get_solar_position(times) + + records = [] + for idx, (_, row) in enumerate(df.iterrows()): + dt = float(row["delta_t"]) if pd.notna(row["delta_t"]) else 0.0 + stressed = dt > stress_threshold + elev = solar_pos["solar_elevation"].iloc[idx] + azim = solar_pos["solar_azimuth"].iloc[idx] + + if elev <= 2: + records.append({ + "time": row["time"], + "hour": row["hour"], + "delta_t": dt, + "stressed": stressed, + "tracker_angle": 0.0, + "recommended_angle": 0.0, + "A_baseline": 0.0, + "A_smart": 0.0, + "energy_fraction": 1.0, + }) + continue + + # Astronomical tracking angle (full sun-following) + tracker = _shadow.compute_tracker_tilt(azim, elev) + theta_astro = tracker["tracker_theta"] + aoi_astro = tracker["aoi"] + + # Baseline: full astronomical tracking + mask_baseline = _shadow.project_shadow(elev, azim, theta_astro) + par_dist_baseline = _shadow.compute_par_distribution( + float(row["Air1_PAR_ref"]), mask_baseline, + solar_elevation=elev, solar_azimuth=azim, tracker_tilt=theta_astro) + par_avg_baseline = float( + np.average(par_dist_baseline, weights=_shadow.lai_weights, axis=0).mean()) + A_baseline = _compute_A_at_par_value(row, 
def compute_season_summary(schedule: pd.DataFrame) -> dict:
    """Aggregate season totals from the daily schedule.

    Reads the ``stressed``, ``energy_fraction``, ``A_baseline`` and
    ``A_smart`` columns. Every row counts as a quarter of an hour.

    Returns a dict with energy_pct (smart energy as a percentage of one unit
    per slot; 100 for an empty schedule), A_baseline_total, A_smart_total,
    A_change_pct (0 when the baseline total is not positive), stress_hours,
    total_hours and water_savings_pct (0.08 per stress hour, capped at 30).
    """
    slot_count = len(schedule)
    hours_stressed = schedule["stressed"].sum() * 0.25  # 15-min slots

    # Full tracking yields one energy unit per slot, so the slot count is the baseline.
    smart_energy = schedule["energy_fraction"].sum()
    if slot_count > 0:
        energy_pct = smart_energy / slot_count * 100
    else:
        energy_pct = 100

    baseline_A = schedule["A_baseline"].sum()
    smart_A = schedule["A_smart"].sum()
    if baseline_A > 0:
        A_change_pct = (smart_A - baseline_A) / baseline_A * 100
    else:
        A_change_pct = 0

    summary = {
        "energy_pct": energy_pct,
        "A_baseline_total": baseline_A,
        "A_smart_total": smart_A,
        "A_change_pct": A_change_pct,
        "stress_hours": hours_stressed,
        "total_hours": slot_count * 0.25,
    }
    # Water savings estimate: grows with shaded stress hours, capped at 30%.
    summary["water_savings_pct"] = min(30.0, hours_stressed * 0.08)
    return summary
+- May (fruit-set, low stress): CWSI < 0.4 and Tleaf < 30°C → gate blocks. + In rare extreme heat: gate passes, but if the geometry still doesn't deliver + shade to the fruiting zone, no dose is selected. +- Overcast (GHI < 100 W/m²): MIN_MEANINGFUL_GHI guard fires. +- Every other edge case: geometry decides, not the calendar. +""" + +from __future__ import annotations + +import math +from dataclasses import dataclass, field +from datetime import datetime +from typing import List, Optional + +from config.settings import ( + CANDIDATE_OFFSETS, + FRUITING_ZONE_INDEX, + FRUITING_ZONE_TARGET_PAR, + MIN_MEANINGFUL_GHI, + SHADE_ELIGIBLE_CWSI_ABOVE, + SHADE_ELIGIBLE_GHI_ABOVE, + SHADE_ELIGIBLE_TLEAF_ABOVE, +) + +# Top-canopy zone index in the 3-zone ShadowModel (0=basal, 1=fruiting, 2=apical) +_TOP_ZONE_INDEX = 2 + +# Relative thresholds used in find_minimum_dose (relative to astronomical baseline) +# The panel at astronomical tracking already shades the canopy substantially at high +# sun elevations. These thresholds compare offset vs. baseline, not vs. ambient. 
class InterventionGate:
    """
    Physiological pass/fail check: is the vine stressed enough to consider
    moving the tracker away from astronomical tracking?

    Only sensor/model inputs are inspected. Time of day, month and sun angle
    are not evaluated here; geometric feasibility is left to the caller's
    follow-up dose search.

    The decision starts as "not passed" and flips to passed only when no
    stress check rejects and the biology flag is truthy. An input that is
    ``None`` skips its corresponding threshold check.
    """

    def __init__(
        self,
        min_meaningful_ghi: float = MIN_MEANINGFUL_GHI,
        shade_eligible_tleaf_above: float = SHADE_ELIGIBLE_TLEAF_ABOVE,
        shade_eligible_cwsi_above: float = SHADE_ELIGIBLE_CWSI_ABOVE,
        shade_eligible_ghi_above: float = SHADE_ELIGIBLE_GHI_ABOVE,
    ) -> None:
        """Store the four thresholds used by :meth:`evaluate`."""
        self.min_meaningful_ghi = min_meaningful_ghi
        self.shade_eligible_tleaf_above = shade_eligible_tleaf_above
        self.shade_eligible_cwsi_above = shade_eligible_cwsi_above
        self.shade_eligible_ghi_above = shade_eligible_ghi_above

    def evaluate(
        self,
        tleaf_c: Optional[float],
        ghi_w_m2: Optional[float],
        cwsi: Optional[float],
        shading_helps: Optional[bool],
        dt: Optional[datetime] = None,  # accepted but not used; preserved for logging
    ) -> GateDecision:
        """
        Run the stress checks in fixed order and report the first rejection.

        Parameters
        ----------
        tleaf_c : leaf temperature (°C)
        ghi_w_m2 : global horizontal irradiance (W/m²)
        cwsi : Crop Water Stress Index
        shading_helps : photosynthesis-model verdict on whether shading helps
        dt : slot datetime; accepted for interface compatibility, not read here

        Returns
        -------
        GateDecision
            ``passed=True`` only when no threshold check rejects and
            ``shading_helps`` is truthy; otherwise ``rejection_reason`` and
            the matching diagnostic flag describe the first failing check.
        """
        decision = GateDecision(passed=False)
        ghi_known = ghi_w_m2 is not None

        if ghi_known and ghi_w_m2 < self.min_meaningful_ghi:
            # 1. Night / deep overcast: no useful sun at all.
            decision.no_meaningful_sun = True
            decision.rejection_reason = (
                f"no_meaningful_sun:GHI={ghi_w_m2:.0f} W/m² "
                f"< {self.min_meaningful_ghi:.0f}"
            )
        elif tleaf_c is not None and tleaf_c < self.shade_eligible_tleaf_above:
            # 2. Leaf is cooler than the heat-stress threshold.
            decision.tleaf_below_threshold = True
            decision.rejection_reason = (
                f"no_heat_stress:Tleaf={tleaf_c:.1f}°C "
                f"< {self.shade_eligible_tleaf_above:.0f}°C (Rubisco transition)"
            )
        elif cwsi is not None and cwsi < self.shade_eligible_cwsi_above:
            # 3. Water stress is not confirmed.
            decision.cwsi_below_threshold = True
            decision.rejection_reason = (
                f"no_water_stress:CWSI={cwsi:.2f} "
                f"< {self.shade_eligible_cwsi_above:.2f}"
            )
        elif ghi_known and ghi_w_m2 < self.shade_eligible_ghi_above:
            # 4. Radiation load is too low to matter.
            decision.ghi_below_threshold = True
            decision.rejection_reason = (
                f"low_radiation:GHI={ghi_w_m2:.0f} W/m² "
                f"< {self.shade_eligible_ghi_above:.0f} W/m²"
            )
        else:
            # 5. The biology model has the final say.
            decision.biology_says_shade_helps = bool(shading_helps)
            if shading_helps:
                decision.passed = True
            else:
                decision.rejection_reason = (
                    "biology:shading_helps=False — vine is RuBP-limited despite high Tleaf; "
                    "possibly declining afternoon PAR or unusual conditions"
                )

        return decision
class TradeoffEngine:
    """
    Minimum-effective-dose search over candidate tilt offsets.

    Candidate offsets are tried in list order (expected to be ascending, so
    the first hit is the smallest dose). Each one is applied toward horizontal
    from the astronomical tilt and evaluated with the shadow model. The first
    offset that simultaneously satisfies all of the following is returned:

      (a) sun-side fruiting-zone PAR is below ``fruiting_zone_target_par`` AND
          at most ``_FRUITING_IMPROVEMENT_MIN`` × its astronomical baseline
      (b) top-canopy PAR is at least ``top_canopy_min_sunlit`` × its
          astronomical baseline
      (c) energy sacrifice (1 − cos(offset)) is within the slot budget

    If no offset qualifies the result is ``success=False`` with offset 0
    (stay at astronomical tracking).
    """

    def __init__(
        self,
        shadow_model=None,
        candidate_offsets: Optional[List[float]] = None,
        fruiting_zone_target_par: float = FRUITING_ZONE_TARGET_PAR,
        fruiting_zone_index: int = FRUITING_ZONE_INDEX,
        top_canopy_min_sunlit: float = _TOP_CANOPY_TOLERANCE,
    ) -> None:
        """
        Parameters
        ----------
        shadow_model : object providing ``panel_width``, ``panel_height`` and
            ``compute_face_par_zones``; created lazily when omitted
        candidate_offsets : offsets (degrees) to try; defaults to the positive
            entries of ``CANDIDATE_OFFSETS``
        fruiting_zone_target_par : absolute PAR ceiling for condition (a)
        fruiting_zone_index : index of the fruiting zone in the face arrays
        top_canopy_min_sunlit : minimum fraction of the astronomical-baseline
            top-canopy PAR that must be retained, for condition (b)
        """
        self._shadow_model = shadow_model
        self.candidate_offsets = (
            [o for o in CANDIDATE_OFFSETS if o > 0]
            if candidate_offsets is None
            else candidate_offsets
        )
        self.fruiting_zone_target_par = fruiting_zone_target_par
        self.fruiting_zone_index = fruiting_zone_index
        self.top_canopy_min_sunlit = top_canopy_min_sunlit

    @property
    def shadow_model(self):
        """Return the shadow model, constructing the default one on first use."""
        if self._shadow_model is None:
            from src.solar_geometry import ShadowModel
            self._shadow_model = ShadowModel()
        return self._shadow_model

    def _face_par_zones(
        self,
        ambient_par_umol: float,
        solar_elevation_deg: float,
        solar_azimuth_deg: float,
        tilt_deg: float,
        diffuse_fraction: float,
    ):
        """Evaluate the shadow model's per-face PAR zones at one tracker tilt."""
        return self.shadow_model.compute_face_par_zones(
            total_par=ambient_par_umol,
            solar_elevation=solar_elevation_deg,
            solar_azimuth=solar_azimuth_deg,
            tracker_tilt=tilt_deg,
            diffuse_fraction=diffuse_fraction,
        )

    def find_minimum_dose(
        self,
        ambient_par_umol: float,
        solar_elevation_deg: float,
        solar_azimuth_deg: float,
        astronomical_tilt_deg: float,
        max_sacrifice_fraction: float = 1.0,
        diffuse_fraction: float = 0.15,
    ) -> DoseResult:
        """
        Find the first candidate offset that protects the fruiting zone without
        disproportionately sacrificing top-canopy PAR or energy.

        Offset direction
        ----------------
        The offset always reduces the absolute tilt (moves toward horizontal):
        ``trial = astro − sign(astro) × offset``. When ``|astro| < 3°`` no
        offset is attempted.

        Conditions (relative to the astronomical-tracking baseline)
        -----------------------------------------------------------
        A. Sun-side fruiting-face PAR (the face with the higher baseline PAR)
           is below ``fruiting_zone_target_par`` and at most
           ``_FRUITING_IMPROVEMENT_MIN`` × baseline.
        B. Max top-face PAR is at least ``top_canopy_min_sunlit`` × baseline
           (skipped when the baseline is already ≤ 0).
        C. ``1 − cos(offset)`` ≤ ``max_sacrifice_fraction``.

        Parameters
        ----------
        ambient_par_umol : total above-canopy PAR (µmol m⁻² s⁻¹)
        solar_elevation_deg : solar elevation above horizon (°)
        solar_azimuth_deg : solar azimuth (°)
        astronomical_tilt_deg : sun-following tilt (°)
        max_sacrifice_fraction : per-slot energy budget ceiling
        diffuse_fraction : diffuse fraction of ambient PAR

        Returns
        -------
        DoseResult
            ``success=True`` with the chosen offset, or ``success=False`` with
            a rationale explaining why tracking stays astronomical.
        """
        if ambient_par_umol <= 0 or solar_elevation_deg <= 2:
            return DoseResult(
                success=False,
                rationale="Solar elevation ≤ 2° or PAR = 0; no shading meaningful.",
            )

        # Near-noon: panel already near-horizontal; moving toward horizontal
        # adds negligible additional shade. Skip entirely.
        if abs(astronomical_tilt_deg) < 3.0:
            return DoseResult(
                success=False,
                rationale=(
                    f"Near-noon: astro_tilt={astronomical_tilt_deg:.1f}° already near-horizontal; "
                    "no beneficial offset direction."
                ),
            )

        # Geometric feasibility pre-check: the panel can only intercept the
        # direct beam aimed at the fruiting zone when the horizontal reach
        # required at this elevation fits within half the panel width.
        from config.settings import FRUITING_ZONE_HEIGHT_M
        panel_half = self.shadow_model.panel_width / 2.0
        panel_height = self.shadow_model.panel_height
        height_gap = panel_height - FRUITING_ZONE_HEIGHT_M
        min_elev_for_side_block = math.degrees(
            math.atan(height_gap / max(panel_half, 0.001))
        )
        if solar_elevation_deg < min_elev_for_side_block:
            required_reach = height_gap / math.tan(math.radians(solar_elevation_deg))
            return DoseResult(
                success=False,
                rationale=(
                    f"Solar elevation {solar_elevation_deg:.1f}° < {min_elev_for_side_block:.1f}° "
                    f"— direct beam bypasses panel (needs {required_reach:.2f}m horizontal reach "
                    f"vs panel half-width {panel_half:.3f}m available). Tracker stays at θ_astro; "
                    "passive overhead shading provides all available protection."
                ),
            )

        # Baseline at astronomical tracking
        try:
            astro_pz = self._face_par_zones(
                ambient_par_umol, solar_elevation_deg, solar_azimuth_deg,
                astronomical_tilt_deg, diffuse_fraction,
            )
        except Exception as exc:
            return DoseResult(
                success=False,
                rationale=f"Shadow model error at baseline: {exc}",
            )

        # Sun-side face: the face with the higher baseline fruiting PAR.
        east_astro = float(astro_pz["east"][self.fruiting_zone_index])
        west_astro = float(astro_pz["west"][self.fruiting_zone_index])
        sun_side = "west" if west_astro >= east_astro else "east"
        fruiting_par_astro = max(east_astro, west_astro)

        # Top canopy baseline: maximum across the horizontal top face
        top_astro = float(max(astro_pz["top"]))

        # Offset direction: toward horizontal (|tilt| ≥ 3° here, so sign is defined)
        sign_astro = 1 if astronomical_tilt_deg > 0 else -1

        # Honour the instance-level tolerance instead of the module default.
        top_tolerance = self.top_canopy_min_sunlit
        fruiting_ceiling = fruiting_par_astro * _FRUITING_IMPROVEMENT_MIN

        tested: List[float] = []
        model_errors: List[str] = []
        last_eval = None  # (offset, fruiting_par, top_par, sacrifice) of the final offset

        for offset in self.candidate_offsets:
            tested.append(offset)
            trial_tilt = astronomical_tilt_deg - sign_astro * offset

            try:
                trial_pz = self._face_par_zones(
                    ambient_par_umol, solar_elevation_deg, solar_azimuth_deg,
                    trial_tilt, diffuse_fraction,
                )
            except Exception as exc:
                # Best-effort search: skip this offset, but keep the failure
                # visible in the final rationale instead of dropping it.
                model_errors.append(f"{offset}° ({exc})")
                last_eval = None
                continue

            fruiting_par_trial = float(trial_pz[sun_side][self.fruiting_zone_index])
            top_par_trial = float(max(trial_pz["top"]))
            top_par_fraction = top_par_trial / ambient_par_umol
            sacrifice_fraction = self.energy_sacrifice_fraction(offset)

            # Condition A: below the absolute target AND a real drop vs baseline
            cond_a = (
                fruiting_par_trial < self.fruiting_zone_target_par
                and fruiting_par_trial <= fruiting_ceiling
            )
            # Condition B: top canopy keeps the configured share of its baseline
            cond_b = top_astro <= 0 or top_par_trial >= top_astro * top_tolerance
            # Condition C: energy sacrifice within budget
            cond_c = sacrifice_fraction <= max_sacrifice_fraction

            if cond_a and cond_b and cond_c:
                return DoseResult(
                    success=True,
                    chosen_offset_deg=float(offset),
                    offsets_tested=tested,
                    fruiting_par_at_chosen=round(fruiting_par_trial, 1),
                    top_par_fraction=round(top_par_fraction, 3),
                    energy_sacrifice_fraction=round(sacrifice_fraction, 5),
                    rationale=(
                        f"Offset {offset}° (trial_tilt={trial_tilt:.1f}°): "
                        f"{sun_side}-face fruiting PAR {fruiting_par_trial:.0f} µmol "
                        f"(astro={fruiting_par_astro:.0f}, target <{self.fruiting_zone_target_par:.0f}), "
                        f"top canopy {top_par_trial:.0f}/{top_astro:.0f} µmol "
                        f"({top_par_trial / max(top_astro, 1) * 100:.0f}% of baseline), "
                        f"sacrifice {sacrifice_fraction * 100:.2f}%."
                    ),
                )

            last_eval = (offset, fruiting_par_trial, top_par_trial, sacrifice_fraction)

        # No offset qualified — build diagnostic
        rationale_parts = [
            f"No offset in {self.candidate_offsets}° (toward-horizontal) satisfied conditions. "
            f"Baseline: {sun_side}-face fruiting={fruiting_par_astro:.0f} µmol, "
            f"top={top_astro:.0f} µmol. Staying at θ_astro."
        ]
        if last_eval is not None:
            # Reuse the final offset's evaluation rather than ray-tracing it again.
            last, fp, tp, sf = last_eval
            fails = []
            if not (fp < self.fruiting_zone_target_par and fp <= fruiting_ceiling):
                fails.append(
                    f"fruiting {fp:.0f} µmol (need <{self.fruiting_zone_target_par:.0f} "
                    f"and ≤{fruiting_ceiling:.0f})"
                )
            if top_astro > 0 and tp < top_astro * top_tolerance:
                fails.append(
                    f"top canopy {tp:.0f} µmol < {top_astro * top_tolerance:.0f} "
                    f"({top_tolerance * 100:.0f}% of baseline {top_astro:.0f})"
                )
            if sf > max_sacrifice_fraction:
                fails.append(f"sacrifice {sf * 100:.2f}% > budget {max_sacrifice_fraction * 100:.2f}%")
            rationale_parts.append(f"At {last}°: {'; '.join(fails) or 'unknown'}.")
        if model_errors:
            rationale_parts.append(f"Shadow model errors at: {', '.join(model_errors)}.")

        return DoseResult(
            success=False,
            chosen_offset_deg=0.0,
            offsets_tested=tested,
            rationale=" ".join(rationale_parts),
        )

    @staticmethod
    def energy_sacrifice_fraction(offset_deg: float) -> float:
        """Approximate per-slot energy sacrifice: 1 − cos(offset_deg)."""
        return 1.0 - math.cos(math.radians(offset_deg))
def build_scene_data(
    hour: int = 12,
    date_str: str | None = None,
    par: float = 1800.0,
    tleaf: float = 32.0,
    co2: float = 400.0,
    vpd: float = 2.5,
    tair: float = 33.0,
) -> dict[str, Any]:
    """
    Build scene data for the 3D visualization: sun, tracker, vine geometry,
    shadow mask, PAR and A per zone.

    Parameters
    ----------
    hour : local hour (Asia/Jerusalem) of the scene
    date_str : date string; today's date when omitted or unparseable
    par, tleaf, co2, vpd, tair : inputs forwarded to the canopy model's
        ``compute_vine_A``

    Returns
    -------
    dict
        JSON-serializable scene description suitable for build_scene_html().
        ``"date"`` always names the date the scene was actually computed for.
    """
    from src.canopy_photosynthesis import CanopyPhotosynthesisModel
    from src.solar_geometry import ShadowModel

    shadow = ShadowModel()
    canopy = CanopyPhotosynthesisModel(shadow_model=shadow)

    dt_str = date_str or str(date.today())
    try:
        dt = pd.Timestamp(f"{dt_str} {hour:02d}:00:00", tz="Asia/Jerusalem")
    except Exception:
        # Best-effort fallback to today. Update dt_str as well so the
        # reported date matches the date the geometry is computed for.
        dt_str = str(date.today())
        dt = pd.Timestamp(f"{dt_str} {hour:02d}:00:00", tz="Asia/Jerusalem")

    solar_pos = shadow.get_solar_position(pd.DatetimeIndex([dt]))
    elev = float(solar_pos["solar_elevation"].iloc[0])
    azim = float(solar_pos["solar_azimuth"].iloc[0])

    # Sun direction (world: x=East, y=North, z=up), unit vector toward sun
    elev_rad = np.radians(elev)
    azim_rad = np.radians(azim)
    sun_dir = [
        float(np.cos(elev_rad) * np.sin(azim_rad)),
        float(np.cos(elev_rad) * np.cos(azim_rad)),
        float(np.sin(elev_rad)),
    ]

    grid_shape = (shadow.n_vertical, shadow.n_horizontal)
    if elev <= 2.0:
        # Night: still return geometry, fully shaded, zero A
        tracker_theta = 0.0
        shadow_mask = np.ones(grid_shape, dtype=bool)
        par_zones = np.full(grid_shape, par * 0.15)
        A_zones = np.zeros(grid_shape)
        A_vine = 0.0
        sunlit_fraction = 0.0
    else:
        tracker = shadow.compute_tracker_tilt(azim, elev)
        tracker_theta = float(tracker["tracker_theta"])
        shadow_mask = shadow.project_shadow(elev, azim, tracker_theta)
        vine_result = canopy.compute_vine_A(
            par=par, Tleaf=tleaf, CO2=co2, VPD=vpd, Tair=tair,
            shadow_mask=shadow_mask, solar_elevation=elev,
            solar_azimuth=azim, tracker_tilt=tracker_theta,
        )
        par_zones = vine_result["par_zones"]
        A_zones = vine_result["A_zones"]
        A_vine = float(vine_result["A_vine"])
        sunlit_fraction = float(vine_result["sunlit_fraction"])

    # Panel and vine box in world coords (x=East, y=North, z=up)
    panel_corners = shadow.panel_corners_world(tracker_theta, row_offset=0.0)
    vine_box = shadow.vine_box_world(row_offset=0.0)

    # Grid for zone centres (for positioning vine cells in 3D)
    grid_v = shadow._grid_v.tolist()
    grid_z = shadow._grid_z.tolist()

    def to_list(a: np.ndarray) -> list:
        """Convert a 2-D array to nested lists of plain bool or float."""
        if a.dtype == bool:
            return [[bool(x) for x in row] for row in a.tolist()]
        return [[float(x) for x in row] for row in a.tolist()]

    return {
        "hour": hour,
        "date": dt_str,
        "sun_elevation": round(elev, 2),
        "sun_azimuth": round(azim, 2),
        "sun_direction": sun_dir,
        "tracker_theta": round(tracker_theta, 2),
        "panel_corners": panel_corners.tolist(),
        "vine_box": vine_box.tolist(),
        "n_vertical": shadow.n_vertical,
        "n_horizontal": shadow.n_horizontal,
        "grid_v": grid_v,
        "grid_z": grid_z,
        "canopy_width": shadow.canopy_width,
        "canopy_height": shadow.canopy_height,
        "shadow_mask": to_list(shadow_mask),
        "par_zones": to_list(par_zones),
        "A_zones": to_list(A_zones),
        "A_vine": round(A_vine, 3),
        "sunlit_fraction": round(sunlit_fraction, 3),
    }
+ """ + # Three.js uses Y-up; world is x=East, y=North, z=up → we use (x, z, y) for Three + def w2t(w: list[float]) -> list[float]: + return [w[0], w[2], w[1]] + + A_zones = scene_data["A_zones"] + n_v = scene_data["n_vertical"] + n_h = scene_data["n_horizontal"] + grid_v = scene_data["grid_v"] + grid_z = scene_data["grid_z"] + cw = scene_data["canopy_width"] + ch = scene_data["canopy_height"] + sun_dir = scene_data["sun_direction"] + panel_corners = scene_data["panel_corners"] + vine_box = scene_data["vine_box"] + shadow_mask = scene_data["shadow_mask"] + + A_flat = [A_zones[iz][ih] for iz in range(n_v) for ih in range(n_h)] + A_min = min(A_flat) if A_flat else 0 + A_max = max(A_flat) if A_flat else 1 + A_range = (A_max - A_min) or 1 + + # Color gradient: dark green (low A) -> bright green (high A); shaded can be darker + def color_for(iz: int, ih: int) -> list[float]: + a = A_zones[iz][ih] + shaded = shadow_mask[iz][ih] + t = (a - A_min) / A_range if A_range else 0 + # 0–1 green gradient; shaded dimmed + g = 0.2 + 0.7 * t + r = 0.1 + b = 0.1 + if shaded: + g *= 0.6 + r *= 0.6 + b *= 0.6 + return [r, g, b] + + # Zone cell size + dv = (cw / n_h) if n_h else 0.1 + dz = (ch / n_v) if n_v else 0.1 + half_len = 0.4 + + cells_json = [] + for iz in range(n_v): + for ih in range(n_h): + v_c = grid_v[ih] + z_c = grid_z[iz] + # World position of cell centre (row-local v,z; u=0 at centre) + # In world, row is along u; v is cross-row. We use row_offset=0 so vine at origin. + # shadow._row_v, _row_u: world x = v*_row_v[0]+u*_row_u[0], same for y. z = z_c + # For centre of row segment: u=0, v=v_c, z=z_c → world (v_c*_row_v[0], v_c*_row_v[1], z_c) + # We don't have _row_v in scene_data; approximate: vine_box gives us extent. + # Simpler: use local v,z and assume row_u points along -Y (315°), row_v along -X + # So world x ≈ -v_c*cos(45°)= -v_c*0.707, y ≈ v_c*0.707, z=z_c. Actually from settings row_azimuth=315. + # 315°: along-row = sin(315), cos(315) = -0.707, 0.707. 
So u direction in world is (-0.707, 0.707, 0). + # v direction (cross-row) = cos(315), -sin(315) = 0.707, 0.707. So world = (v*0.707, v*0.707, z). + wx = v_c * 0.707 + wy = v_c * 0.707 + wz = z_c + cells_json.append({ + "pos": [wx, wz, wy], + "color": color_for(iz, ih), + "A": A_zones[iz][ih], + "shaded": shadow_mask[iz][ih], + }) + + panel_t3 = [w2t(p) for p in panel_corners] + sun_t3 = w2t(sun_dir) + + # Sun sphere position (far along sun direction) + sun_dist = 8.0 + sun_pos = [sun_t3[0] * sun_dist, sun_t3[1] * sun_dist, sun_t3[2] * sun_dist] + + scene_json = json.dumps({ + "cells": cells_json, + "panel": panel_t3, + "sun_pos": sun_pos, + "sun_dir": sun_t3, + "vine_box": [w2t(v) for v in vine_box], + "A_vine": scene_data["A_vine"], + "sunlit_fraction": scene_data["sunlit_fraction"], + "hour": scene_data["hour"], + "date": scene_data["date"], + "A_max": A_max, + "A_min": A_min, + }) + + html = f""" + + + + Vine photosynthesis 3D + + + +
Hour: {scene_data["hour"]:02d}:00 | Date: {scene_data["date"]} | A_vine: {scene_data["A_vine"]:.2f} µmol/m²/s | Sunlit: {scene_data["sunlit_fraction"]*100:.0f}%
+
Green = photosynthesis rate (dark = low, bright = high). Shaded zones are dimmer.
+ + + +""" + return html diff --git a/src/solar_geometry.py b/src/solar_geometry.py new file mode 100644 index 0000000000000000000000000000000000000000..16e4dc9986ba0ba7ffd12907178236c7d8045d23 --- /dev/null +++ b/src/solar_geometry.py @@ -0,0 +1,2 @@ +"""Backward-compatible re-export from src.shading.solar_geometry.""" +from src.shading.solar_geometry import * # noqa: F401, F403 diff --git a/src/spectral_aggregator.py b/src/spectral_aggregator.py new file mode 100644 index 0000000000000000000000000000000000000000..dce9350154d386fb62f35800106eeecb80502ee8 --- /dev/null +++ b/src/spectral_aggregator.py @@ -0,0 +1,233 @@ +""" +SpectralAggregator: batch preprocessing of CWSI, NDVI, and PRI indices. + +Consumes raw sensor columns (from ThingsBoard or Seymour CSVs) and produces +cleaned, gap-filled spectral indices ready for the 15-min control loop. + +Design: stateless functions, not a service. The control loop calls +``aggregate_spectral()`` each slot with raw sensor readings; the function +returns validated indices with quality flags. + +Sensor sources +-------------- +- NDVI / PRI: Air1 reference station (``Air1_NDVI_ref``, ``Air1_PRI_ref``) + and per-panel Crop devices (ThingsBoard). +- CWSI: computed from air–leaf temperature delta (proxy) or explicit + ThingsBoard telemetry if available. +- rNDVI / RENDVI: optional red-edge indices from Air1. 
def aggregate_spectral(
    *,
    ndvi: Optional[float] = None,
    pri: Optional[float] = None,
    air_temp_c: Optional[float] = None,
    leaf_temp_c: Optional[float] = None,
    cwsi_explicit: Optional[float] = None,
    vpd_kpa: Optional[float] = None,
    rndvi: Optional[float] = None,
    rendvi: Optional[float] = None,
) -> SpectralResult:
    """Validate and aggregate spectral indices for one timestep.

    CWSI is resolved from the first available source, in this order:
    explicit measurement → leaf/air temperature proxy → VPD proxy →
    0.0 with a ``cwsi_missing`` flag. A NaN input counts as missing.

    Parameters
    ----------
    ndvi : float, optional
        Raw NDVI reading.
    pri : float, optional
        Raw PRI reading.
    air_temp_c : float, optional
        Air temperature (°C) — for the CWSI proxy calculation.
    leaf_temp_c : float, optional
        Leaf temperature (°C) — for the CWSI proxy calculation.
    cwsi_explicit : float, optional
        Direct CWSI measurement (overrides the proxies).
    vpd_kpa : float, optional
        Vapour pressure deficit — last-resort CWSI proxy.
    rndvi : float, optional
        Red-edge NDVI.
    rendvi : float, optional
        Red-edge NDVI (alternative band).

    Returns
    -------
    SpectralResult
        Validated indices with quality flags.
    """

    def _present(val: Optional[float]) -> bool:
        # Same "missing" notion as _clip_or_flag: None or a float NaN.
        # Without it, NaN flows into _clip_value, whose max(lo, min(hi, nan))
        # evaluates to the upper bound and reports maximum stress.
        return val is not None and not (isinstance(val, float) and np.isnan(val))

    flags: list[str] = []

    # --- NDVI / PRI ---
    clean_ndvi = _clip_or_flag(ndvi, "ndvi", flags)
    clean_pri = _clip_or_flag(pri, "pri", flags)

    # --- CWSI ---
    if _present(cwsi_explicit):
        clean_cwsi = _clip_value(cwsi_explicit, *_BOUNDS["cwsi"])
        if cwsi_explicit != clean_cwsi:
            flags.append("cwsi_clipped")
    elif _present(leaf_temp_c) and _present(air_temp_c):
        clean_cwsi = cwsi_from_delta_t(leaf_temp_c, air_temp_c)
        flags.append("cwsi_from_delta_t")
    elif _present(vpd_kpa):
        # Last-resort VPD-based proxy: linear ramp from 1 kPa (0.0) to 5 kPa (1.0)
        clean_cwsi = _clip_value((vpd_kpa - 1.0) / 4.0, 0.0, 1.0)
        flags.append("cwsi_from_vpd")
    else:
        clean_cwsi = 0.0
        flags.append("cwsi_missing")

    # --- Optional red-edge indices ---
    clean_rndvi = _clip_or_flag(rndvi, "rndvi", flags)
    clean_rendvi = _clip_or_flag(rendvi, "rendvi", flags)

    return SpectralResult(
        ndvi=clean_ndvi,
        pri=clean_pri,
        cwsi=clean_cwsi,
        rndvi=clean_rndvi,
        rendvi=clean_rendvi,
        quality_flags=flags,
    )
--------------------------------------------------------------------------- +# Batch processing for DataFrames +# --------------------------------------------------------------------------- + +def aggregate_spectral_df( + df: pd.DataFrame, + *, + ndvi_col: str = "Air1_NDVI_ref", + pri_col: str = "Air1_PRI_ref", + air_temp_col: str = "Air1_airTemperature_ref", + leaf_temp_col: str = "Air1_leafTemperature_ref", + vpd_col: str = "Air1_VPD_ref", + cwsi_col: Optional[str] = None, + rndvi_col: str = "Air1_rNDVI_ref", + rendvi_col: str = "Air1_RENDVI_ref", +) -> pd.DataFrame: + """Process a DataFrame of raw sensor data into cleaned spectral indices. + + Returns a DataFrame with columns: ndvi, pri, cwsi, rndvi, rendvi, quality_flags. + Index is aligned to the input DataFrame. + """ + records = [] + for _, row in df.iterrows(): + result = aggregate_spectral( + ndvi=_safe_float(row, ndvi_col), + pri=_safe_float(row, pri_col), + air_temp_c=_safe_float(row, air_temp_col), + leaf_temp_c=_safe_float(row, leaf_temp_col), + cwsi_explicit=_safe_float(row, cwsi_col) if cwsi_col else None, + vpd_kpa=_safe_float(row, vpd_col), + rndvi=_safe_float(row, rndvi_col), + rendvi=_safe_float(row, rendvi_col), + ) + records.append({ + "ndvi": result.ndvi, + "pri": result.pri, + "cwsi": result.cwsi, + "rndvi": result.rndvi, + "rendvi": result.rendvi, + "is_stressed": result.is_stressed, + "pri_stress": result.pri_stress, + "quality_flags": ",".join(result.quality_flags), + }) + return pd.DataFrame(records, index=df.index) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _clip_value(val: float, lo: float, hi: float) -> float: + return max(lo, min(hi, val)) + + +def _clip_or_flag( + val: Optional[float], + name: str, + flags: list[str], +) -> Optional[float]: + """Clip value to physical bounds; flag if out-of-range or missing.""" + if val is None or (isinstance(val, 
float) and np.isnan(val)): + return None + lo, hi = _BOUNDS[name] + clipped = _clip_value(float(val), lo, hi) + if float(val) < lo or float(val) > hi: + flags.append(f"{name}_clipped") + return clipped + + +def _safe_float(row: pd.Series, col: str) -> Optional[float]: + """Extract a float from a DataFrame row, returning None if missing.""" + if col not in row.index: + return None + v = row[col] + if pd.isna(v): + return None + return float(v) diff --git a/src/thingsboard_client.py b/src/thingsboard_client.py new file mode 100644 index 0000000000000000000000000000000000000000..764997d980a5bdd47a8c557943bc84f36af02da4 --- /dev/null +++ b/src/thingsboard_client.py @@ -0,0 +1,2 @@ +"""Backward-compatible re-export from src.data.thingsboard_client.""" +from src.data.thingsboard_client import * # noqa: F401, F403 diff --git a/src/time_features.py b/src/time_features.py new file mode 100644 index 0000000000000000000000000000000000000000..a16335e82d10c76580f5577a3ee29b6f76166415 --- /dev/null +++ b/src/time_features.py @@ -0,0 +1,2 @@ +"""Backward-compatible re-export from src.forecasting.time_features.""" +from src.forecasting.time_features import * # noqa: F401, F403 diff --git a/src/tracker_dispatcher.py b/src/tracker_dispatcher.py new file mode 100644 index 0000000000000000000000000000000000000000..5bf3913b881e4a04c89750d783668992f00a0b3f --- /dev/null +++ b/src/tracker_dispatcher.py @@ -0,0 +1,236 @@ +""" +TrackerDispatcher: send tilt commands to physical trackers and verify execution. + +Sits downstream of the CommandArbiter. When an ArbiterDecision has +``dispatch=True``, the dispatcher: + 1. Sends the target angle to all 4 trackers via ThingsBoard RPC. + 2. Waits briefly, then reads actual ``angle`` telemetry. + 3. Confirms |actual − target| < tolerance for each tracker. + 4. Returns a DispatchResult with per-tracker status. + +If RPC is unavailable (e.g. customer-level API), falls back to +shared-attribute writes (``setAngle``, ``setMode``). 
# ---------------------------------------------------------------------------
# Result containers
# ---------------------------------------------------------------------------

@dataclass
class TrackerResult:
    """Execution result for a single tracker."""

    device_name: str
    target_angle: float
    actual_angle: Optional[float] = None
    error: Optional[str] = None
    verified: bool = False          # True if |actual − target| <= tolerance
    method: str = "unknown"         # "attribute", "rpc", "dry_run"


@dataclass
class DispatchResult:
    """Aggregate result for dispatching to all trackers."""

    timestamp: datetime
    target_angle: float
    source: str                     # from ArbiterDecision.source
    trackers: List[TrackerResult] = field(default_factory=list)
    all_verified: bool = False
    dry_run: bool = False

    @property
    def n_success(self) -> int:
        """Number of trackers whose read-back angle was within tolerance."""
        return sum(1 for t in self.trackers if t.verified)

    @property
    def n_failed(self) -> int:
        """Number of trackers that recorded a send or verification error."""
        return sum(1 for t in self.trackers if t.error)

    def summary(self) -> str:
        """Return a one-line, human-readable description for logging."""
        if self.dry_run:
            return f"[DRY RUN] target={self.target_angle:.1f}°, source={self.source}"
        return (
            f"target={self.target_angle:.1f}°, source={self.source}, "
            f"verified={self.n_success}/{len(self.trackers)}"
        )


# ---------------------------------------------------------------------------
# Dispatcher
# ---------------------------------------------------------------------------

class TrackerDispatcher:
    """Send angle commands to trackers and verify execution.

    Commands are written as shared attributes (``setAngle``/``setMode``)
    first; RPC is only attempted when the attribute write raises.

    Parameters
    ----------
    tb_client : ThingsBoardClient, optional
        Lazy-initialised if not provided.
    tracker_names : list[str], optional
        Override which trackers to control (default: ``TRACKER_NAMES``).
    verify_timeout_sec : float
        How long to wait before reading back actual angles.
    angle_tolerance_deg : float
        Maximum acceptable |actual − target| for verification.
    dry_run : bool
        If True, log commands but don't actually send them.
    """

    def __init__(
        self,
        tb_client=None,
        tracker_names: Optional[List[str]] = None,
        verify_timeout_sec: float = 5.0,
        angle_tolerance_deg: float = ANGLE_TOLERANCE_DEG,
        dry_run: bool = False,
    ):
        self._tb = tb_client
        # NOTE: an empty list is falsy and therefore also selects the default.
        self.tracker_names = tracker_names or TRACKER_NAMES
        self.verify_timeout = verify_timeout_sec
        self.tolerance = angle_tolerance_deg
        self.dry_run = dry_run

    def _client(self):
        """Return the ThingsBoard client, creating it on first use."""
        if self._tb is None:
            # Imported lazily so dry runs / tests need no ThingsBoard setup.
            from src.data.thingsboard_client import ThingsBoardClient
            self._tb = ThingsBoardClient()
        return self._tb

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------

    def dispatch(
        self,
        decision,
        angle_overrides: Optional[Dict[str, float]] = None,
    ) -> DispatchResult:
        """Send an ArbiterDecision to all trackers.

        Parameters
        ----------
        decision : ArbiterDecision
            Must have ``dispatch=True`` and ``angle`` set.
        angle_overrides : dict, optional
            Per-tracker angle overrides (device_name → angle).
            Trackers not in this dict get the default ``decision.angle``.
            Typically empty — all trackers follow the same pattern.

        Returns
        -------
        DispatchResult with per-tracker verification status.  When
        ``decision.dispatch`` is false nothing is sent and the result has
        no tracker entries.
        """
        now = datetime.now(tz=timezone.utc)
        result = DispatchResult(
            timestamp=now,
            target_angle=decision.angle,
            source=str(decision.source),
            dry_run=self.dry_run,
        )

        if not decision.dispatch:
            logger.debug("Decision dispatch=False, skipping: %s", decision)
            return result

        overrides = angle_overrides or {}
        for name in self.tracker_names:
            angle = overrides.get(name, decision.angle)
            result.trackers.append(self._send_to_tracker(name, angle))

        # Verify after a brief wait (let controller process the command)
        if not self.dry_run and any(t.error is None for t in result.trackers):
            time.sleep(self.verify_timeout)
            self._verify_all(result)

        # all() of an empty sequence is True; never report "verified" when
        # no tracker was actually contacted.
        result.all_verified = bool(result.trackers) and all(
            t.verified for t in result.trackers
        )
        logger.info("Dispatch: %s", result.summary())
        return result

    def read_current_angles(self) -> Dict[str, Optional[float]]:
        """Read actual angle from all trackers. Useful for status display.

        A tracker whose read fails is reported as ``None`` (and logged).
        """
        angles: Dict[str, Optional[float]] = {}
        for name in self.tracker_names:
            try:
                vals = self._client().get_latest_telemetry(name, ["angle"])
                angles[name] = vals.get("angle")
            except Exception as exc:
                logger.warning("Failed to read %s angle: %s", name, exc)
                angles[name] = None
        return angles

    # ------------------------------------------------------------------
    # Internal
    # ------------------------------------------------------------------

    def _send_to_tracker(self, device_name: str, angle: float) -> TrackerResult:
        """Send angle command to a single tracker.

        Returns a TrackerResult whose ``method`` records the channel used
        and whose ``error`` is set only when both channels failed.
        """
        tr = TrackerResult(device_name=device_name, target_angle=angle)

        if self.dry_run:
            tr.method = "dry_run"
            tr.verified = True
            logger.info("[DRY RUN] %s → %.1f°", device_name, angle)
            return tr

        client = self._client()

        # Shared attribute write is the reliable method — tracker controllers
        # poll setAngle/setMode from shared attributes on their update cycle.
        # RPC requires the device to be online for real-time communication,
        # which is not guaranteed.
        try:
            client.set_device_attributes(device_name, {"setAngle": angle, "setMode": "manual"})
        except Exception as exc:
            # Keep our own reference: the ``as`` name is unbound as soon as
            # this clause ends, so it cannot be used in the RPC handler below.
            attr_error = exc
            logger.warning("Attribute write failed for %s: %s, trying RPC", device_name, exc)
        else:
            tr.method = "attribute"
            return tr

        # Fallback: try RPC (may timeout if device is offline)
        try:
            client.send_rpc_command(device_name, "setAngle", angle)
        except Exception as rpc_exc:
            tr.error = f"attribute and RPC both failed: {attr_error}; {rpc_exc}"
            logger.error("Cannot send to %s: %s", device_name, tr.error)
        else:
            tr.method = "rpc"
        return tr

    def _verify_all(self, result: DispatchResult) -> None:
        """Read actual angles and compare to target.

        Trackers that already carry a send error are skipped.  A failed read
        is recorded in ``TrackerResult.error``; a missing ``angle`` value
        leaves the tracker unverified without an error.
        """
        for tr in result.trackers:
            if tr.error:
                continue
            try:
                vals = self._client().get_latest_telemetry(tr.device_name, ["angle"])
                raw = vals.get("angle")
                # NOTE(review): coerced in case the client returns telemetry
                # values as strings — confirm against ThingsBoardClient.
                tr.actual_angle = None if raw is None else float(raw)
                if tr.actual_angle is not None:
                    diff = abs(tr.actual_angle - tr.target_angle)
                    tr.verified = diff <= self.tolerance
                    if not tr.verified:
                        logger.warning(
                            "%s: actual=%.1f° target=%.1f° (diff=%.1f° > tol=%.1f°)",
                            tr.device_name, tr.actual_angle, tr.target_angle,
                            diff, self.tolerance,
                        )
            except Exception as exc:
                logger.warning("Verify failed for %s: %s", tr.device_name, exc)
                tr.error = f"verification read failed: {exc}"
# ---------------------------------------------------------------------------
# Data model
# ---------------------------------------------------------------------------

def _as_aware(dt: datetime) -> datetime:
    """Return *dt* timezone-aware; naive values are assumed to be UTC."""
    return dt.replace(tzinfo=timezone.utc) if dt.tzinfo is None else dt


@dataclass
class PlanAssignment:
    """A plan assigned to a scope (tracker / line / vineyard)."""

    assignment_id: int
    plan_id: str                        # plan name or ID
    plan_file: Optional[str] = None     # JSON plan filename
    vineyard_id: int = 1                # default vineyard
    line_id: Optional[int] = None       # None = vineyard-level
    tracker_id: Optional[int] = None    # None = line or vineyard-level
    priority: int = 0
    active_from: Optional[str] = None   # ISO datetime
    active_until: Optional[str] = None  # ISO datetime, None = no expiry
    reason: str = ""

    @property
    def scope(self) -> str:
        """Return ``"tracker"``, ``"line"`` or ``"vineyard"``."""
        if self.tracker_id is not None:
            return "tracker"
        if self.line_id is not None:
            return "line"
        return "vineyard"

    @property
    def scope_priority(self) -> int:
        """Higher = more specific."""
        if self.tracker_id is not None:
            return 2
        if self.line_id is not None:
            return 1
        return 0

    def is_active(self, now: Optional[datetime] = None) -> bool:
        """Return True if *now* lies in ``[active_from, active_until)``.

        A missing bound is open-ended.  Naive datetimes — either *now* or a
        stored ISO string without an offset — are interpreted as UTC, so
        they can be compared with aware values instead of raising.
        """
        now = _as_aware(now or datetime.now(tz=timezone.utc))
        if self.active_from:
            start = _as_aware(datetime.fromisoformat(self.active_from))
            if now < start:
                return False
        if self.active_until:
            end = _as_aware(datetime.fromisoformat(self.active_until))
            if now >= end:
                return False
        return True

    def to_dict(self) -> dict:
        """Serialise to a plain dict suitable for ``json.dump``."""
        return {
            "assignment_id": self.assignment_id,
            "plan_id": self.plan_id,
            "plan_file": self.plan_file,
            "vineyard_id": self.vineyard_id,
            "line_id": self.line_id,
            "tracker_id": self.tracker_id,
            "priority": self.priority,
            "active_from": self.active_from,
            "active_until": self.active_until,
            "reason": self.reason,
        }

    @classmethod
    def from_dict(cls, d: dict) -> "PlanAssignment":
        """Build an assignment from a dict, ignoring unknown keys."""
        return cls(**{k: v for k, v in d.items() if k in cls.__dataclass_fields__})


# ---------------------------------------------------------------------------
# Fleet manager (file-based)
# ---------------------------------------------------------------------------

# Tracker ↔ line mapping for Yeruham vineyard
TRACKER_LINES = {
    501: 501,   # Tracker501 on line 501
    502: 502,
    503: 503,
    509: 509,
}


def tracker_id_to_name(tracker_id: int) -> str:
    """Convert integer tracker ID to ThingsBoard device name.

    Raises ``KeyError`` for an ID that is not in ``TRACKER_ID_MAP``.
    """
    name = TRACKER_ID_MAP.get(tracker_id)
    if name is None:
        raise KeyError(f"Unknown tracker ID: {tracker_id}")
    return name


def tracker_name_to_id(name: str) -> int:
    """Convert ThingsBoard device name to integer tracker ID.

    Raises ``KeyError`` for a name that is not in ``TRACKER_ID_MAP``.
    """
    for tid, tname in TRACKER_ID_MAP.items():
        if tname == name:
            return tid
    raise KeyError(f"Unknown tracker name: {name}")


class TrackerFleet:
    """Manage plan assignments for a fleet of trackers.

    File-based storage for development; can be swapped to DB backend.

    Parameters
    ----------
    assignments_file : Path
        JSON file storing active assignments.
    """

    def __init__(self, assignments_file: Path | str = ASSIGNMENTS_FILE):
        self._file = Path(assignments_file)
        self._assignments: List[PlanAssignment] = []
        self._next_id = 1
        self._load()

    def _load(self) -> None:
        """Populate the in-memory list from the JSON file, if it exists.

        A file that cannot be read or parsed is logged and treated as empty.
        NOTE: the next ``_save()`` will then overwrite that file.
        """
        if self._file.exists():
            try:
                with open(self._file, encoding="utf-8") as f:
                    data = json.load(f)
                self._assignments = [PlanAssignment.from_dict(d) for d in data]
                if self._assignments:
                    self._next_id = max(a.assignment_id for a in self._assignments) + 1
                logger.info("Loaded %d assignments from %s",
                            len(self._assignments), self._file)
            except Exception as exc:
                logger.warning("Failed to load assignments: %s", exc)
                self._assignments = []
        else:
            self._assignments = []

    def _save(self) -> None:
        """Write every assignment (active or not) back to the JSON file."""
        self._file.parent.mkdir(parents=True, exist_ok=True)
        with open(self._file, "w", encoding="utf-8") as f:
            json.dump([a.to_dict() for a in self._assignments], f, indent=2)

    # ------------------------------------------------------------------
    # Query
    # ------------------------------------------------------------------

    def get_active_assignments(
        self,
        now: Optional[datetime] = None,
    ) -> List[PlanAssignment]:
        """Return all currently active assignments, best first.

        Ordering is by ``priority`` descending, then by scope specificity
        (tracker > line > vineyard) descending.
        """
        now = now or datetime.now(tz=timezone.utc)
        active = [a for a in self._assignments if a.is_active(now)]
        active.sort(key=lambda a: (a.priority, a.scope_priority), reverse=True)
        return active

    def get_best_assignment(
        self,
        tracker_id: int,
        now: Optional[datetime] = None,
    ) -> Optional[PlanAssignment]:
        """Return the best active assignment for a specific tracker.

        Active assignments are scanned in the order produced by
        :meth:`get_active_assignments` and the first one that applies wins.
        ``priority`` therefore dominates: a higher-priority vineyard-level
        assignment beats a lower-priority tracker-level one, and scope
        specificity only breaks ties between equal priorities.

        An assignment applies to the tracker when it is:
          - tracker-level with this ``tracker_id``, or
          - line-level for this tracker's line (per ``TRACKER_LINES``), or
          - vineyard-level (``vineyard_id`` is not checked here).
        """
        now = now or datetime.now(tz=timezone.utc)
        active = self.get_active_assignments(now)
        line_id = TRACKER_LINES.get(tracker_id)

        for a in active:
            # Tracker-level match
            if a.tracker_id == tracker_id:
                return a
            # Line-level match
            if a.tracker_id is None and a.line_id == line_id and line_id is not None:
                return a
            # Vineyard-level match
            if a.tracker_id is None and a.line_id is None:
                return a

        return None

    def get_all_best_assignments(
        self,
        now: Optional[datetime] = None,
    ) -> Dict[int, Optional[PlanAssignment]]:
        """Return the best assignment for each tracker in ``TRACKER_LINES``."""
        return {
            t_id: self.get_best_assignment(t_id, now)
            for t_id in TRACKER_LINES
        }

    # ------------------------------------------------------------------
    # Mutate
    # ------------------------------------------------------------------

    def assign(
        self,
        plan_id: str,
        plan_file: Optional[str] = None,
        tracker_id: Optional[int] = None,
        line_id: Optional[int] = None,
        vineyard_id: int = 1,
        priority: int = 0,
        reason: str = "",
        active_until: Optional[str] = None,
    ) -> PlanAssignment:
        """Create a new plan assignment, effective immediately, and persist it."""
        assignment = PlanAssignment(
            assignment_id=self._next_id,
            plan_id=plan_id,
            plan_file=plan_file,
            vineyard_id=vineyard_id,
            line_id=line_id,
            tracker_id=tracker_id,
            priority=priority,
            active_from=datetime.now(tz=timezone.utc).isoformat(),
            active_until=active_until,
            reason=reason,
        )
        self._next_id += 1
        self._assignments.append(assignment)
        self._save()
        logger.info("Created assignment %d: plan=%s scope=%s tracker=%s",
                    assignment.assignment_id, plan_id, assignment.scope, tracker_id)
        return assignment

    def expire(self, assignment_id: int) -> bool:
        """Expire an assignment by setting its active_until to now.

        Returns False when no assignment has that ID.
        """
        for a in self._assignments:
            if a.assignment_id == assignment_id:
                a.active_until = datetime.now(tz=timezone.utc).isoformat()
                self._save()
                logger.info("Expired assignment %d", assignment_id)
                return True
        return False

    def expire_all(self) -> int:
        """Expire all active assignments and return how many were changed.

        Covers open-ended assignments (``active_until`` unset) as well as
        currently active ones whose ``active_until`` lies in the future;
        assignments that have already ended are left untouched.
        """
        now_dt = datetime.now(tz=timezone.utc)
        now = now_dt.isoformat()
        count = 0
        for a in self._assignments:
            if a.active_until is None or a.is_active(now_dt):
                a.active_until = now
                count += 1
        if count:
            self._save()
        return count

    def summary(self) -> dict:
        """Return a summary of current assignments.

        Contains total and active counts plus, per known tracker, the plan,
        scope and priority of its best assignment (``None`` when unassigned).
        """
        best = self.get_all_best_assignments()
        return {
            "total_assignments": len(self._assignments),
            "active_assignments": len(self.get_active_assignments()),
            "trackers": {
                t_id: {
                    "plan": a.plan_id if a else None,
                    "scope": a.scope if a else None,
                    "priority": a.priority if a else None,
                }
                for t_id, a in best.items()
            },
        }
class TrackerScheduler:
    """Load a JSON tracking plan and resolve events by time.

    Parameters
    ----------
    plan_file : Path or str, optional
        Path to the JSON plan file.  Used only when ``plan_data`` is None.
    plan_data : dict, optional
        Pre-loaded plan dict (must contain ``"timeline"`` key).
        When neither argument is given the timeline is empty.
    latitude, longitude : float
        Site coordinates for sunrise/sunset calculation.
    timezone : str
        IANA timezone name.
    """

    def __init__(
        self,
        plan_file: Optional[Path | str] = None,
        plan_data: Optional[dict] = None,
        latitude: float = SITE_LATITUDE,
        longitude: float = SITE_LONGITUDE,
        timezone: str = "Asia/Jerusalem",
    ):
        self.latitude = latitude
        self.longitude = longitude
        self.tz = ZoneInfo(timezone)
        self.location = LocationInfo(
            timezone=timezone,
            latitude=latitude,
            longitude=longitude,
        )
        self.timeline: list[dict] = []

        if plan_data is not None:
            self.timeline = plan_data.get("timeline", [])
        elif plan_file is not None:
            self._load_plan(Path(plan_file))

    def _load_plan(self, path: Path) -> None:
        """Read the ``"timeline"`` list from a JSON plan file."""
        with open(path, encoding="utf-8") as f:
            data = json.load(f)
        self.timeline = data.get("timeline", [])
        logger.info("Loaded plan %s: %d events", path.name, len(self.timeline))

    def _resolve_time(self, event_start: str, ref_date: date) -> datetime:
        """Resolve an event start string to a timezone-aware datetime.

        ``"sunrise"`` / ``"sunset"`` are computed for *ref_date* at the
        configured location; anything else must be an ``"HH:MM"`` literal,
        interpreted in the scheduler's timezone.
        """
        if event_start in ("sunrise", "sunset"):
            s = sun(self.location.observer, date=ref_date, tzinfo=self.tz)
            return s[event_start]
        # Parse "HH:MM" literal time
        t = datetime.strptime(event_start, "%H:%M").time()
        return datetime.combine(ref_date, t, tzinfo=self.tz)

    def get_event(self, current_time: Optional[datetime] = None) -> Optional[dict]:
        """Return the active event for the given time.

        Walks the timeline in reverse and returns the first event
        whose start time is <= current_time.  If no event matches,
        returns the last event in the timeline (wrap-around).

        ``current_time`` is normalised to the scheduler's timezone first:
        a naive datetime is taken to be site-local, an aware one is
        converted, and a plain ``date`` means site-local midnight.  The
        events are then resolved for the site-local calendar day.

        Returns
        -------
        dict with keys: ``start``, ``mode``, ``angle`` (may be None).
        None if the timeline is empty.
        """
        if not self.timeline:
            return None

        now = current_time or datetime.now(self.tz)
        if not isinstance(now, datetime):
            now = datetime.combine(now, datetime.min.time(), tzinfo=self.tz)
        elif now.tzinfo is None:
            # Comparing naive with aware raises TypeError, which the handler
            # below would swallow and silently fall through to wrap-around.
            now = now.replace(tzinfo=self.tz)
        else:
            # Same instant, but .date() must be the site's calendar day.
            now = now.astimezone(self.tz)
        ref_date = now.date()

        for event in reversed(self.timeline):
            try:
                event_dt = self._resolve_time(event["start"], ref_date)
                if now >= event_dt:
                    return event
            except Exception as exc:
                # Best effort: one malformed entry must not stop scheduling.
                logger.warning("Failed to resolve event %s: %s", event, exc)
                continue

        # Before any event today — use the last event (wrap from yesterday)
        return self.timeline[-1]

    def get_all_events(self, ref_date: Optional[date] = None) -> list[dict]:
        """Return all events with resolved timestamps for a given date.

        Defaults to today's date in the scheduler's timezone.  Events that
        cannot be resolved are returned with an ``"error"`` entry instead of
        a timestamp.  Useful for display / debugging.
        """
        today = ref_date or datetime.now(self.tz).date()
        result = []
        for event in self.timeline:
            try:
                dt = self._resolve_time(event["start"], today)
                result.append({
                    "start_raw": event["start"],
                    "start_resolved": dt.isoformat(),
                    "mode": event.get("mode"),
                    "angle": event.get("angle"),
                })
            except Exception as exc:
                result.append({
                    "start_raw": event["start"],
                    "error": str(exc),
                })
        return result


# ---------------------------------------------------------------------------
# Plan library — built-in plans from the tracker repo
# ---------------------------------------------------------------------------

PLAN_LIBRARY = {
    "night-east": {
        "description": "Track sun during day, face east at night",
        "timeline": [
            {"start": "sunrise", "mode": "tracking", "angle": None},
            {"start": "sunset", "mode": "fixed_angle", "angle": 180},
        ],
    },
    "day-max": {
        "description": "Fixed east-facing during day (max morning light), track at night",
        "timeline": [
            {"start": "sunrise", "mode": "fixed_angle", "angle": 180},
            {"start": "sunset", "mode": "tracking", "angle": None},
        ],
    },
    "day-mid": {
        "description": "Fixed mid position during day, track at night",
        "timeline": [
            {"start": "sunrise", "mode": "fixed_angle", "angle": 90},
            {"start": "sunset", "mode": "tracking", "angle": None},
        ],
    },
    "full-tracking": {
        "description": "Full astronomical tracking 24/7 (default)",
        "timeline": [
            {"start": "sunrise", "mode": "tracking", "angle": None},
        ],
    },
    "shading-midday": {
        "description": "Track morning/evening, anti-track during midday heat",
        "timeline": [
            {"start": "sunrise", "mode": "tracking", "angle": None},
            {"start": "10:00", "mode": "antiTracking", "angle": 15},
            {"start": "16:00", "mode": "tracking", "angle": None},
        ],
    },
}


def get_plan(name: str) -> dict:
    """Look up a built-in plan by name.

    Returns the library's own dict (not a copy), so callers should treat it
    as read-only.  Raises ``KeyError`` listing the available names when
    *name* is unknown.
    """
    if name not in PLAN_LIBRARY:
        raise KeyError(f"Unknown plan: {name}. Available: {list(PLAN_LIBRARY.keys())}")
    return PLAN_LIBRARY[name]
def cwsi_from_delta_t(
    leaf_temp_c: Optional[float] = None,
    air_temp_c: Optional[float] = None,
    *,
    delta_t: Optional[float] = None,
) -> float:
    """Compute CWSI proxy from leaf–air temperature differential.

    Scale: 0°C delta → CWSI=0, 10°C delta → CWSI=1.
    Clamped to [0.0, 1.0].

    Can be called with (leaf_temp_c, air_temp_c) or with delta_t= directly;
    an explicit ``delta_t`` takes precedence over the two temperatures.

    A NaN differential is returned as NaN rather than clamped, so a missing
    reading is never reported as maximum stress.

    Raises
    ------
    ValueError
        If ``delta_t`` is not given and either temperature is missing.
    """
    if delta_t is None:
        if leaf_temp_c is None or air_temp_c is None:
            raise ValueError("Provide (leaf_temp_c, air_temp_c) or delta_t=")
        delta_t = leaf_temp_c - air_temp_c
    # NaN is the only value unequal to itself.  Without this guard,
    # min(1.0, nan) evaluates to 1.0 and the clamp would report CWSI = 1.
    if delta_t != delta_t:
        return float("nan")
    return max(0.0, min(1.0, delta_t / 10.0))