# Source: Hugging Face Space "Hodfa71/argus-mlops" (status: Sleeping; file size: 17,846 bytes)

"""Dashboard diagnostic tests.

Covers two layers:
  1. Unit tests  — data-loading functions work independently of Streamlit
  2. Selenium UI tests — the rendered dashboard shows the expected elements

Usage:
    pytest tests/test_dashboard.py -v
    pytest tests/test_dashboard.py -v -m "not selenium"  # unit tests only
    pytest tests/test_dashboard.py -v -m selenium        # UI tests only (needs running dashboard)

The Selenium tests expect the Streamlit dashboard at DASHBOARD_URL (default
http://localhost:8501).  Start it first with:
    .venv/Scripts/python run.py --no-sim
"""
from __future__ import annotations

import json
import os
import time
from pathlib import Path

import pandas as pd
import pytest

# Directory two levels above this file; all log paths below hang off it.
ROOT = Path(__file__).resolve().parent.parent

# Base URL of the dashboard under test; the DASHBOARD_URL env var overrides it.
DASHBOARD_URL = os.getenv("DASHBOARD_URL", "http://localhost:8501")

# Logical log name -> JSONL file under <ROOT>/data/logs.
LOG_PATHS = {
    log_name: ROOT.joinpath("data", "logs", file_name)
    for log_name, file_name in (
        ("performance", "performance.jsonl"),
        ("drift", "drift_reports.jsonl"),
        ("retrain", "retraining.jsonl"),
        ("predictions", "predictions.jsonl"),
    )
}


# ---------------------------------------------------------------------------
# Helpers (mirror dashboard logic without Streamlit dependency)
# ---------------------------------------------------------------------------

def _load_jsonl(path: Path, limit: int = 2000) -> pd.DataFrame:
    """Load the trailing lines of a JSON Lines file into a DataFrame.

    Unusable lines are skipped rather than raised on.  That covers lines that
    are not valid JSON (including blank lines) and lines that are valid JSON
    but not an object (``null``, a bare number, a list, ...), since only
    objects can become rows.

    Args:
        path: File to read (decoded as UTF-8).
        limit: Maximum number of trailing lines to consider.  The lines are
            taken with ``[-limit:]``, so ``0`` selects the whole file.

    Returns:
        One row per usable record, or an empty DataFrame when the file does
        not exist or contains no usable record.
    """
    if not path.exists():
        return pd.DataFrame()

    tail = path.read_text(encoding="utf-8").splitlines()[-limit:]
    records = []
    for line in tail:
        try:
            record = json.loads(line)
        except json.JSONDecodeError:
            continue
        # Non-object values would break or corrupt DataFrame construction
        # when mixed with dict rows, so treat them like corrupted lines.
        if isinstance(record, dict):
            records.append(record)
    return pd.DataFrame(records) if records else pd.DataFrame()


# ---------------------------------------------------------------------------
# Unit tests
# ---------------------------------------------------------------------------

class TestDataLoading:
    """Check that the JSONL logs are present and parse into usable frames.

    Tests that need real log content skip (rather than fail) when the
    corresponding file is missing or empty, except for the explicit
    existence checks.
    """

    def test_performance_file_exists(self):
        perf_log = LOG_PATHS["performance"]
        assert perf_log.exists(), (
            "performance.jsonl not found — run the simulation first: "
            "python scripts/simulate_drift.py"
        )

    def test_performance_file_has_rmse_column(self):
        df = _load_jsonl(LOG_PATHS["performance"])
        has_rows = not df.empty
        assert has_rows, "performance.jsonl is empty"
        assert "rmse" in df.columns, f"Expected 'rmse' column; got {list(df.columns)}"

    def test_performance_rmse_values_are_positive(self):
        perf = _load_jsonl(LOG_PATHS["performance"])
        if perf.empty:
            pytest.skip("No performance data yet")
        every_value_positive = (perf["rmse"] > 0).all()
        assert every_value_positive, "RMSE values must be positive"

    def test_performance_file_has_required_columns(self):
        perf = _load_jsonl(LOG_PATHS["performance"])
        if perf.empty:
            pytest.skip("No performance data yet")
        present = set(perf.columns)
        missing = {"rmse", "mae", "r2", "n_samples", "timestamp"} - present
        assert not missing, f"Missing columns in performance log: {missing}"

    def test_predictions_file_exists_and_has_data(self):
        predictions_log = LOG_PATHS["predictions"]
        assert predictions_log.exists(), "predictions.jsonl not found"
        predictions = _load_jsonl(predictions_log)
        assert not predictions.empty, "predictions.jsonl is empty"

    def test_drift_file_structure(self):
        drift_log = LOG_PATHS["drift"]
        if not drift_log.exists():
            pytest.skip("No drift reports yet")
        df = _load_jsonl(drift_log)
        assert not df.empty
        assert "drift_detected" in df.columns, (
            f"Expected 'drift_detected'; got {list(df.columns)}"
        )

    def test_path_resolution_is_correct(self):
        """Two levels up from dashboard/app.py must be the repo root."""
        app_file = (ROOT / "dashboard" / "app.py").resolve()
        resolved_root = app_file.parent.parent
        assert resolved_root == ROOT.resolve(), (
            f"Path mismatch: dashboard resolves to {resolved_root}, "
            f"expected {ROOT.resolve()}"
        )

    def test_load_jsonl_returns_dataframe_not_empty_when_file_has_data(self):
        perf_log = LOG_PATHS["performance"]
        if not perf_log.exists():
            pytest.skip("No performance data yet")
        loaded = _load_jsonl(perf_log)
        assert isinstance(loaded, pd.DataFrame)
        assert not loaded.empty
        assert len(loaded) > 0

    def test_load_jsonl_handles_missing_file_gracefully(self):
        absent = ROOT / "data" / "logs" / "nonexistent.jsonl"
        loaded = _load_jsonl(absent)
        assert isinstance(loaded, pd.DataFrame)
        assert loaded.empty

    def test_load_jsonl_handles_corrupted_lines_gracefully(self):
        import tempfile

        # Two valid records with one unparseable line between them.
        lines = [
            '{"rmse": 1.5, "mae": 1.2}\n',
            "NOT JSON\n",
            '{"rmse": 1.6, "mae": 1.3}\n',
        ]
        # delete=False so the file can be reopened by path after closing;
        # it is removed manually in the finally clause.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".jsonl", delete=False, encoding="utf-8"
        ) as handle:
            handle.writelines(lines)
            scratch = Path(handle.name)
        try:
            loaded = _load_jsonl(scratch)
            assert len(loaded) == 2, "Should skip corrupted lines and keep valid ones"
            assert list(loaded["rmse"]) == [1.5, 1.6]
        finally:
            scratch.unlink()


# ---------------------------------------------------------------------------
# Selenium UI tests
# ---------------------------------------------------------------------------

def _get_driver():
    """Build a headless Chrome WebDriver using a webdriver-manager binary.

    Selenium and webdriver-manager are imported here, not at module level,
    so they are only needed when a browser is actually requested.
    """
    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options
    from selenium.webdriver.chrome.service import Service
    from webdriver_manager.chrome import ChromeDriverManager

    chrome_flags = (
        "--headless=new",
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--window-size=1600,900",
    )
    chrome_options = Options()
    for flag in chrome_flags:
        chrome_options.add_argument(flag)

    driver_binary = ChromeDriverManager().install()
    return webdriver.Chrome(service=Service(driver_binary), options=chrome_options)


def _dashboard_reachable() -> bool:
    """Report whether DASHBOARD_URL can be opened within a 3-second timeout.

    Returns:
        True if ``urlopen`` succeeds, False if it raises for any reason
        (connection refused, timeout, HTTP error status, malformed URL, ...).
    """
    import urllib.request

    try:
        # Use the response as a context manager so it is closed right away
        # instead of being left open until garbage collection.
        with urllib.request.urlopen(DASHBOARD_URL, timeout=3):
            return True
    except Exception:
        # Deliberately broad: this is a best-effort probe whose only job is
        # to decide whether the Selenium tests should be skipped.
        return False


@pytest.fixture(scope="module")
def driver():
    """Yield one headless Chrome session, already pointed at the dashboard.

    The fixture is module-scoped, so every Selenium test in this file shares
    the same browser and the same loaded page.  When the dashboard cannot be
    reached, the tests that request this fixture are skipped.

    Yields:
        The WebDriver after loading DASHBOARD_URL and pausing 6 seconds.
    """
    if not _dashboard_reachable():
        pytest.skip(f"Dashboard not running at {DASHBOARD_URL}")
    drv = _get_driver()
    try:
        drv.get(DASHBOARD_URL)
        # Fixed pause before any test inspects the page — presumably to let
        # Streamlit finish its initial render.
        time.sleep(6)
        yield drv
    finally:
        # Runs on setup failure too (e.g. drv.get raising), so the browser
        # process is never leaked.
        drv.quit()


@pytest.mark.selenium
class TestDashboardUI:
    """Selenium tests against the live Streamlit dashboard.

    Every test receives the module-scoped ``driver`` fixture and inspects the
    page it has loaded.  Selenium is imported lazily inside the methods
    (presumably so the unit tests in this file can run without it).
    """

    @staticmethod
    def _body_text(driver) -> str:
        """Return the visible text of the page's ``<body>`` element."""
        from selenium.webdriver.common.by import By
        return driver.find_element(By.TAG_NAME, "body").text

    def test_page_title_is_argus(self, driver):
        assert "Argus" in driver.title, (
            f"Expected 'Argus' in page title, got: {driver.title!r}"
        )

    def test_sidebar_is_visible(self, driver):
        from selenium.webdriver.common.by import By
        sidebar = driver.find_elements(By.CSS_SELECTOR, "[data-testid='stSidebar']")
        assert sidebar, "Sidebar element not found"

    def test_api_status_shown_in_sidebar(self, driver):
        body_text = self._body_text(driver)
        assert any(kw in body_text for kw in ("API Online", "API Offline")), (
            "Expected API status badge in sidebar"
        )

    def test_navigation_pages_present(self, driver):
        body_text = self._body_text(driver)
        for page in ("Overview", "Drift Analysis", "Feature Insights",
                     "Retraining Log", "Live Demo"):
            assert page in body_text, f"Navigation option '{page}' not found"

    def test_overview_metrics_rendered(self, driver):
        body_text = self._body_text(driver)
        for label in ("Rolling RMSE", "Baseline RMSE", "Labeled Samples"):
            assert label in body_text, f"Metric '{label}' not visible on Overview"

    def test_no_python_traceback_on_page(self, driver):
        body_text = self._body_text(driver)
        assert "Traceback (most recent call last)" not in body_text, (
            "Python traceback found on dashboard page"
        )

    def test_chart_renders_when_data_present(self, driver):
        """If performance data exists, the RMSE chart must be visible (not 'No data')."""
        if not LOG_PATHS["performance"].exists():
            pytest.skip("No performance data — chart absence is expected")
        df = _load_jsonl(LOG_PATHS["performance"])
        if df.empty:
            pytest.skip("performance.jsonl is empty — chart absence is expected")

        body_text = self._body_text(driver)
        no_data_msg = "No performance data yet"
        assert no_data_msg not in body_text, (
            f"Dashboard shows '{no_data_msg}' but performance.jsonl has "
            f"{len(df)} rows. Root cause: auto-refresh clears the cache "
            "BEFORE chart code runs, causing an infinite blank loop."
        )

    def test_refresh_now_button_exists(self, driver):
        from selenium.webdriver.common.by import By
        buttons = driver.find_elements(By.TAG_NAME, "button")
        labels = [b.text.strip() for b in buttons]
        assert "Refresh Now" in labels, (
            f"'Refresh Now' button not found. Available buttons: {labels}"
        )

    def test_clicking_refresh_loads_chart(self, driver):
        """Click Refresh Now and verify the chart is shown 8 seconds later."""
        if not LOG_PATHS["performance"].exists():
            pytest.skip("No performance data")
        df = _load_jsonl(LOG_PATHS["performance"])
        if df.empty:
            pytest.skip("performance.jsonl is empty")

        from selenium.webdriver.common.by import By

        refresh_button = next(
            (
                btn
                for btn in driver.find_elements(By.TAG_NAME, "button")
                if btn.text.strip() == "Refresh Now"
            ),
            None,
        )
        # Without this guard the test would pass without ever clicking
        # anything whenever the button is missing.
        assert refresh_button is not None, (
            "'Refresh Now' button not found; cannot exercise the refresh path"
        )
        refresh_button.click()

        # Fixed pause so the assertion sees the page after the rerun that the
        # click triggers, not the stale pre-click content.
        time.sleep(8)
        body_text = self._body_text(driver)
        no_data_msg = "No performance data yet"
        assert no_data_msg not in body_text, (
            "Chart still absent after clicking Refresh Now"
        )

    def test_screenshot_on_failure(self, driver, request):
        """Save a screenshot to assets/test_screenshot.png for inspection.

        Despite the name, this runs unconditionally; ``request`` is accepted
        but not used.
        """
        screenshot_path = ROOT / "assets" / "test_screenshot.png"
        # save_screenshot reports I/O problems via its return value, so make
        # sure the target directory exists and check the result.
        screenshot_path.parent.mkdir(parents=True, exist_ok=True)
        assert driver.save_screenshot(str(screenshot_path)), (
            f"Could not write screenshot to {screenshot_path}"
        )


# ---------------------------------------------------------------------------
# Unit tests: fix #1 — baseline RMSE must not use iloc[0] from the log
# ---------------------------------------------------------------------------

class TestBaselineRmseLogic:
    """
    Verify that the baseline hline calculation uses api_metrics baseline_rmse
    rather than the first row of the performance log.

    Before the fix:  bsl = perf_df["rmse"].iloc[0]
    After the fix:   bsl = baseline or perf_df["rmse"].min()

    If the log starts mid-drift (high RMSE), iloc[0] would have been wrong.
    """

    def _bsl(self, api_baseline, perf_rmse_values: list) -> float:
        """Replicate the fixed dashboard bsl calculation.

        Args:
            api_baseline: Baseline RMSE reported by the API, or a falsy value
                (``None``, ``0``) when it is unavailable.
            perf_rmse_values: RMSE values standing in for the performance log.

        Returns:
            ``api_baseline`` when it is truthy, otherwise the minimum of
            ``perf_rmse_values`` as a float.
        """
        df = pd.DataFrame({"rmse": perf_rmse_values})
        # Same shape as the expression quoted in the class docstring.
        return api_baseline or float(df["rmse"].min())

    def test_uses_api_baseline_when_available(self):
        # Log starts at a high value (simulating mid-drift start)
        rmse_series = [10.5, 10.8, 11.2, 11.0, 10.9]
        bsl = self._bsl(api_baseline=2.1, perf_rmse_values=rmse_series)
        assert bsl == 2.1, (
            f"Expected api baseline 2.1, got {bsl}. "
            "Fix is not applied: bsl must come from api_metrics, not iloc[0]."
        )

    def test_falls_back_to_min_when_api_unavailable(self):
        rmse_series = [1.8, 2.1, 5.3, 9.0, 3.2]
        bsl = self._bsl(api_baseline=None, perf_rmse_values=rmse_series)
        assert bsl == 1.8, (
            f"Fallback should be min(rmse)=1.8, got {bsl}."
        )

    def test_old_iloc0_would_have_failed_mid_drift(self):
        """Demonstrate the old bug: iloc[0] on a mid-drift log gives wrong baseline."""
        rmse_series = [10.5, 10.8, 11.2, 11.0, 10.9]
        df = pd.DataFrame({"rmse": rmse_series})
        old_bsl = df["rmse"].iloc[0]   # old (broken) logic
        assert old_bsl == 10.5, "Setup check: old logic picks high value"
        # The old bsl would set the baseline hline at 10.5 instead of ~2.1,
        # causing the chart to look flat (everything near or above "baseline")
        assert old_bsl > 5.0, (
            "Old baseline would have been unreasonably high — confirms the bug."
        )

    def test_alert_threshold_is_correct_fraction_of_bsl(self):
        """Alert hline must be 15% above baseline."""
        bsl = 2.131
        alert = bsl * 1.15
        assert abs(alert - 2.451) < 0.01, f"Alert threshold wrong: {alert:.3f}"


# ---------------------------------------------------------------------------
# Unit tests: fix #2 — R² y-axis must accommodate negative values
# ---------------------------------------------------------------------------

class TestR2AxisScaling:
    """
    Check that the lower bound of the R² chart's y-axis follows the data below
    zero rather than stopping at 0.

    Before the fix:  range=[0, 1.05]  (negative values invisible)
    After the fix:   range=[r2_floor, 1.05]  where r2_floor < 0 when data dips negative
    """

    def _r2_floor(self, r2_values: list) -> float:
        """Replicate the fixed dashboard r2_floor calculation.

        With a negative minimum the floor sits 0.05 below it, but never above
        -0.1; otherwise the floor is a constant -0.05.
        """
        lowest = min(r2_values)
        if lowest < 0:
            return min(lowest - 0.05, -0.1)
        return -0.05

    def test_negative_r2_produces_negative_floor(self):
        floor = self._r2_floor([0.91, 0.60, -0.49, -1.22, 0.83])
        assert floor < 0, f"r2_floor must be negative when data goes below 0, got {floor}"
        deepest_allowed = -1.22 - 0.05
        assert floor <= deepest_allowed, (
            f"Floor {floor} is not low enough to show min r2=-1.22 "
            "(should be min - 0.05 = -1.27)"
        )

    def test_all_positive_r2_uses_small_negative_floor(self):
        floor = self._r2_floor([0.91, 0.88, 0.93, 0.85])
        assert floor == -0.05, (
            f"When all R² > 0, floor should be -0.05 for breathing room, got {floor}"
        )

    def test_floor_is_below_min_r2(self):
        """The floor may never sit above the smallest R², or data would be clipped."""
        negative_minimums = [-0.05, -0.5, -1.0, -1.22]
        for min_r2 in negative_minimums:
            floor = self._r2_floor([0.9, min_r2])
            assert floor <= min_r2, (
                f"At min_r2={min_r2}, floor={floor} clips data (must be <= min_r2)"
            )

    def test_old_hardcoded_range_clipped_negative_r2(self):
        """Show that the old range=[0, 1.05] would have hidden the negative data."""
        legacy_floor = 0
        observed_min = -1.22
        assert observed_min < legacy_floor, (
            "Confirms bug: min R² in data is below old y-axis floor of 0"
        )


# ---------------------------------------------------------------------------
# Selenium: verify chart renders correctly with fixed logic
# ---------------------------------------------------------------------------

@pytest.mark.selenium
class TestChartFixes:
    """End-to-end Selenium tests verifying the two chart fixes in production."""

    def test_overview_chart_section_visible(self, driver):
        from selenium.webdriver.common.by import By
        body = driver.find_element(By.TAG_NAME, "body").text
        assert "Prediction Error Over Time" in body, (
            "RMSE chart section heading not visible on Overview"
        )

    def test_baseline_annotation_present_in_chart(self, driver):
        """
        The 'Baseline' hline annotation must appear in the rendered SVG.
        If bsl was computed from a high iloc[0], the annotation would still
        appear but at the wrong Y level — this confirms it's rendered at all.
        """
        from selenium.webdriver.common.by import By
        page_source = driver.page_source
        assert "Baseline" in page_source, (
            "Baseline annotation not found in rendered page source. "
            "Chart may not have rendered."
        )

    def test_alert_annotation_present_in_chart(self, driver):
        from selenium.webdriver.common.by import By
        page_source = driver.page_source
        assert "Alert" in page_source or "+15%" in page_source, (
            "Alert +15% annotation not found in rendered chart."
        )

    def test_r2_chart_section_visible(self, driver):
        from selenium.webdriver.common.by import By
        page_source = driver.page_source
        # R² label should appear as an axis title in the SVG
        assert "R²" in page_source or "R\u00b2" in page_source, (
            "R² chart axis label not found — chart may not have rendered."
        )

    def test_no_traceback_on_overview(self, driver):
        from selenium.webdriver.common.by import By
        assert "Traceback (most recent call last)" not in \
            driver.find_element(By.TAG_NAME, "body").text

    def test_overview_screenshot_with_fixes(self, driver):
        """Save a screenshot showing the fixed chart for visual verification."""
        screenshot_path = ROOT / "assets" / "overview_chart_fixed.png"
        driver.save_screenshot(str(screenshot_path))