"""Dashboard diagnostic tests. Covers two layers: 1. Unit tests — data-loading functions work independently of Streamlit 2. Selenium UI tests — the rendered dashboard shows the expected elements Usage: pytest tests/test_dashboard.py -v pytest tests/test_dashboard.py -v -k unit # unit tests only pytest tests/test_dashboard.py -v -k selenium # UI tests only (needs running dashboard) The Selenium tests expect the Streamlit dashboard at DASHBOARD_URL (default http://localhost:8501). Start it first with: .venv/Scripts/python run.py --no-sim """ from __future__ import annotations import json import os import time from pathlib import Path import pandas as pd import pytest ROOT = Path(__file__).resolve().parent.parent DASHBOARD_URL = os.environ.get("DASHBOARD_URL", "http://localhost:8501") LOG_PATHS = { "performance": ROOT / "data" / "logs" / "performance.jsonl", "drift": ROOT / "data" / "logs" / "drift_reports.jsonl", "retrain": ROOT / "data" / "logs" / "retraining.jsonl", "predictions": ROOT / "data" / "logs" / "predictions.jsonl", } # --------------------------------------------------------------------------- # Helpers (mirror dashboard logic without Streamlit dependency) # --------------------------------------------------------------------------- def _load_jsonl(path: Path, limit: int = 2000) -> pd.DataFrame: if not path.exists(): return pd.DataFrame() lines = path.read_text(encoding="utf-8").splitlines()[-limit:] records = [] for line in lines: try: records.append(json.loads(line)) except json.JSONDecodeError: pass return pd.DataFrame(records) if records else pd.DataFrame() # --------------------------------------------------------------------------- # Unit tests # --------------------------------------------------------------------------- class TestDataLoading: """Verify that log files exist and load correctly.""" def test_performance_file_exists(self): assert LOG_PATHS["performance"].exists(), ( "performance.jsonl not found — run the simulation first: " "python scripts/simulate_drift.py" ) def test_performance_file_has_rmse_column(self): df = _load_jsonl(LOG_PATHS["performance"]) assert not df.empty, "performance.jsonl is empty" assert "rmse" in df.columns, f"Expected 'rmse' column; got {list(df.columns)}" def test_performance_rmse_values_are_positive(self): df = _load_jsonl(LOG_PATHS["performance"]) if df.empty: pytest.skip("No performance data yet") assert (df["rmse"] > 0).all(), "RMSE values must be positive" def test_performance_file_has_required_columns(self): df = _load_jsonl(LOG_PATHS["performance"]) if df.empty: pytest.skip("No performance data yet") required = {"rmse", "mae", "r2", "n_samples", "timestamp"} missing = required - set(df.columns) assert not missing, f"Missing columns in performance log: {missing}" def test_predictions_file_exists_and_has_data(self): assert LOG_PATHS["predictions"].exists(), "predictions.jsonl not found" df = _load_jsonl(LOG_PATHS["predictions"]) assert not df.empty, "predictions.jsonl is empty" def test_drift_file_structure(self): if not LOG_PATHS["drift"].exists(): pytest.skip("No drift reports yet") df = _load_jsonl(LOG_PATHS["drift"]) assert not df.empty assert "drift_detected" in df.columns, ( f"Expected 'drift_detected'; got {list(df.columns)}" ) def test_path_resolution_is_correct(self): """PROJECT_ROOT computed from dashboard/app.py must point to repo root.""" dashboard_file = ROOT / "dashboard" / "app.py" resolved_root = dashboard_file.resolve().parent.parent assert resolved_root == ROOT.resolve(), ( f"Path mismatch: dashboard resolves to {resolved_root}, " f"expected {ROOT.resolve()}" ) def test_load_jsonl_returns_dataframe_not_empty_when_file_has_data(self): path = LOG_PATHS["performance"] if not path.exists(): pytest.skip("No performance data yet") df = _load_jsonl(path) assert isinstance(df, pd.DataFrame) assert not df.empty assert len(df) > 0 def test_load_jsonl_handles_missing_file_gracefully(self): df = _load_jsonl(ROOT / "data" / "logs" / "nonexistent.jsonl") assert isinstance(df, pd.DataFrame) assert df.empty def test_load_jsonl_handles_corrupted_lines_gracefully(self): import tempfile with tempfile.NamedTemporaryFile( mode="w", suffix=".jsonl", delete=False, encoding="utf-8" ) as f: f.write('{"rmse": 1.5, "mae": 1.2}\n') f.write("NOT JSON\n") f.write('{"rmse": 1.6, "mae": 1.3}\n') tmp = Path(f.name) try: df = _load_jsonl(tmp) assert len(df) == 2, "Should skip corrupted lines and keep valid ones" assert list(df["rmse"]) == [1.5, 1.6] finally: tmp.unlink() # --------------------------------------------------------------------------- # Selenium UI tests # --------------------------------------------------------------------------- def _get_driver(): """Return a headless Chrome driver via webdriver-manager.""" from selenium import webdriver from selenium.webdriver.chrome.options import Options from selenium.webdriver.chrome.service import Service from webdriver_manager.chrome import ChromeDriverManager opts = Options() opts.add_argument("--headless=new") opts.add_argument("--no-sandbox") opts.add_argument("--disable-dev-shm-usage") opts.add_argument("--window-size=1600,900") service = Service(ChromeDriverManager().install()) return webdriver.Chrome(service=service, options=opts) def _dashboard_reachable() -> bool: try: import urllib.request urllib.request.urlopen(DASHBOARD_URL, timeout=3) return True except Exception: return False @pytest.fixture(scope="module") def driver(): if not _dashboard_reachable(): pytest.skip(f"Dashboard not running at {DASHBOARD_URL}") drv = _get_driver() drv.get(DASHBOARD_URL) time.sleep(6) yield drv drv.quit() @pytest.mark.selenium class TestDashboardUI: """Selenium tests against the live Streamlit dashboard.""" def test_page_title_is_argus(self, driver): assert "Argus" in driver.title, ( f"Expected 'Argus' in page title, got: {driver.title!r}" ) def test_sidebar_is_visible(self, driver): from selenium.webdriver.common.by import By sidebar = driver.find_elements(By.CSS_SELECTOR, "[data-testid='stSidebar']") assert sidebar, "Sidebar element not found" def test_api_status_shown_in_sidebar(self, driver): from selenium.webdriver.common.by import By body_text = driver.find_element(By.TAG_NAME, "body").text assert any(kw in body_text for kw in ("API Online", "API Offline")), ( "Expected API status badge in sidebar" ) def test_navigation_pages_present(self, driver): from selenium.webdriver.common.by import By body_text = driver.find_element(By.TAG_NAME, "body").text for page in ("Overview", "Drift Analysis", "Feature Insights", "Retraining Log", "Live Demo"): assert page in body_text, f"Navigation option '{page}' not found" def test_overview_metrics_rendered(self, driver): from selenium.webdriver.common.by import By body_text = driver.find_element(By.TAG_NAME, "body").text for label in ("Rolling RMSE", "Baseline RMSE", "Labeled Samples"): assert label in body_text, f"Metric '{label}' not visible on Overview" def test_no_python_traceback_on_page(self, driver): from selenium.webdriver.common.by import By body_text = driver.find_element(By.TAG_NAME, "body").text assert "Traceback (most recent call last)" not in body_text, ( "Python traceback found on dashboard page" ) def test_chart_renders_when_data_present(self, driver): """If performance data exists, the RMSE chart must be visible (not 'No data').""" if not LOG_PATHS["performance"].exists(): pytest.skip("No performance data — chart absence is expected") df = _load_jsonl(LOG_PATHS["performance"]) if df.empty: pytest.skip("performance.jsonl is empty — chart absence is expected") from selenium.webdriver.common.by import By body_text = driver.find_element(By.TAG_NAME, "body").text no_data_msg = "No performance data yet" assert no_data_msg not in body_text, ( f"Dashboard shows '{no_data_msg}' but performance.jsonl has " f"{len(df)} rows. Root cause: auto-refresh clears the cache " "BEFORE chart code runs, causing an infinite blank loop." ) def test_refresh_now_button_exists(self, driver): from selenium.webdriver.common.by import By buttons = driver.find_elements(By.TAG_NAME, "button") labels = [b.text.strip() for b in buttons] assert "Refresh Now" in labels, ( f"'Refresh Now' button not found. Available buttons: {labels}" ) def test_clicking_refresh_loads_chart(self, driver): """Click Refresh Now and verify the chart appears within 10 seconds.""" if not LOG_PATHS["performance"].exists(): pytest.skip("No performance data") df = _load_jsonl(LOG_PATHS["performance"]) if df.empty: pytest.skip("performance.jsonl is empty") from selenium.webdriver.common.by import By from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC buttons = driver.find_elements(By.TAG_NAME, "button") for btn in buttons: if btn.text.strip() == "Refresh Now": btn.click() break time.sleep(8) body_text = driver.find_element(By.TAG_NAME, "body").text no_data_msg = "No performance data yet" assert no_data_msg not in body_text, ( "Chart still absent after clicking Refresh Now" ) def test_screenshot_on_failure(self, driver, request): """Save a screenshot to assets/test_screenshot.png for inspection.""" screenshot_path = ROOT / "assets" / "test_screenshot.png" driver.save_screenshot(str(screenshot_path)) # --------------------------------------------------------------------------- # Unit tests: fix #1 — baseline RMSE must not use iloc[0] from the log # --------------------------------------------------------------------------- class TestBaselineRmseLogic: """ Verify that the baseline hline calculation uses api_metrics baseline_rmse rather than the first row of the performance log. Before the fix: bsl = perf_df["rmse"].iloc[0] After the fix: bsl = baseline or perf_df["rmse"].min() If the log starts mid-drift (high RMSE), iloc[0] would have been wrong. """ def _bsl(self, api_baseline, perf_rmse_values: list) -> float: """Replicate the fixed dashboard bsl calculation.""" import numpy as np df = pd.DataFrame({"rmse": perf_rmse_values}) baseline = api_baseline return baseline if baseline else float(df["rmse"].min()) def test_uses_api_baseline_when_available(self): # Log starts at a high value (simulating mid-drift start) rmse_series = [10.5, 10.8, 11.2, 11.0, 10.9] bsl = self._bsl(api_baseline=2.1, perf_rmse_values=rmse_series) assert bsl == 2.1, ( f"Expected api baseline 2.1, got {bsl}. " "Fix is not applied: bsl must come from api_metrics, not iloc[0]." ) def test_falls_back_to_min_when_api_unavailable(self): rmse_series = [1.8, 2.1, 5.3, 9.0, 3.2] bsl = self._bsl(api_baseline=None, perf_rmse_values=rmse_series) assert bsl == 1.8, ( f"Fallback should be min(rmse)=1.8, got {bsl}." ) def test_old_iloc0_would_have_failed_mid_drift(self): """Demonstrate the old bug: iloc[0] on a mid-drift log gives wrong baseline.""" rmse_series = [10.5, 10.8, 11.2, 11.0, 10.9] df = pd.DataFrame({"rmse": rmse_series}) old_bsl = df["rmse"].iloc[0] # old (broken) logic assert old_bsl == 10.5, "Setup check: old logic picks high value" # The old bsl would set the baseline hline at 10.5 instead of ~2.1, # causing the chart to look flat (everything near or above "baseline") assert old_bsl > 5.0, ( "Old baseline would have been unreasonably high — confirms the bug." ) def test_alert_threshold_is_correct_fraction_of_bsl(self): """Alert hline must be 15% above baseline.""" bsl = 2.131 alert = bsl * 1.15 assert abs(alert - 2.451) < 0.01, f"Alert threshold wrong: {alert:.3f}" # --------------------------------------------------------------------------- # Unit tests: fix #2 — R² y-axis must accommodate negative values # --------------------------------------------------------------------------- class TestR2AxisScaling: """ Verify that the R² chart y-axis lower bound scales to include negative R² instead of clipping at 0. Before the fix: range=[0, 1.05] (negative values invisible) After the fix: range=[r2_floor, 1.05] where r2_floor < 0 when data dips negative """ def _r2_floor(self, r2_values: list) -> float: """Replicate the fixed dashboard r2_floor calculation.""" r2_min = min(r2_values) return min(r2_min - 0.05, -0.1) if r2_min < 0 else -0.05 def test_negative_r2_produces_negative_floor(self): r2_series = [0.91, 0.60, -0.49, -1.22, 0.83] floor = self._r2_floor(r2_series) assert floor < 0, f"r2_floor must be negative when data goes below 0, got {floor}" assert floor <= -1.22 - 0.05, ( f"Floor {floor} is not low enough to show min r2=-1.22 " "(should be min - 0.05 = -1.27)" ) def test_all_positive_r2_uses_small_negative_floor(self): r2_series = [0.91, 0.88, 0.93, 0.85] floor = self._r2_floor(r2_series) assert floor == -0.05, ( f"When all R² > 0, floor should be -0.05 for breathing room, got {floor}" ) def test_floor_is_below_min_r2(self): """Floor must always be below the minimum R² value so no data is clipped.""" for min_r2 in [-0.05, -0.5, -1.0, -1.22]: r2_series = [0.9, min_r2] floor = self._r2_floor(r2_series) assert floor <= min_r2, ( f"At min_r2={min_r2}, floor={floor} clips data (must be <= min_r2)" ) def test_old_hardcoded_range_clipped_negative_r2(self): """Show that the old range=[0, 1.05] would have hidden the negative data.""" old_range_min = 0 r2_min_in_data = -1.22 assert r2_min_in_data < old_range_min, ( "Confirms bug: min R² in data is below old y-axis floor of 0" ) # --------------------------------------------------------------------------- # Selenium: verify chart renders correctly with fixed logic # --------------------------------------------------------------------------- @pytest.mark.selenium class TestChartFixes: """End-to-end Selenium tests verifying the two chart fixes in production.""" def test_overview_chart_section_visible(self, driver): from selenium.webdriver.common.by import By body = driver.find_element(By.TAG_NAME, "body").text assert "Prediction Error Over Time" in body, ( "RMSE chart section heading not visible on Overview" ) def test_baseline_annotation_present_in_chart(self, driver): """ The 'Baseline' hline annotation must appear in the rendered SVG. If bsl was computed from a high iloc[0], the annotation would still appear but at the wrong Y level — this confirms it's rendered at all. """ from selenium.webdriver.common.by import By page_source = driver.page_source assert "Baseline" in page_source, ( "Baseline annotation not found in rendered page source. " "Chart may not have rendered." ) def test_alert_annotation_present_in_chart(self, driver): from selenium.webdriver.common.by import By page_source = driver.page_source assert "Alert" in page_source or "+15%" in page_source, ( "Alert +15% annotation not found in rendered chart." ) def test_r2_chart_section_visible(self, driver): from selenium.webdriver.common.by import By page_source = driver.page_source # R² label should appear as an axis title in the SVG assert "R²" in page_source or "R\u00b2" in page_source, ( "R² chart axis label not found — chart may not have rendered." ) def test_no_traceback_on_overview(self, driver): from selenium.webdriver.common.by import By assert "Traceback (most recent call last)" not in \ driver.find_element(By.TAG_NAME, "body").text def test_overview_screenshot_with_fixes(self, driver): """Save a screenshot showing the fixed chart for visual verification.""" screenshot_path = ROOT / "assets" / "overview_chart_fixed.png" driver.save_screenshot(str(screenshot_path))