File size: 17,846 Bytes
1aa566a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
baf1ee1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
"""Dashboard diagnostic tests.

Covers two layers:
  1. Unit tests  — data-loading functions work independently of Streamlit
  2. Selenium UI tests — the rendered dashboard shows the expected elements

Usage:
    pytest tests/test_dashboard.py -v
    pytest tests/test_dashboard.py -v -m "not selenium"  # unit tests only
    pytest tests/test_dashboard.py -v -m selenium        # UI tests only (needs running dashboard)

The Selenium tests expect the Streamlit dashboard at DASHBOARD_URL (default
http://localhost:8501).  Start it first with:
    .venv/Scripts/python run.py --no-sim
"""
from __future__ import annotations

import json
import os
import time
from pathlib import Path

import pandas as pd
import pytest

# Directory two levels above this test module.
ROOT = Path(__file__).resolve().parent.parent

# Address of the dashboard under test; the DASHBOARD_URL environment variable
# takes precedence over the local default.
DASHBOARD_URL = os.getenv("DASHBOARD_URL", "http://localhost:8501")

# All JSONL logs inspected by the tests live in the same directory.
_LOG_DIR = ROOT / "data" / "logs"
LOG_PATHS = {
    key: _LOG_DIR / filename
    for key, filename in (
        ("performance", "performance.jsonl"),
        ("drift", "drift_reports.jsonl"),
        ("retrain", "retraining.jsonl"),
        ("predictions", "predictions.jsonl"),
    )
}


# ---------------------------------------------------------------------------
# Helpers (mirror dashboard logic without Streamlit dependency)
# ---------------------------------------------------------------------------

def _load_jsonl(path: Path, limit: int = 2000) -> pd.DataFrame:
    """Load the trailing ``limit`` lines of a JSON-lines file into a DataFrame.

    The load is best-effort: a missing file yields an empty frame, and lines
    that are not valid JSON (including blank lines) are skipped instead of
    aborting the whole read.

    Args:
        path: Location of the ``.jsonl`` file.
        limit: Maximum number of trailing lines to consider. A value of zero
            or less yields an empty frame.

    Returns:
        One row per decoded JSON object, in file order, or an empty DataFrame
        when the file is missing or contains no usable records.
    """
    # ``lines[-0:]`` would select *every* line, so a non-positive limit has to
    # be handled before slicing.
    if limit <= 0 or not path.exists():
        return pd.DataFrame()
    lines = path.read_text(encoding="utf-8").splitlines()[-limit:]
    records = []
    for line in lines:
        try:
            record = json.loads(line)
        except json.JSONDecodeError:
            continue
        # Only JSON objects map onto rows; scalars, lists or ``null`` mixed in
        # with dict records would break DataFrame construction.
        if isinstance(record, dict):
            records.append(record)
    return pd.DataFrame(records) if records else pd.DataFrame()


# ---------------------------------------------------------------------------
# Unit tests
# ---------------------------------------------------------------------------

class TestDataLoading:
    """Check that the JSONL logs are present and parse into usable frames."""

    def test_performance_file_exists(self):
        perf_path = LOG_PATHS["performance"]
        assert perf_path.exists(), (
            "performance.jsonl not found — run the simulation first: "
            "python scripts/simulate_drift.py"
        )

    def test_performance_file_has_rmse_column(self):
        frame = _load_jsonl(LOG_PATHS["performance"])
        assert not frame.empty, "performance.jsonl is empty"
        has_rmse = "rmse" in frame.columns
        assert has_rmse, f"Expected 'rmse' column; got {list(frame.columns)}"

    def test_performance_rmse_values_are_positive(self):
        frame = _load_jsonl(LOG_PATHS["performance"])
        if frame.empty:
            pytest.skip("No performance data yet")
        positive_mask = frame["rmse"] > 0
        assert positive_mask.all(), "RMSE values must be positive"

    def test_performance_file_has_required_columns(self):
        frame = _load_jsonl(LOG_PATHS["performance"])
        if frame.empty:
            pytest.skip("No performance data yet")
        required = {"rmse", "mae", "r2", "n_samples", "timestamp"}
        present = set(frame.columns)
        missing = required - present
        assert not missing, f"Missing columns in performance log: {missing}"

    def test_predictions_file_exists_and_has_data(self):
        pred_path = LOG_PATHS["predictions"]
        assert pred_path.exists(), "predictions.jsonl not found"
        frame = _load_jsonl(pred_path)
        assert not frame.empty, "predictions.jsonl is empty"

    def test_drift_file_structure(self):
        drift_path = LOG_PATHS["drift"]
        if not drift_path.exists():
            pytest.skip("No drift reports yet")
        frame = _load_jsonl(drift_path)
        assert not frame.empty
        assert "drift_detected" in frame.columns, (
            f"Expected 'drift_detected'; got {list(frame.columns)}"
        )

    def test_path_resolution_is_correct(self):
        """The root derived from dashboard/app.py must equal the repo root."""
        expected_root = ROOT.resolve()
        app_file = (ROOT / "dashboard" / "app.py").resolve()
        resolved_root = app_file.parent.parent
        assert resolved_root == expected_root, (
            f"Path mismatch: dashboard resolves to {resolved_root}, "
            f"expected {expected_root}"
        )

    def test_load_jsonl_returns_dataframe_not_empty_when_file_has_data(self):
        perf_path = LOG_PATHS["performance"]
        if not perf_path.exists():
            pytest.skip("No performance data yet")
        frame = _load_jsonl(perf_path)
        assert isinstance(frame, pd.DataFrame)
        assert not frame.empty
        assert len(frame) > 0

    def test_load_jsonl_handles_missing_file_gracefully(self):
        absent = ROOT / "data" / "logs" / "nonexistent.jsonl"
        frame = _load_jsonl(absent)
        assert isinstance(frame, pd.DataFrame)
        assert frame.empty

    def test_load_jsonl_handles_corrupted_lines_gracefully(self):
        import tempfile

        # Two valid records with one undecodable line between them.
        sample_lines = [
            '{"rmse": 1.5, "mae": 1.2}\n',
            "NOT JSON\n",
            '{"rmse": 1.6, "mae": 1.3}\n',
        ]
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".jsonl", delete=False, encoding="utf-8"
        ) as handle:
            handle.writelines(sample_lines)
            scratch = Path(handle.name)
        try:
            frame = _load_jsonl(scratch)
            assert len(frame) == 2, "Should skip corrupted lines and keep valid ones"
            assert list(frame["rmse"]) == [1.5, 1.6]
        finally:
            # delete=False above means the file must be removed by hand.
            scratch.unlink()


# ---------------------------------------------------------------------------
# Selenium UI tests
# ---------------------------------------------------------------------------

def _get_driver():
    """Create a headless Chrome WebDriver, using webdriver-manager to obtain the driver binary."""
    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options
    from selenium.webdriver.chrome.service import Service
    from webdriver_manager.chrome import ChromeDriverManager

    chrome_flags = (
        "--headless=new",
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--window-size=1600,900",
    )
    chrome_options = Options()
    for flag in chrome_flags:
        chrome_options.add_argument(flag)

    # install() returns the path of the driver executable for Service to launch.
    driver_binary = ChromeDriverManager().install()
    return webdriver.Chrome(service=Service(driver_binary), options=chrome_options)


def _dashboard_reachable() -> bool:
    """Report whether a request to ``DASHBOARD_URL`` can be opened.

    The probe uses a 3-second timeout. Any failure (refused connection,
    timeout, HTTP error status, malformed URL, ...) counts as "not reachable"
    so callers can skip instead of erroring.

    Returns:
        True when the URL could be opened, False otherwise.
    """
    import urllib.request

    try:
        # Close the response explicitly so the probe does not leak a socket.
        with urllib.request.urlopen(DASHBOARD_URL, timeout=3):
            return True
    except Exception:  # deliberate best-effort probe: every failure means "down"
        return False


@pytest.fixture(scope="module")
def driver():
    """Module-scoped browser session with the dashboard page already opened.

    Skips the requesting tests when nothing answers at ``DASHBOARD_URL``.
    The browser is shut down in all cases, including when the initial page
    load raises before any test has run.

    Yields:
        The WebDriver instance returned by ``_get_driver()``.
    """
    if not _dashboard_reachable():
        pytest.skip(f"Dashboard not running at {DASHBOARD_URL}")
    drv = _get_driver()
    try:
        drv.get(DASHBOARD_URL)
        # Fixed pause before handing the driver to tests — presumably to let
        # the Streamlit page finish rendering.
        time.sleep(6)
        yield drv
    finally:
        # Code after ``yield`` is skipped if setup raises; ``finally`` ensures
        # the browser process is not leaked in that case.
        drv.quit()


@pytest.mark.selenium
class TestDashboardUI:
    """Selenium tests against the live Streamlit dashboard.

    Every test uses the ``driver`` fixture, which is module-scoped: all tests
    here share one browser session and therefore one page state.
    """

    def test_page_title_is_argus(self, driver):
        assert "Argus" in driver.title, (
            f"Expected 'Argus' in page title, got: {driver.title!r}"
        )

    def test_sidebar_is_visible(self, driver):
        from selenium.webdriver.common.by import By
        # Presence check: the sidebar element must exist in the DOM.
        sidebar = driver.find_elements(By.CSS_SELECTOR, "[data-testid='stSidebar']")
        assert sidebar, "Sidebar element not found"

    def test_api_status_shown_in_sidebar(self, driver):
        from selenium.webdriver.common.by import By
        body_text = driver.find_element(By.TAG_NAME, "body").text
        assert any(kw in body_text for kw in ("API Online", "API Offline")), (
            "Expected API status badge in sidebar"
        )

    def test_navigation_pages_present(self, driver):
        from selenium.webdriver.common.by import By
        body_text = driver.find_element(By.TAG_NAME, "body").text
        for page in ("Overview", "Drift Analysis", "Feature Insights",
                     "Retraining Log", "Live Demo"):
            assert page in body_text, f"Navigation option '{page}' not found"

    def test_overview_metrics_rendered(self, driver):
        from selenium.webdriver.common.by import By
        body_text = driver.find_element(By.TAG_NAME, "body").text
        for label in ("Rolling RMSE", "Baseline RMSE", "Labeled Samples"):
            assert label in body_text, f"Metric '{label}' not visible on Overview"

    def test_no_python_traceback_on_page(self, driver):
        from selenium.webdriver.common.by import By
        body_text = driver.find_element(By.TAG_NAME, "body").text
        assert "Traceback (most recent call last)" not in body_text, (
            "Python traceback found on dashboard page"
        )

    def test_chart_renders_when_data_present(self, driver):
        """If performance data exists, the RMSE chart must be visible (not 'No data')."""
        if not LOG_PATHS["performance"].exists():
            pytest.skip("No performance data — chart absence is expected")
        df = _load_jsonl(LOG_PATHS["performance"])
        if df.empty:
            pytest.skip("performance.jsonl is empty — chart absence is expected")

        from selenium.webdriver.common.by import By

        body_text = driver.find_element(By.TAG_NAME, "body").text
        no_data_msg = "No performance data yet"
        assert no_data_msg not in body_text, (
            f"Dashboard shows '{no_data_msg}' but performance.jsonl has "
            f"{len(df)} rows. Root cause: auto-refresh clears the cache "
            "BEFORE chart code runs, causing an infinite blank loop."
        )

    def test_refresh_now_button_exists(self, driver):
        from selenium.webdriver.common.by import By
        buttons = driver.find_elements(By.TAG_NAME, "button")
        labels = [b.text.strip() for b in buttons]
        assert "Refresh Now" in labels, (
            f"'Refresh Now' button not found. Available buttons: {labels}"
        )

    def test_clicking_refresh_loads_chart(self, driver):
        """Click Refresh Now and verify the chart is shown after an 8-second wait."""
        if not LOG_PATHS["performance"].exists():
            pytest.skip("No performance data")
        df = _load_jsonl(LOG_PATHS["performance"])
        if df.empty:
            pytest.skip("performance.jsonl is empty")

        from selenium.webdriver.common.by import By

        # Stop at the first matching button; later buttons are never queried.
        refresh_btn = next(
            (
                btn
                for btn in driver.find_elements(By.TAG_NAME, "button")
                if btn.text.strip() == "Refresh Now"
            ),
            None,
        )
        # Without this check the test would pass without ever clicking anything.
        assert refresh_btn is not None, (
            "'Refresh Now' button not found — cannot trigger a refresh"
        )
        refresh_btn.click()

        # Fixed pause so the page can re-render before the text is inspected.
        time.sleep(8)
        body_text = driver.find_element(By.TAG_NAME, "body").text
        no_data_msg = "No performance data yet"
        assert no_data_msg not in body_text, (
            "Chart still absent after clicking Refresh Now"
        )

    def test_screenshot_on_failure(self, driver, request):
        """Save a screenshot to assets/test_screenshot.png for inspection.

        Despite the name, the capture is unconditional; ``request`` is
        accepted but not used.
        """
        screenshot_path = ROOT / "assets" / "test_screenshot.png"
        # save_screenshot() signals an I/O failure by returning False, so
        # create the target directory first and check the result.
        screenshot_path.parent.mkdir(parents=True, exist_ok=True)
        assert driver.save_screenshot(str(screenshot_path)), (
            f"Could not write screenshot to {screenshot_path}"
        )


# ---------------------------------------------------------------------------
# Unit tests: fix #1 — baseline RMSE must not use iloc[0] from the log
# ---------------------------------------------------------------------------

class TestBaselineRmseLogic:
    """
    Verify that the baseline hline calculation uses api_metrics baseline_rmse
    rather than the first row of the performance log.

    Before the fix:  bsl = perf_df["rmse"].iloc[0]
    After the fix:   bsl = baseline or perf_df["rmse"].min()

    If the log starts mid-drift (high RMSE), iloc[0] would have been wrong.
    """

    def _bsl(self, api_baseline: float | None, perf_rmse_values: list) -> float:
        """Replicate the fixed dashboard bsl calculation.

        Args:
            api_baseline: Baseline RMSE reported by the API, or ``None`` when
                the API value is unavailable.
            perf_rmse_values: RMSE values as they would appear in the
                performance log.

        Returns:
            ``api_baseline`` when it is truthy, otherwise the minimum logged
            RMSE as a float. A baseline of ``0`` is falsy and therefore also
            falls back, matching the ``baseline or ...`` expression above.
        """
        df = pd.DataFrame({"rmse": perf_rmse_values})
        return api_baseline or float(df["rmse"].min())

    def test_uses_api_baseline_when_available(self):
        # Log starts at a high value (simulating mid-drift start)
        rmse_series = [10.5, 10.8, 11.2, 11.0, 10.9]
        bsl = self._bsl(api_baseline=2.1, perf_rmse_values=rmse_series)
        assert bsl == 2.1, (
            f"Expected api baseline 2.1, got {bsl}. "
            "Fix is not applied: bsl must come from api_metrics, not iloc[0]."
        )

    def test_falls_back_to_min_when_api_unavailable(self):
        rmse_series = [1.8, 2.1, 5.3, 9.0, 3.2]
        bsl = self._bsl(api_baseline=None, perf_rmse_values=rmse_series)
        assert bsl == 1.8, (
            f"Fallback should be min(rmse)=1.8, got {bsl}."
        )

    def test_old_iloc0_would_have_failed_mid_drift(self):
        """Demonstrate the old bug: iloc[0] on a mid-drift log gives wrong baseline."""
        rmse_series = [10.5, 10.8, 11.2, 11.0, 10.9]
        df = pd.DataFrame({"rmse": rmse_series})
        old_bsl = df["rmse"].iloc[0]   # old (broken) logic
        assert old_bsl == 10.5, "Setup check: old logic picks high value"
        # The old bsl would set the baseline hline at 10.5 instead of ~2.1,
        # causing the chart to look flat (everything near or above "baseline")
        assert old_bsl > 5.0, (
            "Old baseline would have been unreasonably high — confirms the bug."
        )

    def test_alert_threshold_is_correct_fraction_of_bsl(self):
        """Alert hline must be 15% above baseline."""
        bsl = 2.131
        alert = bsl * 1.15
        assert abs(alert - 2.451) < 0.01, f"Alert threshold wrong: {alert:.3f}"


# ---------------------------------------------------------------------------
# Unit tests: fix #2 — R² y-axis must accommodate negative values
# ---------------------------------------------------------------------------

class TestR2AxisScaling:
    """
    Check the lower bound chosen for the R² chart's y-axis.

    Before the fix:  range=[0, 1.05]  (negative values invisible)
    After the fix:   range=[r2_floor, 1.05]  where r2_floor < 0 when data dips negative
    """

    def _r2_floor(self, r2_values: list) -> float:
        """Compute the y-axis floor the way the fixed dashboard does.

        With a negative minimum the floor sits 0.05 below it, but never above
        -0.1; otherwise a fixed -0.05 is used.
        """
        lowest = min(r2_values)
        if lowest < 0:
            padded = lowest - 0.05
            return min(padded, -0.1)
        return -0.05

    def test_negative_r2_produces_negative_floor(self):
        floor = self._r2_floor([0.91, 0.60, -0.49, -1.22, 0.83])
        assert floor < 0, f"r2_floor must be negative when data goes below 0, got {floor}"
        upper_limit = -1.22 - 0.05
        assert floor <= upper_limit, (
            f"Floor {floor} is not low enough to show min r2=-1.22 "
            "(should be min - 0.05 = -1.27)"
        )

    def test_all_positive_r2_uses_small_negative_floor(self):
        floor = self._r2_floor([0.91, 0.88, 0.93, 0.85])
        assert floor == -0.05, (
            f"When all R² > 0, floor should be -0.05 for breathing room, got {floor}"
        )

    def test_floor_is_below_min_r2(self):
        """For negative minima the floor must not exceed the minimum R²."""
        candidates = [-0.05, -0.5, -1.0, -1.22]
        for min_r2 in candidates:
            floor = self._r2_floor([0.9, min_r2])
            assert floor <= min_r2, (
                f"At min_r2={min_r2}, floor={floor} clips data (must be <= min_r2)"
            )

    def test_old_hardcoded_range_clipped_negative_r2(self):
        """Show that the old range=[0, 1.05] would have hidden the negative data."""
        r2_min_in_data = -1.22
        old_range_min = 0
        assert r2_min_in_data < old_range_min, (
            "Confirms bug: min R² in data is below old y-axis floor of 0"
        )


# ---------------------------------------------------------------------------
# Selenium: verify chart renders correctly with fixed logic
# ---------------------------------------------------------------------------

@pytest.mark.selenium
class TestChartFixes:
    """End-to-end Selenium tests verifying the two chart fixes in production.

    All tests read from the shared ``driver`` fixture's current page.
    """

    def test_overview_chart_section_visible(self, driver):
        from selenium.webdriver.common.by import By
        body = driver.find_element(By.TAG_NAME, "body").text
        assert "Prediction Error Over Time" in body, (
            "RMSE chart section heading not visible on Overview"
        )

    def test_baseline_annotation_present_in_chart(self, driver):
        """
        The 'Baseline' hline annotation must appear in the rendered SVG.
        If bsl was computed from a high iloc[0], the annotation would still
        appear but at the wrong Y level — this confirms it's rendered at all.
        """
        page_source = driver.page_source
        assert "Baseline" in page_source, (
            "Baseline annotation not found in rendered page source. "
            "Chart may not have rendered."
        )

    def test_alert_annotation_present_in_chart(self, driver):
        page_source = driver.page_source
        assert "Alert" in page_source or "+15%" in page_source, (
            "Alert +15% annotation not found in rendered chart."
        )

    def test_r2_chart_section_visible(self, driver):
        page_source = driver.page_source
        # R² label should appear as an axis title in the SVG.
        # "\u00b2" is the superscript-two character, so one check covers both
        # the literal and the escaped spelling.
        assert "R\u00b2" in page_source, (
            "R² chart axis label not found — chart may not have rendered."
        )

    def test_no_traceback_on_overview(self, driver):
        from selenium.webdriver.common.by import By
        body_text = driver.find_element(By.TAG_NAME, "body").text
        assert "Traceback (most recent call last)" not in body_text

    def test_overview_screenshot_with_fixes(self, driver):
        """Save a screenshot showing the fixed chart for visual verification."""
        screenshot_path = ROOT / "assets" / "overview_chart_fixed.png"
        # save_screenshot() signals an I/O failure by returning False, so
        # create the target directory first and check the result.
        screenshot_path.parent.mkdir(parents=True, exist_ok=True)
        assert driver.save_screenshot(str(screenshot_path)), (
            f"Could not write screenshot to {screenshot_path}"
        )