"""Unit tests for the analytics engine.

Run with:  uv run pytest

Each stage is tested in isolation so a failure points at the exact function.
Dates are built relative to "now" so the rolling-week buckets always line up
no matter when the suite runs.
"""

from datetime import datetime, timedelta

import pytest

from rate_my_run.analytics import (
    Run,
    WeekStats,
    clean_activities,
    group_by_week,
    compute_trends,
    analyze,
    SPIKE_PCT,
    BREAK_DAYS,
)


# --- helpers -----------------------------------------------------------------

def _iso(days_ago: float, hour: int = 12) -> str:
    """ISO timestamp `days_ago` days before now, pinned to midday to stay
    clear of week-boundary edges."""
    d = (datetime.now() - timedelta(days=days_ago)).replace(
        hour=hour, minute=0, second=0, microsecond=0
    )
    return d.isoformat()


def raw_run(days_ago, distance_m, moving_time_s, type="Run"):
    """Build one raw activity dict of the shape passed to `clean_activities`
    and `analyze` in this module.

    `type` shadows the builtin on purpose: callers pass it by keyword
    (e.g. ``type="Ride"``), so the parameter name is part of the interface.
    The start date is `days_ago` days back, pinned to midday by `_iso`.
    """
    activity = dict(type=type, distance=distance_m, moving_time=moving_time_s)
    activity["start_date"] = _iso(days_ago)
    return activity


def week(week_start, num_runs, distance_km, longest_km, pace):
    """Construct a WeekStats by hand so compute_trends can be tested without
    going through group_by_week.

    Arguments are forwarded positionally, in the order given.
    """
    fields = (week_start, num_runs, distance_km, longest_km, pace)
    return WeekStats(*fields)


# --- Run --------------------------------------------------------------------

def test_run_unit_conversions():
    """A 5000 m / 1500 s Run reports 5 km, 25 minutes and a 5 min/km pace."""
    five_k = Run(start=_iso(0), distance_m=5000, moving_time_s=1500)

    assert five_k.distance_km == 5.0
    assert five_k.moving_time_m == 25.0
    # 25 minutes spread over 5 km
    expected_pace = pytest.approx(5.0)
    assert five_k.pace_min_per_km == expected_pace


# --- clean_activities -------------------------------------------------------

def test_clean_keeps_only_real_runs():
    """Of four raw activities, only the genuine 5 km run survives cleaning."""
    keeper = raw_run(1, 5000, 1500)
    rejects = [
        raw_run(1, 20000, 3600, type="Ride"),  # wrong activity type
        raw_run(1, 0, 0),                      # zero distance and zero time
        raw_run(1, 30, 7),                     # shorter than min_distance (50 m)
    ]

    cleaned = clean_activities([keeper, *rejects])

    assert len(cleaned) == 1
    survivor = cleaned[0]
    assert survivor.distance_m == 5000


def test_clean_min_distance_boundary():
    """A 49 m activity is dropped and a 60 m one is kept (default 50 m floor).

    NOTE(review): the exact 50 m value is not exercised here, so this does
    not pin whether the threshold is inclusive — confirm against the engine.
    """
    too_short = raw_run(1, 49, 30)    # just under the 50 m default
    long_enough = raw_run(1, 60, 30)  # over the 50 m default

    kept = clean_activities([too_short, long_enough])

    kept_distances = [run.distance_m for run in kept]
    assert kept_distances == [60]


# --- group_by_week ----------------------------------------------------------

def test_group_returns_one_bucket_per_week_newest_last():
    """group_by_week yields `num_weeks` buckets ordered oldest -> newest."""
    single_run = clean_activities([raw_run(1, 5000, 1500)])

    buckets = group_by_week(single_run, num_weeks=8)

    assert len(buckets) == 8
    # The final bucket must start later than the first one.
    oldest, newest = buckets[0], buckets[-1]
    assert newest.week_start > oldest.week_start


def test_group_buckets_runs_into_correct_weeks():
    """Runs 1-5, 8-11 and 16 days old land in the three newest buckets."""
    this_week_raw = [
        raw_run(1, 5000, 1500),
        raw_run(3, 6000, 1820),
        raw_run(5, 5200, 1560),
    ]
    last_week_raw = [raw_run(8, 5000, 1650), raw_run(11, 5000, 1640)]
    two_weeks_ago_raw = [raw_run(16, 8000, 2700)]

    cleaned = clean_activities(this_week_raw + last_week_raw + two_weeks_ago_raw)
    buckets = group_by_week(cleaned, num_weeks=8)
    this_week = buckets[-1]
    last_week = buckets[-2]
    two_ago = buckets[-3]

    # Newest bucket: 5.0 + 6.0 + 5.2 km
    assert this_week.num_runs == 3
    assert this_week.distance_km == pytest.approx(16.2)
    # Previous bucket: two 5 km runs
    assert last_week.num_runs == 2
    assert last_week.distance_km == pytest.approx(10.0)
    # Bucket before that: a single 8 km run
    assert two_ago.num_runs == 1
    assert two_ago.longest_run_km == pytest.approx(8.0)


def test_group_empty_week_has_none_pace():
    """A bucket without runs reports zero runs and a pace of None."""
    only_recent = clean_activities([raw_run(1, 5000, 1500)])
    buckets = group_by_week(only_recent, num_weeks=8)

    # The single run is one day old, so the oldest bucket stays empty.
    oldest = buckets[0]
    assert oldest.num_runs == 0
    assert oldest.avg_pace_min_per_km is None


def test_group_pace_is_distance_weighted_not_naive_mean():
    """Weekly pace is total time over total distance, not a mean of paces."""
    slow_short = raw_run(1, 1000, 360)   # 1 km at 6:00 per km
    fast_long = raw_run(2, 9000, 2700)   # 9 km at 5:00 per km

    cleaned = clean_activities([slow_short, fast_long])
    buckets = group_by_week(cleaned, num_weeks=8)

    # 51 min over 10 km = 5.1; averaging the two paces would give 5.5.
    newest = buckets[-1]
    assert newest.avg_pace_min_per_km == pytest.approx(5.1)


# --- compute_trends ---------------------------------------------------------

def _padded(*recent):
    """Left-pad `recent` with empty weeks so the result is 8 entries long.

    The returned list is ordered oldest -> newest with `recent` as the tail.
    Each filler week has zero runs, zero distance and a pace of None, and is
    dated by its slot: slot 0 (the oldest) starts 49 days before today and
    each later slot starts 7 days after the one before it. The fillers
    therefore come strictly before a `recent` tail that ends at today, as
    every caller in this module builds it.

    Previously the fillers were dated today, today-7, ... which ran
    newest -> oldest and duplicated the dates of the `recent` weeks.

    If `recent` already holds 8 or more weeks, no filler is added.
    """
    total_weeks = 8
    today = datetime.now().date()
    filler_count = total_weeks - len(recent)  # <= 0 yields no filler
    empties = [
        week(today - timedelta(days=7 * (total_weeks - 1 - slot)), 0, 0.0, 0.0, None)
        for slot in range(filler_count)
    ]
    return empties + list(recent)


def test_trends_mileage_spike_and_improving():
    """10 km -> 16.2 km with a faster pace and rising run counts flags a
    mileage spike, an improving pace and increasing consistency."""
    today = datetime.now().date()
    two_weeks_ago = week(today - timedelta(days=14), 1, 8.0, 8.0, 5.6)
    last_week = week(today - timedelta(days=7), 2, 10.0, 5.0, 5.5)
    this_week = week(today, 3, 16.2, 6.0, 5.0)

    result = compute_trends(
        _padded(two_weeks_ago, last_week, this_week),
        days_since_last_run=1,
    )

    # (16.2 - 10.0) / 10.0 = 62 %
    assert result["mileage_change_pct"] == pytest.approx(62.0, abs=0.5)
    assert result["pace_trend"] == "improving"
    assert "mileage_spike" in result["signals"]
    assert "increasing_consistency" in result["signals"]


def test_trends_declining_pace():
    """Same mileage but a full minute per km slower reads as 'declining'."""
    today = datetime.now().date()
    baseline = week(today - timedelta(days=7), 3, 12.0, 5.0, 5.0)
    slower = week(today, 3, 12.0, 5.0, 6.0)  # more minutes per km than baseline

    result = compute_trends(_padded(baseline, slower), days_since_last_run=1)

    assert result["pace_trend"] == "declining"


def test_trends_plateau_within_band():
    """A 0.02 min/km pace change between weeks is reported as 'plateauing'."""
    today = datetime.now().date()
    previous = week(today - timedelta(days=7), 3, 12.0, 5.0, 5.50)
    current = week(today, 3, 12.0, 5.0, 5.52)  # expected to sit within PACE_BAND

    result = compute_trends(_padded(previous, current), days_since_last_run=1)

    assert result["pace_trend"] == "plateauing"


def test_trends_mileage_none_when_last_week_empty():
    """With no distance last week, the percentage change is None and no
    mileage spike is signalled."""
    today = datetime.now().date()
    empty_last_week = week(today - timedelta(days=7), 0, 0.0, 0.0, None)
    active_this_week = week(today, 3, 12.0, 5.0, 5.0)

    result = compute_trends(
        _padded(empty_last_week, active_this_week),
        days_since_last_run=1,
    )

    assert result["mileage_change_pct"] is None
    assert "mileage_spike" not in result["signals"]


def test_trends_returning_after_break_signal():
    """A gap five days longer than BREAK_DAYS raises 'returning_after_break'."""
    only_this_week = week(datetime.now().date(), 1, 5.0, 5.0, 5.0)
    long_gap = BREAK_DAYS + 5

    result = compute_trends(_padded(only_this_week), days_since_last_run=long_gap)

    assert "returning_after_break" in result["signals"]


def test_trends_potential_fatigue():
    """More distance combined with a slower pace raises 'potential_fatigue'."""
    today = datetime.now().date()
    baseline = week(today - timedelta(days=7), 3, 10.0, 5.0, 5.0)
    heavier_and_slower = week(today, 3, 13.0, 5.0, 6.0)  # +30 % distance, slower

    result = compute_trends(
        _padded(baseline, heavier_and_slower),
        days_since_last_run=1,
    )

    assert result["mileage_change_pct"] > SPIKE_PCT
    assert result["pace_trend"] == "declining"
    assert "potential_fatigue" in result["signals"]


# --- analyze (end to end) ---------------------------------------------------

def test_analyze_end_to_end():
    """Full pipeline on three weeks of runs plus two junk activities."""
    real_runs = [
        raw_run(1, 5000, 1500),
        raw_run(3, 6000, 1820),
        raw_run(5, 5200, 1560),
        raw_run(8, 5000, 1650),
        raw_run(11, 5000, 1640),
        raw_run(16, 8000, 2700),
    ]
    junk = [
        raw_run(2, 30, 7),                     # too short to count
        raw_run(2, 20000, 3600, type="Ride"),  # not a run
    ]

    summary = analyze(real_runs + junk)

    assert len(summary.weeks) == 8
    assert summary.pace_trend == "improving"
    assert "mileage_spike" in summary.signals
    # The newest real run is one day old.
    assert summary.days_since_last_run <= 2


def test_analyze_days_since_uses_most_recent_not_first():
    """Regression: days_since_last_run follows the newest run even when it
    is not the first item of the input list."""
    older_listed_first = raw_run(10, 5000, 1500)
    newest_listed_last = raw_run(2, 5000, 1500)

    summary = analyze([older_listed_first, newest_listed_last])

    # Taking the first entry instead would report a gap of about ten days.
    assert summary.days_since_last_run <= 3


def test_analyze_handles_no_runs():
    """Empty input yields the 999-day sentinel, all-empty weeks, no mileage
    change, an 'insufficient_data' pace trend and the break signal."""
    summary = analyze([])

    assert summary.days_since_last_run == 999
    assert not any(bucket.num_runs != 0 for bucket in summary.weeks)
    assert summary.mileage_change_pct is None
    assert summary.pace_trend == "insufficient_data"
    # 999 days is above BREAK_DAYS, so the break signal is expected.
    assert "returning_after_break" in summary.signals