"""Benchmark model size + inference latency against the budget.

Budgets:
  - combined .joblib size < 50 MB
  - average latency per message < 200 ms (CPU)
  - p99 latency per message < 200 ms (CPU)

Sample: the first 100 non-empty turns taken from the rows of
data/conversations.csv after a seeded shuffle (one message per turn).
"""

from __future__ import annotations

import random
import re
import statistics
import time
from pathlib import Path

import pandas as pd

from src.part2_intent.infer import classify, _load, VEC_PATH, MODEL_PATH

# Directory two levels above the one containing this file.
ROOT = Path(__file__).resolve().parents[2]
# CSV the benchmark messages are sampled from.
DATA = ROOT.joinpath("data", "conversations.csv")

# Budgets the benchmark is checked against.
SIZE_BUDGET_MB = 50.0
LATENCY_BUDGET_MS = 200.0

# Sampling parameters: how many messages to time, and the shuffle seed.
N_SAMPLES = 100
SEED = 7

# Matches a leading "User <digits>:" speaker tag (case-insensitive) so it can
# be stripped from a line before classification.
USER_PREFIX = re.compile(r"^\s*User\s*\d+\s*:\s*", re.IGNORECASE)


def sample_messages(n: int, seed: int) -> list[str]:
    """Return up to *n* non-empty messages drawn from the conversations CSV.

    ``DATA`` is read with a single column named ``conversation``; missing
    values are dropped and the remaining rows are shuffled with
    ``random.Random(seed)`` so the sample is reproducible. Each conversation
    is split on newlines, the ``USER_PREFIX`` speaker tag is stripped from
    every line, and non-empty lines are collected in order until *n* have
    been gathered.

    Args:
        n: Maximum number of messages to return; values <= 0 yield ``[]``.
        seed: Seed for the shuffle of conversation rows.

    Returns:
        A list of at most *n* message strings (fewer if the file runs out).
    """
    if n <= 0:
        # The loop below only checks the limit after appending, so without
        # this guard a non-positive n would still return one message.
        return []

    df = pd.read_csv(DATA, header=None, names=["conversation"])
    rows = df["conversation"].dropna().tolist()
    random.Random(seed).shuffle(rows)

    msgs: list[str] = []
    for conv in rows:
        for line in conv.split("\n"):
            line = USER_PREFIX.sub("", line).strip()
            if not line:
                continue
            msgs.append(line)
            if len(msgs) >= n:
                return msgs
    return msgs


def _nearest_rank(ordered: list[float], percent: int) -> float:
    """Return the nearest-rank *percent*-th percentile of *ordered*.

    *ordered* must be sorted ascending and non-empty.
    """
    # Integer ceiling of (percent / 100) * len(ordered). This avoids float
    # error and round()'s half-to-even behaviour, which can select a rank
    # one too low (e.g. 150 samples -> rank 148 instead of 149).
    rank = -(-percent * len(ordered) // 100)
    return ordered[max(rank, 1) - 1]


def main() -> int:
    """Run the size and latency benchmark and print a PASS/FAIL report.

    Measures the combined on-disk size of the vectorizer and model files,
    times ``classify()`` on each sampled message, and compares the results
    with ``SIZE_BUDGET_MB`` and ``LATENCY_BUDGET_MS``.

    Returns:
        0 when every budget check passes, 1 otherwise (including when no
        messages could be sampled); intended as the process exit code.
    """
    vec_bytes = VEC_PATH.stat().st_size
    model_bytes = MODEL_PATH.stat().st_size
    size_mb = (vec_bytes + model_bytes) / (1024 * 1024)

    # warm the model so first-call load doesn't pollute timings
    _load()

    msgs = sample_messages(N_SAMPLES, SEED)
    if not msgs:
        # statistics.mean() raises on an empty list; report a failed run
        # instead of crashing with a traceback.
        print(f"benchmark over 0 messages: nothing sampled from {DATA}")
        print("OVERALL:", "FAIL")
        return 1

    latencies: list[float] = []
    for m in msgs:
        # use the wall-clock around classify() rather than the internal
        # latency_ms, so we measure end-to-end and not just the inner call
        t0 = time.perf_counter()
        classify(m)
        latencies.append((time.perf_counter() - t0) * 1000.0)

    avg = statistics.mean(latencies)
    p50 = statistics.median(latencies)
    p99 = _nearest_rank(sorted(latencies), 99)
    mx = max(latencies)

    checks = [
        ("model size",       f"{size_mb:.2f} MB",  f"< {SIZE_BUDGET_MB:.0f} MB", size_mb < SIZE_BUDGET_MB),
        ("avg latency",      f"{avg:.2f} ms",     f"< {LATENCY_BUDGET_MS:.0f} ms", avg < LATENCY_BUDGET_MS),
        ("p99 latency",      f"{p99:.2f} ms",     f"< {LATENCY_BUDGET_MS:.0f} ms", p99 < LATENCY_BUDGET_MS),
    ]

    print(f"benchmark over {len(msgs)} messages")
    print(f"  vectorizer.joblib : {vec_bytes/1024:.1f} KB")
    print(f"  intent_model.joblib: {model_bytes/1024:.1f} KB")
    print(f"  combined size      : {size_mb:.2f} MB")
    print()
    print(f"  avg latency : {avg:.2f} ms")
    print(f"  p50 latency : {p50:.2f} ms")
    print(f"  p99 latency : {p99:.2f} ms")
    print(f"  max latency : {mx:.2f} ms")
    print()
    print(f"  {'check':<14s} {'value':>14s} {'budget':>14s} {'result':>8s}")
    print(f"  {'-'*14:<14s} {'-'*14:>14s} {'-'*14:>14s} {'-'*8:>8s}")
    all_pass = True
    for name, val, budget, ok in checks:
        flag = "PASS" if ok else "FAIL"
        all_pass = all_pass and ok
        print(f"  {name:<14s} {val:>14s} {budget:>14s} {flag:>8s}")
    print()
    print("OVERALL:", "PASS" if all_pass else "FAIL")
    return 0 if all_pass else 1


if __name__ == "__main__":
    # Propagate the benchmark result as the process exit status.
    exit_status = main()
    raise SystemExit(exit_status)