"""Render comparison figures for retrieval-evaluation reports.

Reads ``artifacts/reports/eval_*.json`` files (one per model) and writes a
set of PNG charts — recall@k, ranking metrics, rank distributions,
coverage, score margins, etc. — plus a ``figures_summary.json`` manifest.

Expected report schema (as read by the accessors below): top-level keys
``model``, ``top_k``, ``overall`` and ``by_lang.<lang>``; each scope section
may contain flat metrics (``recall@k``, ``mrr@10`` …) and nested
``rank_stats`` / ``score_stats`` / ``coverage`` / ``distributions`` groups.
"""

import json
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np


def read_json(path):
    """Load a UTF-8 JSON file and return the parsed object."""
    return json.loads(Path(path).read_text(encoding="utf-8"))


def pick_models(files):
    """Return ``[(file_stem, report_dict), ...]`` for every readable report.

    Unreadable or malformed files are skipped silently (deliberate
    best-effort loading so one broken report does not kill all plots).
    """
    items = []
    for p in files:
        try:
            j = read_json(p)
            items.append((Path(p).stem, j))
        except Exception:
            pass  # best-effort: ignore unreadable/invalid report files
    return items


def metric_value(obj, scope, lang, metric):
    """Fetch a flat metric from the given scope, or None if absent.

    scope is "overall" (ignores *lang*) or "by_lang" (indexes by *lang*).
    """
    if scope == "overall":
        return obj.get("overall", {}).get(metric, None)
    if scope == "by_lang":
        return obj.get("by_lang", {}).get(lang, {}).get(metric, None)
    return None


def section(obj, scope, lang):
    """Return the dict for the requested scope ({} when missing/unknown)."""
    if scope == "overall":
        return obj.get("overall", {})
    if scope == "by_lang":
        return obj.get("by_lang", {}).get(lang, {})
    return {}


def rank_stat_value(obj, scope, lang, key):
    """Fetch ``rank_stats[key]`` from the scope section, or None."""
    return section(obj, scope, lang).get("rank_stats", {}).get(key, None)


def score_stat_value(obj, scope, lang, group, key):
    """Fetch ``score_stats[group][key]`` from the scope section, or None."""
    return section(obj, scope, lang).get("score_stats", {}).get(group, {}).get(key, None)


def coverage_value(obj, scope, lang, key):
    """Fetch ``coverage[key]`` from the scope section, or None."""
    return section(obj, scope, lang).get("coverage", {}).get(key, None)


def distribution_value(obj, scope, lang, key):
    """Fetch ``distributions[key]`` from the scope section ([] when missing)."""
    return section(obj, scope, lang).get("distributions", {}).get(key, [])


def save_recall_plot(models, scope, lang, out_path):
    """Grouped bar chart of recall@{1,3,5,10} for each model."""
    ks = [1, 3, 5, 10]
    x = np.arange(len(ks))
    width = 0.8 / max(1, len(models))  # split the 0.8 band between models
    plt.figure()
    for i, (name, obj) in enumerate(models):
        vals = []
        for k in ks:
            v = metric_value(obj, scope, lang, f"recall@{k}")
            vals.append(0.0 if v is None else float(v))
        plt.bar(
            x + (i - (len(models) - 1) / 2) * width,
            vals,
            width=width,
            label=obj.get("model", name),
        )
    plt.xticks(x, [f"@{k}" for k in ks])
    title = "Recall@k"
    if scope == "overall":
        plt.title(f"{title} (overall)")
    else:
        plt.title(f"{title} ({lang})")
    plt.ylabel("score")
    # Scale the y-axis to the largest observed value (with 20% headroom),
    # clamped to [0.05, 1.0] so near-zero data still renders readably.
    ymax = max(
        [0.0]
        + [
            max([metric_value(o, scope, lang, f"recall@{k}") or 0.0 for k in ks])
            for _, o in models
        ]
    )
    plt.ylim(0, min(1.0, max(0.05, ymax * 1.2)))
    plt.legend()
    Path(out_path).parent.mkdir(parents=True, exist_ok=True)
    plt.tight_layout()
    plt.savefig(out_path, dpi=180)
    plt.close()


def save_rank_metrics_plot(models, scope, lang, out_path):
    """Grouped bar chart of MRR@10 and nDCG@10 for each model."""
    metrics = ["mrr@10", "ndcg@10"]
    x = np.arange(len(metrics))
    width = 0.8 / max(1, len(models))
    plt.figure()
    for i, (name, obj) in enumerate(models):
        vals = []
        for m in metrics:
            v = metric_value(obj, scope, lang, m)
            vals.append(0.0 if v is None else float(v))
        plt.bar(
            x + (i - (len(models) - 1) / 2) * width,
            vals,
            width=width,
            label=obj.get("model", name),
        )
    plt.xticks(x, metrics)
    title = "Ranking metrics"
    if scope == "overall":
        plt.title(f"{title} (overall)")
    else:
        plt.title(f"{title} ({lang})")
    plt.ylabel("score")
    ymax = max(
        [0.0]
        + [
            max([metric_value(o, scope, lang, m) or 0.0 for m in metrics])
            for _, o in models
        ]
    )
    plt.ylim(0, min(1.0, max(0.05, ymax * 1.2)))
    plt.legend()
    Path(out_path).parent.mkdir(parents=True, exist_ok=True)
    plt.tight_layout()
    plt.savefig(out_path, dpi=180)
    plt.close()


def save_precision_plot(models, scope, lang, out_path):
    """Grouped bar chart of precision@{1,3,5,10}; skipped if no model has it."""
    ks = [1, 3, 5, 10]
    x = np.arange(len(ks))
    width = 0.8 / max(1, len(models))
    plt.figure()
    any_data = False
    for i, (name, obj) in enumerate(models):
        vals = []
        for k in ks:
            v = metric_value(obj, scope, lang, f"precision@{k}")
            if v is not None:
                any_data = True
            vals.append(0.0 if v is None else float(v))
        plt.bar(
            x + (i - (len(models) - 1) / 2) * width,
            vals,
            width=width,
            label=obj.get("model", name),
        )
    if not any_data:
        plt.close()  # nothing to show; drop the empty figure
        return
    plt.xticks(x, [f"@{k}" for k in ks])
    title = "Precision@k (single-positive)"
    if scope == "overall":
        plt.title(f"{title} (overall)")
    else:
        plt.title(f"{title} ({lang})")
    plt.ylabel("score")
    ymax = max(
        [0.0]
        + [
            max([metric_value(o, scope, lang, f"precision@{k}") or 0.0 for k in ks])
            for _, o in models
        ]
    )
    plt.ylim(0, min(1.0, max(0.05, ymax * 1.2)))
    plt.legend()
    Path(out_path).parent.mkdir(parents=True, exist_ok=True)
    plt.tight_layout()
    plt.savefig(out_path, dpi=180)
    plt.close()


def save_recall_curve_plot(models, scope, lang, out_path):
    """Line plot of recall@k versus k for each model."""
    ks = [1, 3, 5, 10]
    xs = np.array(ks, dtype=float)
    plt.figure()
    for name, obj in models:
        ys = []
        for k in ks:
            v = metric_value(obj, scope, lang, f"recall@{k}")
            ys.append(0.0 if v is None else float(v))
        plt.plot(xs, ys, marker="o", label=obj.get("model", name))
    plt.xticks(xs, [f"@{k}" for k in ks])
    title = "Recall@k vs k"
    if scope == "overall":
        plt.title(f"{title} (overall)")
    else:
        plt.title(f"{title} ({lang})")
    plt.xlabel("k")
    plt.ylabel("recall")
    ymax = max(
        [0.0]
        + [
            max([metric_value(o, scope, lang, f"recall@{k}") or 0.0 for k in ks])
            for _, o in models
        ]
    )
    plt.ylim(0, min(1.0, max(0.05, ymax * 1.2)))
    plt.legend()
    Path(out_path).parent.mkdir(parents=True, exist_ok=True)
    plt.tight_layout()
    plt.savefig(out_path, dpi=180)
    plt.close()


def save_rank_stats_plot(models, scope, lang, out_path):
    """Grouped bar chart of mean/median/p90 rank; skipped if no data."""
    metrics = [("mean_rank", "Mean"), ("median_rank", "Median"), ("p90_rank", "P90")]
    x = np.arange(len(metrics))
    width = 0.8 / max(1, len(models))
    plt.figure()
    any_data = False
    for i, (name, obj) in enumerate(models):
        vals = []
        for key, _ in metrics:
            v = rank_stat_value(obj, scope, lang, key)
            if v is not None:
                any_data = True
            # NaN (not 0) for missing values so absent bars are not drawn
            vals.append(np.nan if v is None else float(v))
        plt.bar(
            x + (i - (len(models) - 1) / 2) * width,
            vals,
            width=width,
            label=obj.get("model", name),
        )
    if not any_data:
        plt.close()
        return
    plt.xticks(x, [m[1] for m in metrics])
    title = "Rank stats (1-based)"
    if scope == "overall":
        plt.title(f"{title} (overall)")
    else:
        plt.title(f"{title} ({lang})")
    plt.ylabel("rank")
    plt.legend()
    Path(out_path).parent.mkdir(parents=True, exist_ok=True)
    plt.tight_layout()
    plt.savefig(out_path, dpi=180)
    plt.close()


def save_rank_distribution_plot(models, scope, lang, out_path, not_found_out_path=None):
    """Histogram of 0-based ranks (found queries only) per model.

    Optionally also writes a "not found rate" bar chart when
    *not_found_out_path* is given. Uses the first model's ``top_k`` as the
    bucket count; returns early if no model declares ``top_k``.
    """
    top_k = None
    for _, obj in models:
        if "top_k" in obj:
            top_k = int(obj["top_k"])
            break
    if top_k is None:
        return
    x = np.arange(top_k)
    width = 0.8 / max(1, len(models))
    plt.figure()
    any_data = False
    not_found_rates = []
    not_found_labels = []
    for i, (name, obj) in enumerate(models):
        ranks = distribution_value(obj, scope, lang, "ranks")
        if not ranks:
            continue
        any_data = True
        buckets = [0] * top_k
        not_found = 0
        for r in ranks:
            # None or out-of-window rank means the positive was not retrieved
            if r is None or r < 0 or r >= top_k:
                not_found += 1
            else:
                buckets[int(r)] += 1
        total = max(1, len(ranks))
        not_found_rates.append(not_found / total)
        not_found_labels.append(obj.get("model", name))
        found_total = total - not_found
        if found_total <= 0:
            vals = [0.0] * top_k
        else:
            # Normalize over found queries only, so models with different
            # not-found rates remain comparable bucket-by-bucket.
            vals = [b / found_total for b in buckets]
        plt.bar(
            x + (i - (len(models) - 1) / 2) * width,
            vals,
            width=width,
            label=obj.get("model", name),
        )
    if not any_data:
        plt.close()
        return
    labels = [str(i + 1) for i in range(top_k)]  # display ranks 1-based
    plt.xticks(x, labels)
    title = "Rank distribution (found only)"
    if scope == "overall":
        plt.title(f"{title} (overall)")
    else:
        plt.title(f"{title} ({lang})")
    plt.ylabel("share of found queries")
    plt.legend()
    Path(out_path).parent.mkdir(parents=True, exist_ok=True)
    plt.tight_layout()
    plt.savefig(out_path, dpi=180)
    plt.close()
    if not_found_out_path and not_found_rates:
        plt.figure()
        x_nf = np.arange(len(not_found_rates))
        plt.bar(x_nf, not_found_rates)
        plt.xticks(x_nf, not_found_labels, rotation=15, ha="right")
        title = "Not found rate (NF)"
        if scope == "overall":
            plt.title(f"{title} (overall)")
        else:
            plt.title(f"{title} ({lang})")
        plt.ylabel("share of queries")
        plt.ylim(0, 1.0)
        Path(not_found_out_path).parent.mkdir(parents=True, exist_ok=True)
        plt.tight_layout()
        plt.savefig(not_found_out_path, dpi=180)
        plt.close()


def save_margin_boxplot(models, scope, lang, out_path):
    """Box plot of the (top1 - top2) score margin per model; no-op if empty."""
    data = []
    labels = []
    for name, obj in models:
        margins = distribution_value(obj, scope, lang, "margins")
        if margins:
            data.append(margins)
            labels.append(obj.get("model", name))
    if not data:
        return
    plt.figure(figsize=(8, 4.5))
    plt.boxplot(data, labels=labels, showfliers=False)
    title = "Score margin (top1 - top2)"
    if scope == "overall":
        plt.title(f"{title} (overall)")
    else:
        plt.title(f"{title} ({lang})")
    plt.ylabel("margin")
    plt.xticks(rotation=15, ha="right")
    Path(out_path).parent.mkdir(parents=True, exist_ok=True)
    plt.tight_layout()
    plt.savefig(out_path, dpi=180)
    plt.close()


def save_coverage_plot(models, scope, lang, out_path):
    """Bar chart of coverage_ratio per model for a single scope.

    NOTE(review): currently unused by main() — superseded by
    save_coverage_grouped; kept for ad-hoc use.
    """
    vals = []
    labels = []
    for name, obj in models:
        v = coverage_value(obj, scope, lang, "coverage_ratio")
        if v is not None:
            vals.append(float(v))
            labels.append(obj.get("model", name))
    if not vals:
        return
    x = np.arange(len(vals))
    plt.figure()
    plt.bar(x, vals)
    plt.xticks(x, labels, rotation=15, ha="right")
    title = "Coverage ratio (unique docs / corpus)"
    if scope == "overall":
        plt.title(f"{title} (overall)")
    else:
        plt.title(f"{title} ({lang})")
    plt.ylabel("ratio")
    plt.ylim(0, 1.0)
    Path(out_path).parent.mkdir(parents=True, exist_ok=True)
    plt.tight_layout()
    plt.savefig(out_path, dpi=180)
    plt.close()


def _grouped_model_bars(models, value_fn, out_path, title, ylabel):
    """Shared helper: per-model bars grouped by overall/ru/kz scopes.

    *value_fn(obj, scope, lang)* extracts the plotted value; None → 0.0.
    """
    labels = []
    overall_vals = []
    ru_vals = []
    kz_vals = []
    for name, obj in models:
        label = obj.get("model", name)
        labels.append(label)
        overall_vals.append(value_fn(obj, "overall", None))
        ru_vals.append(value_fn(obj, "by_lang", "ru"))
        kz_vals.append(value_fn(obj, "by_lang", "kz"))
    if not labels:
        return
    x = np.arange(len(labels))
    width = 0.25
    plt.figure(figsize=(9, 4.8))
    plt.bar(x - width, [0.0 if v is None else float(v) for v in overall_vals], width, label="overall")
    plt.bar(x, [0.0 if v is None else float(v) for v in ru_vals], width, label="ru")
    plt.bar(x + width, [0.0 if v is None else float(v) for v in kz_vals], width, label="kz")
    plt.title(title)
    plt.ylabel(ylabel)
    plt.ylim(0, 1.0)
    plt.xticks(x, labels, rotation=15, ha="right")
    plt.grid(axis="y", alpha=0.2)
    plt.legend()
    Path(out_path).parent.mkdir(parents=True, exist_ok=True)
    plt.tight_layout()
    plt.savefig(out_path, dpi=180)
    plt.close()


def save_coverage_grouped(models, out_path):
    """Coverage ratio per model, grouped by overall/ru/kz."""
    def value_fn(obj, scope, lang):
        return coverage_value(obj, scope, lang, "coverage_ratio")

    _grouped_model_bars(
        models,
        value_fn,
        out_path,
        "Coverage (overall/ru/kz)",
        "ratio",
    )


def save_not_found_grouped(models, out_path):
    """Not-found rate per model, grouped by overall/ru/kz.

    Falls back to ``rank_stats.not_found_rate`` when the flat metric is
    absent (reports have stored it in either place).
    """
    def value_fn(obj, scope, lang):
        v = metric_value(obj, scope, lang, "not_found_rate")
        if v is None:
            v = rank_stat_value(obj, scope, lang, "not_found_rate")
        return v

    _grouped_model_bars(
        models,
        value_fn,
        out_path,
        "Not found rate (overall/ru/kz)",
        "share of queries",
    )


def save_top1_score_hist(models, scope, lang, out_dir):
    """Per-model histograms of top-1 scores, split by TP vs FP top-1.

    One PNG per model is written into *out_dir*; models lacking both
    distributions are skipped.
    """
    for name, obj in models:
        tp = distribution_value(obj, scope, lang, "top1_scores_tp")
        fp = distribution_value(obj, scope, lang, "top1_scores_fp")
        if not tp and not fp:
            continue
        plt.figure()
        if tp:
            plt.hist(tp, bins=20, alpha=0.6, label="top-1 is positive")
        if fp:
            plt.hist(fp, bins=20, alpha=0.6, label="top-1 is not positive")
        title = "Top-1 score distribution"
        label = obj.get("model", name)
        if scope == "overall":
            plt.title(f"{title} ({label}, overall)")
        else:
            plt.title(f"{title} ({label}, {lang})")
        plt.xlabel("similarity score")
        plt.ylabel("count")
        plt.legend()
        Path(out_dir).mkdir(parents=True, exist_ok=True)
        out_path = (
            Path(out_dir)
            / f"top1_score_tp_fp_{model_label_key(obj, name)}_{scope if scope else 'overall'}{'' if lang is None else '_' + lang}.png"
        )
        plt.tight_layout()
        plt.savefig(out_path, dpi=180)
        plt.close()


def save_metrics_heatmap(models, out_path):
    """Heatmap of the headline overall metrics, one row per model."""
    metrics = ["recall@1", "recall@3", "recall@5", "recall@10", "mrr@10", "ndcg@10", "not_found_rate"]
    data = []
    labels = []
    for name, obj in models:
        labels.append(obj.get("model", name))
        row = []
        for m in metrics:
            v = metric_value(obj, "overall", None, m)
            if v is None:
                # not_found_rate may live under rank_stats instead
                v = rank_stat_value(obj, "overall", None, m)
            row.append(0.0 if v is None else float(v))
        data.append(row)
    if not data:
        return
    arr = np.array(data)
    plt.figure(figsize=(9, 3.8))
    im = plt.imshow(arr, aspect="auto", cmap="viridis")
    plt.yticks(np.arange(len(labels)), labels)
    plt.xticks(np.arange(len(metrics)), metrics, rotation=30, ha="right")
    plt.title("Metrics heatmap (overall)")
    plt.colorbar(im, fraction=0.046, pad=0.04)
    plt.tight_layout()
    Path(out_path).parent.mkdir(parents=True, exist_ok=True)
    plt.savefig(out_path, dpi=180)
    plt.close()


def save_rank_cdf(models, out_path):
    """CDF over k of the share of queries whose 0-based rank is < k."""
    top_k = None
    for _, obj in models:
        if "top_k" in obj:
            top_k = int(obj["top_k"])
            break
    if top_k is None:
        return
    ks = np.arange(1, top_k + 1)
    plt.figure(figsize=(8.5, 4.5))
    for name, obj in models:
        ranks = distribution_value(obj, "overall", None, "ranks")
        if not ranks:
            continue
        total = max(1, len(ranks))
        ys = []
        for k in ks:
            found = sum(1 for r in ranks if r is not None and r >= 0 and r < k)
            ys.append(found / total)
        plt.plot(ks, ys, marker="o", label=obj.get("model", name))
    plt.title("Rank CDF (overall)")
    plt.xlabel("k")
    plt.ylabel("share of queries with rank ≤ k")
    plt.ylim(0, 1.0)
    plt.grid(alpha=0.2)
    plt.legend()
    Path(out_path).parent.mkdir(parents=True, exist_ok=True)
    plt.tight_layout()
    plt.savefig(out_path, dpi=180)
    plt.close()


def save_confidence_scatter(models, out_path):
    """Scatter of mean top-1 score (x) vs MRR@10 (y), one point per model."""
    xs = []
    ys = []
    labels = []
    for name, obj in models:
        mrr = metric_value(obj, "overall", None, "mrr@10")
        top1 = score_stat_value(obj, "overall", None, "top1_score", "mean")
        if mrr is None or top1 is None:
            continue
        xs.append(float(top1))
        ys.append(float(mrr))
        labels.append(obj.get("model", name))
    if not xs:
        return
    plt.figure(figsize=(6.5, 4.5))
    plt.scatter(xs, ys, s=60)
    for x, y, label in zip(xs, ys, labels):
        plt.text(x, y, f" {label}", fontsize=9, ha="left", va="center")
    plt.title("Top-1 confidence vs MRR@10 (overall)")
    plt.xlabel("mean top-1 score")
    plt.ylabel("mrr@10")
    plt.grid(alpha=0.2)
    Path(out_path).parent.mkdir(parents=True, exist_ok=True)
    plt.tight_layout()
    plt.savefig(out_path, dpi=180)
    plt.close()


def model_label_key(obj, name):
    """Map a model name to a short canonical key (labse/finetuned/base).

    Falls back to the lowercased file stem when no known pattern matches.
    """
    s = str(obj.get("model", name)).lower()
    if "labse" in s:
        return "labse"
    if "finetuned" in s or "artifacts" in s:
        return "finetuned"
    if "paraphrase-multilingual-mpnet-base-v2" in s:
        return "base"
    if "mpnet" in s:
        return "base"
    return name.lower()


def select_model(models, key):
    """Return the first ``(name, obj)`` whose canonical key equals *key*."""
    for name, obj in models:
        if model_label_key(obj, name) == key:
            return (name, obj)
    return None


def save_relative_improvement_plot(models, scope, lang, out_path):
    """Bar chart of the finetuned model's % improvement over the base model.

    Requires both a "finetuned" and a "base" model (per model_label_key);
    returns early otherwise. Metrics with a non-positive base value are
    plotted as NaN (no bar) to avoid division by zero.
    """
    fin = select_model(models, "finetuned")
    base = select_model(models, "base")
    if fin is None or base is None:
        return
    metrics = ["recall@1", "recall@3", "recall@5", "recall@10", "mrr@10", "ndcg@10"]
    labels = ["R@1", "R@3", "R@5", "R@10", "MRR@10", "nDCG@10"]
    fin_obj = fin[1]
    base_obj = base[1]
    vals = []
    for m in metrics:
        fv = metric_value(fin_obj, scope, lang, m)
        bv = metric_value(base_obj, scope, lang, m)
        fv = 0.0 if fv is None else float(fv)
        bv = 0.0 if bv is None else float(bv)
        if bv <= 0:
            vals.append(np.nan)
        else:
            vals.append((fv - bv) / bv * 100.0)
    x = np.arange(len(metrics))
    plt.figure()
    plt.bar(x, vals)
    plt.xticks(x, labels)
    title = "Relative improvement vs base (%)"
    if scope == "overall":
        plt.title(f"{title} (overall)")
    else:
        plt.title(f"{title} ({lang})")
    plt.ylabel("%")
    plt.axhline(0.0)  # zero line: above = improvement, below = regression
    Path(out_path).parent.mkdir(parents=True, exist_ok=True)
    plt.tight_layout()
    plt.savefig(out_path, dpi=180)
    plt.close()


def main():
    """Load all eval reports and emit every figure plus a JSON manifest."""
    reports_dir = Path("artifacts/reports")
    files = sorted([str(p) for p in reports_dir.glob("eval_*.json")])
    models = pick_models(files)
    if not models:
        raise SystemExit("No eval_*.json found in artifacts/reports")
    fig_dir = reports_dir / "figures"
    fig_dir.mkdir(parents=True, exist_ok=True)
    save_recall_plot(models, "overall", None, fig_dir / "recall_overall.png")
    save_rank_metrics_plot(models, "overall", None, fig_dir / "rank_metrics_overall.png")
    save_recall_curve_plot(models, "overall", None, fig_dir / "recall_curve_overall.png")
    save_relative_improvement_plot(models, "overall", None, fig_dir / "relative_improvement_overall.png")
    save_precision_plot(models, "overall", None, fig_dir / "precision_overall.png")
    save_rank_stats_plot(models, "overall", None, fig_dir / "rank_stats_overall.png")
    save_rank_distribution_plot(
        models,
        "overall",
        None,
        fig_dir / "rank_distribution_overall.png",
        None,
    )
    save_margin_boxplot(models, "overall", None, fig_dir / "score_margin_overall.png")
    # Per-scope coverage plots removed in favor of grouped chart.
    save_top1_score_hist(models, "overall", None, fig_dir)
    save_coverage_grouped(models, fig_dir / "coverage_grouped.png")
    save_not_found_grouped(models, fig_dir / "not_found_grouped.png")
    save_metrics_heatmap(models, fig_dir / "metrics_heatmap_overall.png")
    save_rank_cdf(models, fig_dir / "rank_cdf_overall.png")
    save_confidence_scatter(models, fig_dir / "confidence_scatter_overall.png")
    for lang in ["ru", "kz"]:
        save_recall_plot(models, "by_lang", lang, fig_dir / f"recall_{lang}.png")
        save_rank_metrics_plot(
            models, "by_lang", lang, fig_dir / f"rank_metrics_{lang}.png"
        )
        save_recall_curve_plot(
            models, "by_lang", lang, fig_dir / f"recall_curve_{lang}.png"
        )
        save_relative_improvement_plot(
            models, "by_lang", lang, fig_dir / f"relative_improvement_{lang}.png"
        )
        save_precision_plot(models, "by_lang", lang, fig_dir / f"precision_{lang}.png")
        save_rank_stats_plot(models, "by_lang", lang, fig_dir / f"rank_stats_{lang}.png")
        save_rank_distribution_plot(
            models,
            "by_lang",
            lang,
            fig_dir / f"rank_distribution_{lang}.png",
            None,
        )
    summary = {
        "loaded_reports": [Path(f).name for f in files],
        "figures": [p.name for p in sorted(fig_dir.glob("*.png"))],
    }
    (reports_dir / "figures_summary.json").write_text(
        json.dumps(summary, ensure_ascii=False, indent=2),
        encoding="utf-8",
    )


if __name__ == "__main__":
    main()