"""Generate a combined evaluation report summarizing all results."""

import json
from pathlib import Path
from typing import Dict, Optional

from loguru import logger


def generate_report(
    results_dir: str,
    baseline_report: Optional[Dict] = None,
    distilbert_report: Optional[Dict] = None,
    ragas_output: Optional[Dict] = None,
) -> str:
    """Generate a markdown summary report of all evaluation results.

    Args:
        results_dir: Directory containing result artifacts.
        baseline_report: Baseline classification report dict (optional, loads from file if None).
        distilbert_report: DistilBERT classification report dict (optional, loads from file if None).
        ragas_output: RAGAS evaluation output dict (optional, loads from file if None).

    Returns:
        Markdown report string.
    """
    base = Path(results_dir)

    # Load from file if not provided
    if baseline_report is None:
        p = base / "baseline_classification_report.json"
        if p.exists():
            with open(p) as f:
                baseline_report = json.load(f)

    if distilbert_report is None:
        p = base / "classification_report.json"
        if p.exists():
            with open(p) as f:
                distilbert_report = json.load(f)

    if ragas_output is None:
        p = base / "ragas_scores.json"
        if p.exists():
            with open(p) as f:
                ragas_output = json.load(f)

    lines = [
        "# Customer Support Agent — Evaluation Report",
        "",
        "## Classification Results",
        "",
    ]

    if baseline_report:
        b_f1 = baseline_report.get("weighted avg", {}).get("f1-score", "N/A")
        b_acc = baseline_report.get("accuracy", "N/A")
        lines += [
            "### Baseline (TF-IDF + Logistic Regression)",
            f"- **Weighted F1**: {b_f1:.4f}" if isinstance(b_f1, float) else f"- **Weighted F1**: {b_f1}",
            f"- **Accuracy**: {b_acc:.4f}" if isinstance(b_acc, float) else f"- **Accuracy**: {b_acc}",
            "",
        ]

    if distilbert_report:
        d_f1 = distilbert_report.get("weighted avg", {}).get("f1-score", "N/A")
        d_acc = distilbert_report.get("accuracy", "N/A")
        lines += [
            "### DistilBERT Fine-tuned",
            f"- **Weighted F1**: {d_f1:.4f}" if isinstance(d_f1, float) else f"- **Weighted F1**: {d_f1}",
            f"- **Accuracy**: {d_acc:.4f}" if isinstance(d_acc, float) else f"- **Accuracy**: {d_acc}",
            "",
        ]

    if ragas_output and "aggregate" in ragas_output:
        agg = ragas_output["aggregate"]
        lines += [
            "## RAGAS Evaluation",
            "",
            f"- **Queries evaluated**: {ragas_output.get('n_evaluated', 'N/A')}",
            f"- **Flagged (low faithfulness)**: {ragas_output.get('n_flagged', 'N/A')} "
            f"({ragas_output.get('pct_flagged', 0.0):.1f}%)",
            "",
        ]
        for metric, stats in agg.items():
            lines += [
                f"### {metric.replace('_', ' ').title()}",
                f"- Mean: {stats['mean']:.4f}",
                f"- Median: {stats['median']:.4f}",
                f"- Std: {stats['std']:.4f}",
                f"- Min / Max: {stats['min']:.4f} / {stats['max']:.4f}",
                "",
            ]

    report = "\n".join(lines)
    path = base / "evaluation_report.md"
    path.write_text(report)
    logger.info(f"Saved evaluation report → {path}")
    return report