"""
Dynamic dataset generation for Government Fraud Detection tasks.

This module perturbs the base synthetic documents at episode reset time to
reduce memorization while keeping the task schema and key identifiers stable.
"""

from __future__ import annotations

import copy
import random
from datetime import date, timedelta
from typing import Any, Dict, Optional

from .documents import TASK1_DOCUMENTS, TASK2_DOCUMENTS, TASK3_DOCUMENTS


def _rng(seed: Optional[int] = None) -> random.Random:
    """Build the random generator used for one episode.

    A ``seed`` of ``None`` yields a generator with non-deterministic
    seeding; any other value (including ``0``) is passed to
    ``random.Random`` so the resulting draw sequence is reproducible.
    """
    if seed is None:
        return random.Random()
    return random.Random(seed)


def _shift_date(r: random.Random, iso_date: str, max_days: int = 21) -> str:
    """Move a ``YYYY-MM-DD`` date by a random number of days.

    The offset is a single ``r.randint`` draw from the inclusive range
    ``[-max_days, max_days]``; the shifted date is returned in ISO format.
    """
    # Parse first so a malformed date fails before any random draw is made.
    year_text, month_text, day_text = iso_date.split("-")
    base = date(int(year_text), int(month_text), int(day_text))
    offset = timedelta(days=r.randint(-max_days, max_days))
    return (base + offset).isoformat()


def _jitter_amount(r: random.Random, value: float, pct: float) -> float:
    """Scale ``value`` by a random factor in ``[1 - pct, 1 + pct]``.

    The factor comes from one ``r.uniform`` draw; the result is rounded to
    two decimal places.
    """
    scaled = value * (1.0 + r.uniform(-pct, pct))
    return round(scaled, 2)


def _add_noise_documents(
    docs: Dict[str, Dict[str, Any]],
    r: random.Random,
    count: int,
    prefix: str,
    doc_type: str,
    title_prefix: str,
    preview_prefix: str,
) -> None:
    """Insert ``count`` distractor documents into ``docs`` in place.

    Each distractor gets an ID of the form ``{prefix}-DNN-REF`` where ``NN``
    is its 1-based position and ``REF`` is a random three-digit reference
    drawn from ``r``. Nothing is returned; ``docs`` is mutated.
    """
    for position in range(count):
        ordinal = f"{position + 1:02d}"
        # Exactly one random draw per distractor, reused in ID, title and body.
        reference = r.randint(100, 999)

        body = {
            "note": "Irrelevant distractor document generated for robustness testing",
            "reference": reference,
            "seeded_variant": True,
        }
        distractor = {
            "doc_type": doc_type,
            "title": f"{title_prefix} {ordinal} / Ref {reference}",
            "preview": f"{preview_prefix} {reference}",
            "content": body,
        }
        docs[f"{prefix}-D{ordinal}-{reference}"] = distractor


def generate_dynamic_documents(task_id: str, seed: Optional[int] = None) -> Dict[str, Dict[str, Any]]:
    """
    Return per-episode task documents with randomized surface details.

    Args:
        task_id: One of ``"duplicate_billing"``, ``"shell_company"`` or
            ``"fca_complaint"``.
        seed: Optional seed for the random generator. A given seed always
            produces the same sequence of random draws for a task.

    Returns:
        A deep copy of the task's base documents with perturbed fields and
        refreshed previews, plus a few distractor documents. The base
        document constants are never mutated.

    Raises:
        ValueError: If ``task_id`` is not one of the supported tasks.

    Notes:
    - Key document IDs remain unchanged so existing policies and graders still work.
    - Ground-truth relationships remain intact; only surface details are varied.
    """
    if task_id == "duplicate_billing":
        build = _build_duplicate_billing_documents
    elif task_id == "shell_company":
        build = _build_shell_company_documents
    elif task_id == "fca_complaint":
        build = _build_fca_complaint_documents
    else:
        raise ValueError(f"Unsupported task_id for dynamic generation: {task_id}")

    return build(_rng(seed))


def _build_duplicate_billing_documents(r: random.Random) -> Dict[str, Dict[str, Any]]:
    """Build the perturbed claim set for the ``duplicate_billing`` task."""
    docs = copy.deepcopy(TASK1_DOCUMENTS)

    patient_names = ["Robert Haines", "Martha Stone", "Peter Larson", "Nina Ortiz", "Daniel Brooks"]
    provider_names = ["MedCorp Associates LLC", "Summit Care Group", "Northfield Medical Partners"]
    patient_name = r.choice(patient_names)
    provider_name = r.choice(provider_names)
    patient_alias = f"{patient_name} (obfuscated)" if r.random() < 0.5 else patient_name
    # Some replacements are no-ops for a given name; that simply makes the
    # unabbreviated spelling more likely.
    provider_alias = r.choice([
        provider_name,
        provider_name.replace("Associates", "Assoc."),
        provider_name.replace("Partners", "Ptnrs"),
    ])
    service_date = _shift_date(r, "2024-03-15", max_days=30)
    near_service_date = _shift_date(r, service_date, max_days=2)
    if near_service_date == service_date:
        # A zero-day offset would give CLAIM-004 the exact service date of the
        # CLAIM-001/002 pair, making it indistinguishable from them on every
        # field set below. Nudge by one day instead of re-drawing so the
        # random stream (and every later value) is unchanged for a given seed.
        # NOTE(review): assumes CLAIM-004 is the near-miss distractor - confirm
        # against the grader's ground truth.
        near_service_date = (date.fromisoformat(service_date) + timedelta(days=1)).isoformat()
    billed_amount = _jitter_amount(r, 185.0, pct=0.12)

    for claim_id, doc in docs.items():
        content = doc["content"]
        # Every claim gets independent noise first; the related claims are
        # then overwritten with the shared values drawn above.
        content["submitted_date"] = _shift_date(r, content["submitted_date"], max_days=30)
        content["billed_amount"] = _jitter_amount(r, float(content["billed_amount"]), pct=0.15)

        if claim_id in {"CLAIM-001", "CLAIM-002", "CLAIM-004"}:
            content["patient_name"] = patient_alias
            content["provider_name"] = provider_alias
            content["provider_id"] = "PRV-8821"
            content["procedure_code"] = "99213"
            content["billed_amount"] = billed_amount

        if claim_id in {"CLAIM-001", "CLAIM-002"}:
            content["service_date"] = service_date
        elif claim_id == "CLAIM-004":
            content["service_date"] = near_service_date

        # Rebuild the preview so it reflects the perturbed content.
        doc["preview"] = (
            f"Patient {content['patient_id']}, Procedure {content['procedure_code']}, "
            f"Date {content['service_date']}, ${content['billed_amount']:.2f}"
        )

    _add_noise_documents(
        docs,
        r,
        count=2,
        prefix="CLAIM",
        doc_type="medicare_claim",
        title_prefix="Medicare Claim Appendix",
        preview_prefix="Extra claim reference",
    )

    return docs


def _build_shell_company_documents(r: random.Random) -> Dict[str, Dict[str, Any]]:
    """Build the perturbed contract/registry set for the ``shell_company`` task."""
    docs = copy.deepcopy(TASK2_DOCUMENTS)

    contract_1 = docs["CONTRACT-001"]["content"]
    contract_2 = docs["CONTRACT-002"]["content"]
    contract_1["award_date"] = _shift_date(r, contract_1["award_date"], max_days=60)
    contract_2["award_date"] = _shift_date(r, contract_2["award_date"], max_days=60)
    contract_1["project"] = r.choice([
        "GSA Region 4 Facility Renovation - Atlanta",
        "Federal Archive Retrofit - Atlanta",
        "Regional Operations Building Upgrade - Atlanta",
    ])
    contract_2["project"] = r.choice([
        "DOT Regional Office Network Upgrade",
        "Transport Systems Security Refresh",
        "Regional Data Backbone Modernization",
    ])

    vendor_reg = docs["VENDOR-REG-001"]["content"]
    vendor_reg["legal_name"] = r.choice(["FastBuild LLC", "FastBuild Group LLC", "FastBuild Holdings LLC"])
    # int() truncates the jittered revenue to whole units.
    vendor_reg["annual_revenue_reported"] = int(
        _jitter_amount(r, float(vendor_reg["annual_revenue_reported"]), pct=0.35)
    )
    # Headcount is rounded to the nearest integer and never drops below 2.
    vendor_reg["employees_reported"] = max(
        2,
        int(round(_jitter_amount(r, float(vendor_reg["employees_reported"]), pct=0.4))),
    )

    trust = docs["TRUST-DOC-001"]["content"]
    trust["trust_date"] = _shift_date(r, trust["trust_date"], max_days=180)
    invoice = docs["INVOICE-001"]["content"]
    invoice["invoice_date"] = _shift_date(r, invoice["invoice_date"], max_days=45)
    docs["GOV-EMPLOYEE-001"]["content"]["title"] = r.choice([
        "Senior Contracting Officer",
        "Contracting Officer IV",
        "Federal Procurement Officer",
    ])

    # The contract previews keep the fixed vendor name; only the registry
    # record's legal name is randomized above.
    docs["CONTRACT-001"]["preview"] = (
        f"FastBuild LLC awarded ${contract_1['award_amount'] / 1_000_000:.2f}M for {contract_1['project']}"
    )
    docs["CONTRACT-002"]["preview"] = (
        f"FastBuild LLC awarded ${contract_2['award_amount'] / 1_000_000:.2f}M for {contract_2['project']}"
    )
    # Use the randomized legal name so the registry preview never contradicts
    # the registry document it summarizes.
    docs["VENDOR-REG-001"]["preview"] = (
        f"{vendor_reg['legal_name']}, Delaware LLC, "
        f"reported revenue ${vendor_reg['annual_revenue_reported']:,}"
    )
    docs["TRUST-DOC-001"]["preview"] = (
        f"Trustee: {trust['trustee']}. "
        f"Trust date {trust['trust_date']}"
    )

    _add_noise_documents(
        docs,
        r,
        count=2,
        prefix="CORP",
        doc_type="corporate_filing",
        title_prefix="State Filing Appendix",
        preview_prefix="Irrelevant state filing ref",
    )

    return docs


def _build_fca_complaint_documents(r: random.Random) -> Dict[str, Dict[str, Any]]:
    """Build the perturbed evidence set for the ``fca_complaint`` task."""
    docs = copy.deepcopy(TASK3_DOCUMENTS)

    tip = docs["ANON-TIP-001"]["content"]
    tip["received_date"] = _shift_date(r, tip["received_date"], max_days=75)
    tip["estimated_fraud_amount"] = r.choice([
        "Could be in the high single-digit millions",
        "Likely over $8M based on claims volume",
        "Potentially between $8M and $12M",
    ])
    tip["tipster_relation"] = r.choice([
        "Former billing department employee",
        "Former coding specialist",
        "Former claims auditor",
    ])

    claim_batch = docs["CMS-CLAIM-BATCH-001"]["content"]
    total_claims = int(round(_jitter_amount(r, float(claim_batch["total_claims"]), pct=0.1)))
    # Clamp so the claim volume stays within a fixed 700-950 window.
    total_claims = max(700, min(950, total_claims))
    claim_batch["total_claims"] = total_claims
    # NOTE(review): total_billed is left at its base value even though
    # total_claims changes - confirm this is intended.
    claim_batch["comparison_industry_avg_k0831_claims"] = max(
        90,
        min(180, int(round(_jitter_amount(r, 120.0, pct=0.2)))),
    )

    sampled_orders = docs["PHYSICIAN-ORDERS-001"]["content"]
    sampled_orders["orders_supporting_k0831"] = r.randint(10, 16)
    sampled_orders["orders_not_supporting_k0831"] = 50 - sampled_orders["orders_supporting_k0831"]
    # NOTE(review): supporting + not-supporting always total 50 here while
    # orders_missing_entirely is drawn independently - confirm whether missing
    # orders are meant to be a subset of the 50-order sample.
    sampled_orders["orders_missing_entirely"] = r.randint(5, 11)

    complaints = docs["PATIENT-COMPLAINT-001"]["content"]
    complaints["complaints_reviewed"] = r.randint(20, 35)

    expert = docs["EXPERT-ANALYSIS-001"]["content"]["findings"]
    expert["non_compliant_claims_estimated_pct"] = r.randint(70, 82)
    # Derived from the perturbed claim volume; int() truncates.
    expert["estimated_non_compliant_claims"] = int(
        total_claims * (expert["non_compliant_claims_estimated_pct"] / 100.0)
    )

    docs["CMS-CLAIM-BATCH-001"]["preview"] = (
        f"{total_claims} claims for K0831 power wheelchair, ${claim_batch['total_billed'] / 1_000_000:.1f}M total"
    )
    docs["PHYSICIAN-ORDERS-001"]["preview"] = (
        f"{sampled_orders['orders_not_supporting_k0831']} of 50 sampled physician orders do not support K0831 level"
    )
    docs["PATIENT-COMPLAINT-001"]["preview"] = (
        f"{complaints['complaints_reviewed']} complaints: received wrong equipment or no equipment"
    )
    docs["EXPERT-ANALYSIS-001"]["preview"] = (
        "Expert estimates improper Medicare payments in multi-million range "
        f"with {expert['non_compliant_claims_estimated_pct']}% non-compliance"
    )

    _add_noise_documents(
        docs,
        r,
        count=3,
        prefix="MISC",
        doc_type="misc_ledger",
        title_prefix="Miscellaneous Appendix",
        preview_prefix="Irrelevant operational note",
    )

    return docs