"""Central configuration: paths, seeds, model names, severity mapping."""
from __future__ import annotations

from pathlib import Path

# Parent of the directory that contains this file; every other path below is
# derived from it.
# NOTE(review): presumably the repository root — confirm against the layout.
ROOT = Path(__file__).resolve().parents[1]
DATA_DIR = ROOT / "data"
MODELS_DIR = ROOT / "models"
OUTPUTS_DIR = ROOT / "outputs"
NOTEBOOKS_DIR = ROOT / "notebooks"

# Import-time side effect: make sure the data, models and outputs directories
# exist (missing parents are created, existing directories are left alone).
# NOTEBOOKS_DIR is not in this tuple, so it is never created here.
for _p in (DATA_DIR, MODELS_DIR, OUTPUTS_DIR):
    _p.mkdir(exist_ok=True, parents=True)

# Dataset identifier and the parquet file path under DATA_DIR.
# NOTE(review): presumably a Hugging Face Hub dataset id and its local cached
# copy — confirm with the data-loading code.
CUAD_HF_ID = "dvgodoy/CUAD_v1_Contract_Understanding_clause_classification"
CUAD_PARQUET = DATA_DIR / "cuad.parquet"

# Embedding model name and embedding width.
# NOTE(review): EMBED_DIM is hard-coded, so it has to agree with the output
# dimension of EMBED_MODEL_NAME; change the two together.
EMBED_MODEL_NAME = "BAAI/bge-small-en-v1.5"
EMBED_DIM = 384

# NOTE(review): presumably the shared random seed and the held-out fraction
# for the train/test split — confirm against the callers.
SEED = 42
TEST_SIZE = 0.20

# EPCC severity tiers over CUAD's 41 clause labels, viewed from an EPC
# contractor's perspective. "Critical" labels are the ones where a false
# negative creates direct commercial exposure (uncapped liability, missing
# LD/insurance terms, termination economics). "Moderate" labels affect rights
# and downstream operations. "Low" labels are bookkeeping fields useful for
# indexing.
#
# The three lists below hold 8 + 27 + 6 = 41 labels, with no label repeated
# across tiers.
# NOTE(review): the label strings presumably have to match the dataset's label
# text exactly (case and punctuation included) — confirm against the loader.
SEVERITY_TIERS: dict[str, list[str]] = {
    "critical": [
        "Uncapped Liability",
        "Cap On Liability",
        "Liquidated Damages",
        "Termination For Convenience",
        "Insurance",
        "Warranty Duration",
        "Covenant Not To Sue",
        "Source Code Escrow",
    ],
    "moderate": [
        "Change Of Control",
        "Anti-Assignment",
        "Audit Rights",
        "Governing Law",
        "Non-Compete",
        "Exclusivity",
        "No-Solicit Of Customers",
        "No-Solicit Of Employees",
        "Non-Disparagement",
        "Rofr/Rofo/Rofn",
        "Revenue/Profit Sharing",
        "Price Restrictions",
        "Minimum Commitment",
        "Volume Restriction",
        "Ip Ownership Assignment",
        "Joint Ip Ownership",
        "License Grant",
        "Non-Transferable License",
        "Affiliate License-Licensor",
        "Affiliate License-Licensee",
        "Unlimited/All-You-Can-Eat-License",
        "Irrevocable Or Perpetual License",
        "Post-Termination Services",
        "Third Party Beneficiary",
        "Competitive Restriction Exception",
        "Notice Period To Terminate Renewal",
        "Renewal Term",
    ],
    "low": [
        "Document Name",
        "Parties",
        "Agreement Date",
        "Effective Date",
        "Expiration Date",
        # NOTE(review): unlike the other entries in this tier, this does not
        # look like a bookkeeping field — confirm the tier is intended.
        "Most Favored Nation",
    ],
}


def label_to_severity() -> dict[str, str]:
    """Invert ``SEVERITY_TIERS`` into a flat ``{label: tier}`` lookup.

    A fresh dict is built on every call. Tiers are visited in the order
    they appear in ``SEVERITY_TIERS``; if a label were listed under more
    than one tier, the tier visited last would win.
    """
    return {
        label: severity
        for severity, tier_labels in SEVERITY_TIERS.items()
        for label in tier_labels
    }


# Confidence threshold below which the classifier abstains (routes to human
# review). Chosen empirically from the calibration sweep and overridable.
# NOTE(review): the override mechanism is not defined in this module —
# confirm where callers supply a different value.
DEFAULT_ABSTAIN_THRESHOLD = 0.45