"""Scheme Code Resolver
======================
Resolves missing AMFI scheme codes by fuzzy-matching the fund name from the
CSV against mfapi.in's /mf/search endpoint.

This runs as a PRE-TRIAGE step so that the NAV engine can fire for funds whose
scheme code was absent from the CSV.
"""

from __future__ import annotations

import difflib
import re
import time

import requests


# Search endpoint; _search_mfapi sends the query text as the `q` parameter.
MFAPI_SEARCH = "https://api.mfapi.in/mf/search"
# _best_match compares the normalized CSV name with each normalized candidate
# name and rejects the best candidate if its ratio is below this value.
MATCH_CUTOFF  = 0.52    # minimum SequenceMatcher ratio to accept
# NOTE(review): SLEEP_BETWEEN is not referenced anywhere in the visible code;
# confirm whether the rate limit is still meant to be applied.
SLEEP_BETWEEN = 0.25    # seconds between API calls (polite rate limit)

# Manual overrides for schemes that mfapi's search endpoint does not
# currently return, but whose AMFI codes are known and stable. Keys are
# normalized fund names (see _normalize).
#
# Lookup is an exact dictionary match on the _normalize() output, so every key
# must be lowercase with single spaces between words. Values are scheme codes
# stored as digit strings.
SCHEME_OVERRIDES: dict[str, str] = {
    # ── Pre-verified from AMFI NAV master (portal.amfiindia.com) ──────────────
    # These funds have empty scheme codes in source CSV and cannot be reliably
    # resolved via mfapi fuzzy search. Codes are Regular Plan - Growth only.

    # Existing override
    "kotak tax saver scheme growth": "109234",

    # ── Debt: Banking and PSU ─────────────────────────────────────────────────
    "hdfc banking and psu debt fund growth option":         "128628",
    "icici prudential banking and psu debt fund growth":    "112342",
    "kotak banking and psu debt growth":                    "123690",
    "invesco india banking and psu fund growth option":     "118232",
    "sundaram banking psu fund formerly known as sundaram banking and psu debt fund regular plan growth": "100784",
    "hsbc banking and psu debt fund regular growth":        "151104",
    "iti banking psu debt fund regular plan growth option": "148535",

    # ── Debt: Liquid ──────────────────────────────────────────────────────────
    "dsp liquidity fund regular plan growth":               "119120",
    "invesco india liquid fund growth":                     "104488",
    "invesco india liquid fund regular growth":             "118769",
    "union liquid fund growth option":                      "115398",
    "parag parikh liquid fund regular plan growth":         "149038",
    "motilal oswal liquid fund regular growth":             "147622",
    "iti liquid fund regular plan growth option":           "147153",
    "quantum liquid fund regular plan growth option":       "103504",
    "lic mf liquid fund regular plan growth":               "120716",
    "icici prudential liquid fund growth":                  "120593",
    "aditya birla sun life liquid fund retail growth":      "100042",
    "aditya birla sun life liquid fund growth":             "100047",
    "edelweiss liquid fund regular plan growth option":     "140182",
    "edelweiss liquid fund retail plan growth option":      "119114",
    "axis liquid fund retail plan growth option":           "112090",
    "sbi liquid fund regular plan growth":                  "119822",
    "nippon india liquid fund retail option growth plan":   "100837",

    # ── Debt: Overnight ───────────────────────────────────────────────────────
    "uti overnight fund regular plan growth option":            "100814",
    "canara robeco overnight fund regular plan growth option":  "147534",
    "dsp overnight fund regular plan growth":                   "146061",
    "franklin india overnight fund growth":                     "146210",
    "bandhan overnight fund regular plan growth":               "146187",
    "iti overnight fund regular plan growth option":            "148529",
    "union overnight fund regular plan growth option":          "146997",
    "icici prudential overnight fund growth":                   "145811",
    "edelweiss overnight fund regular plan growth":             "147569",
    "lic mf overnight fund regular plan growth":                "146065",
    "hdfc overnight fund growth option":                        "145822",

    # ── Debt: Ultra Short Duration ────────────────────────────────────────────
    "icici prudential ultra short term fund growth":                    "120505",
    "invesco india ultra short duration fund growth":                   "117825",
    "uti ultra short duration fund regular plan growth option":         "102532",
    # NOTE(review): the next two keys map to the same code (119293) — confirm
    # that both plan names are meant to resolve to a single scheme.
    "aditya birla sun life savings fund growth regular plan":           "119293",
    "aditya birla sun life savings fund retail growth":                 "119293",
    "hdfc ultra short term fund growth option":                         "145539",
    "aditya birla sun life savings fund discipline advantage plan":     "112016",
    "pgim india ultra short duration fund growth":                      "100474",
    "iti ultra short duration fund regular plan growth option":         "148533",
    "motilal oswal ultra short term fund mofustf regular plan growth":  "124233",
    "tata ultra short term fund regular plan growth":                   "146070",
    "kotak savings fund growth":                                        "119270",
    "lic mf ultra short duration fund regular plan growth":             "147770",
    "canara robeco ultra short term fund regular plan growth option":   "119671",
    "sundaram ultra short duration fund formerly known as principal ultra short term fund growth option": "120826",
    "bank of india ultra short duration fund regular plan growth":      "109269",

    # ── Debt: Short Duration ──────────────────────────────────────────────────
    "hdfc short term debt fund growth option":                    "119247",
    "icici prudential short term fund growth option":             "101758",
    "sbi short horizon debt fund short term fund retail growth":  "106227",
    "sbi short term debt fund regular plan growth":               "119831",
    "kotak bond short term plan growth":                          "101373",
    "dsp short term fund regular plan growth":                    "119598",
    "lic mf short duration fund regular plan growth":             "145952",
    "mirae asset short duration fund regular plan growth":        "148416",
    "invesco india short duration fund growth":                   "105185",
    "canara robeco short duration fund regular plan growth option": "119675",
    "groww short duration fund formerly known as indiabulls short term fund regular plan growth option": "123708",
    "tata short term bond fund regular plan growth option":       "119802",

    # ── Debt: Medium Duration ─────────────────────────────────────────────────
    "aditya birla sun life medium term plan growth regular plan": "111803",
    "axis strategic bond fund regular plan growth option":        "116894",
    "icici prudential medium term bond fund growth":              "120841",
    "hdfc medium term debt fund growth option":                   "119238",
    "kotak medium term fund growth":                              "119281",
    "dsp bond fund growth":                                       "100078",
    "sundaram medium duration fund formerly known as sundaram medium term bond fund regular plan growth": "100603",

    # ── ETFs ──────────────────────────────────────────────────────────────────
    "hdfc nifty100 low volatility 30 etf growth option":  "145748",
    "hdfc nifty200 momentum 30 etf growth option":        "146058",
    "hdfc nifty it etf growth option":                    "120493",
    "hdfc nifty private bank etf growth option":          "145696",

    # ── Index Funds ───────────────────────────────────────────────────────────
    "dsp nifty next 50 index fund regular plan growth":         "143669",
    "uti nifty next 50 index fund regular plan growth option":  "120713",
    "motilal oswal nifty smallcap 250 index regular plan":      "147960",
    "icici prudential nifty pharma index fund growth":          "143874",
    "dsp nifty 50 index fund regular plan growth":              "143537",
    "motilal oswal nifty midcap 150 index fund regular plan":   "147068",
    "sbi nifty index fund regular plan growth":                 "135818",
    "motilal oswal nifty bank index regular plan":              "145552",
}


def _normalize(name: str) -> str:
    """Convert hyphenated CSV name to a clean lowercase string."""
    return re.sub(r"[-_]+", " ", name).strip().lower()


def _search_query(name: str) -> str:
    """Build a short search query from the leading words of a fund name.

    The name is normalized, split on whitespace, and only the first six
    words are kept, joined by single spaces.
    """
    words = _normalize(name).split()
    leading = words[:6]
    return " ".join(leading)


def _search_mfapi(query: str) -> list[dict]:
    """Query the mfapi search endpoint and return its candidate records.

    Sends ``query`` as the ``q`` parameter to MFAPI_SEARCH with a 15 second
    timeout. This is a best-effort lookup: any failure (network error, HTTP
    error status, undecodable body) is printed and reported as "no
    candidates" instead of being raised.

    Returns:
        The dict entries of the decoded JSON list, or an empty list when the
        request failed or the payload was not a list. Non-dict entries are
        dropped because _best_match calls ``.get`` on every candidate.
    """
    try:
        resp = requests.get(MFAPI_SEARCH, params={"q": query}, timeout=15)
        resp.raise_for_status()
        payload = resp.json()
    except Exception as exc:
        # Deliberately broad: a failed search must never abort the whole run.
        print(f"  [resolver] search error for '{query}': {exc}")
        return []

    # An error object or other non-list body would otherwise be iterated as
    # if it were a list of candidates.
    if not isinstance(payload, list):
        print(
            f"  [resolver] search error for '{query}': "
            f"unexpected response type {type(payload).__name__}"
        )
        return []
    return [item for item in payload if isinstance(item, dict)]


def _best_match(candidates: list[dict], target_name: str) -> dict | None:
    if not candidates:
        return None
    target = _normalize(target_name)
    best_score = 0.0
    best_item  = None
    for item in candidates:
        candidate = _normalize(item.get("schemeName", ""))
        score = difflib.SequenceMatcher(None, target, candidate).ratio()
        if score > best_score:
            best_score = score
            best_item  = item
    if best_score >= MATCH_CUTOFF:
        return best_item
    return None


def _is_valid_scheme_code(code: str) -> bool:
    """AMFI scheme codes are purely numeric (e.g. 120586). Platform codes like GROWWEH are invalid."""
    return bool(code and code.isdigit())


def resolve_scheme_code_for_fund_name(
    fund_name: str,
) -> tuple[str | None, str | None]:
    """
    Look up the scheme code for a single fund name.

    The normalized name is first checked against SCHEME_OVERRIDES; a hit is
    returned as ``(code, "override")`` without any network call. Otherwise
    mfapi is searched and the best fuzzy match is returned as
    ``(scheme code as a string, matched scheme name)``.

    Returns ``(None, None)`` when neither step produces a result.
    """
    known_code = SCHEME_OVERRIDES.get(_normalize(fund_name))
    if known_code:
        return known_code, "override"

    hit = _best_match(_search_mfapi(_search_query(fund_name)), fund_name)
    if not hit:
        return None, None
    return str(hit["schemeCode"]), hit.get("schemeName", "")


def resolve_missing_scheme_codes(
    rows: list[dict[str, str]],
    *,
    verbose: bool = True,
    max_workers: int = 20,
) -> tuple[list[dict[str, str]], dict[str, str]]:
    """
    Fill in blank or invalid scheme codes, and correct rows whose existing
    numeric code disagrees with SCHEME_OVERRIDES.

    Only rows whose "Fund" value is non-empty, contains at least two hyphens
    and no colon are considered. Each such row is handled as follows:

    1. If the normalized fund name has an override and the row's code
       differs, the override is written. It counts as a *corrected existing*
       code when the old value was a valid code, and as an *override
       resolution* when the old value was blank or invalid.
    2. Otherwise a row with a valid code is left alone.
    3. Otherwise any invalid code is cleared and the fund name is looked up
       through resolve_scheme_code_for_fund_name on a thread pool, once per
       distinct fund name.

    Args:
        rows: CSV rows; "Scheme Code" is updated in place.
        verbose: Print progress and a summary when True.
        max_workers: Size of the thread pool used for the network lookups.
            ThreadPoolExecutor raises ValueError if it is not positive.

    Returns:
        ``(rows, resolved)`` where ``rows`` is the same list object that was
        passed in and ``resolved`` maps each fund name that was written to
        its scheme code.
    """
    from concurrent.futures import ThreadPoolExecutor, as_completed

    resolved: dict[str, str] = {}
    corrected_existing = 0   # valid-but-different codes replaced by an override
    override_count = 0       # blank/invalid codes filled from the override table
    mfapi_count = 0          # blank/invalid codes filled from an mfapi match
    # Fund name -> rows waiting for a network lookup. Grouping by name means a
    # fund that appears on several rows is only searched once.
    pending: dict[str, list[dict[str, str]]] = {}

    # ── Classify rows; the override table needs no network ────────────────────
    for row in rows:
        fund_name = (row.get("Fund") or "").strip()
        if not fund_name or fund_name.count("-") < 2 or ":" in fund_name:
            continue
        raw_code = (row.get("Scheme Code") or "").strip()
        has_valid_code = _is_valid_scheme_code(raw_code)
        override_code = SCHEME_OVERRIDES.get(_normalize(fund_name))

        if override_code:
            if raw_code != override_code:
                row["Scheme Code"] = override_code
                resolved[fund_name] = override_code
                if has_valid_code:
                    corrected_existing += 1
                else:
                    override_count += 1
            continue

        if has_valid_code:
            continue
        if raw_code:
            row["Scheme Code"] = ""   # clear invalid platform codes e.g. GROWWEH
        pending.setdefault(fund_name, []).append(row)

    pending_rows = sum(len(group) for group in pending.values())
    total_missing = override_count + pending_rows

    if total_missing == 0:
        if verbose:
            if corrected_existing:
                print(f"[resolver] Corrected {corrected_existing} existing scheme codes via override table.")
            else:
                print("[resolver] No missing scheme codes found.")
        return rows, resolved

    if verbose:
        print(f"[resolver] Resolving {total_missing} missing scheme codes (parallel)…")
        if override_count:
            print(f"  [resolver] {override_count} resolved via override table (instant)")
        if corrected_existing:
            print(f"  [resolver] {corrected_existing} existing codes corrected via override table")

    if not pending:
        if verbose:
            print(f"[resolver] Done. {override_count}/{total_missing} resolved.")
        return rows, resolved

    # ── Network lookups on a thread pool ──────────────────────────────────────
    # Results are consumed in this (the calling) thread, so the counters and
    # the row updates below need no lock.
    total_lookups = len(pending)
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        future_to_name = {
            executor.submit(resolve_scheme_code_for_fund_name, name): name
            for name in pending
        }
        for done, future in enumerate(as_completed(future_to_name), start=1):
            fund_name = future_to_name[future]
            failure = None
            try:
                code, matched_name = future.result()
            except Exception as exc:
                # One bad lookup must not abort the batch, but it is reported
                # rather than being indistinguishable from "no match".
                code = matched_name = None
                failure = exc

            if code:
                group = pending[fund_name]
                for row in group:
                    row["Scheme Code"] = code
                resolved[fund_name] = code
                mfapi_count += len(group)
                if verbose:
                    print(f"  [{done}/{total_lookups}] OK  {fund_name[:55]}")
                    print(f"       -> [{code}] {(matched_name or '')[:55]}")
            elif verbose:
                if failure is not None:
                    print(f"  [{done}/{total_lookups}] NO  {fund_name[:55]} -- lookup error: {failure}")
                else:
                    print(f"  [{done}/{total_lookups}] NO  {fund_name[:55]} -- no match")

    if verbose:
        corrected_note = (
            f", {corrected_existing} corrected existing codes"
            if corrected_existing
            else ""
        )
        print(f"[resolver] Done. {override_count + mfapi_count}/{total_missing} resolved "
              f"({override_count} overrides + {mfapi_count} mfapi{corrected_note}).")
    return rows, resolved