"""Scheme Code Resolver ====================== Resolves missing AMFI scheme codes by fuzzy-matching the fund name from the CSV against mfapi.in's /mf/search endpoint. This runs as a PRE-TRIAGE step so that the NAV engine can fire for funds whose scheme code was absent from the CSV. """ from __future__ import annotations import difflib import re import time import requests MFAPI_SEARCH = "https://api.mfapi.in/mf/search" MATCH_CUTOFF = 0.52 # minimum SequenceMatcher ratio to accept SLEEP_BETWEEN = 0.25 # seconds between API calls (polite rate limit) # Manual overrides for schemes that mfapi's search endpoint does not # currently return, but whose AMFI codes are known and stable. Keys are # normalized fund names (see _normalize). SCHEME_OVERRIDES: dict[str, str] = { # ── Pre-verified from AMFI NAV master (portal.amfiindia.com) ────────────── # These funds have empty scheme codes in source CSV and cannot be reliably # resolved via mfapi fuzzy search. Codes are Regular Plan - Growth only. # Existing override "kotak tax saver scheme growth": "109234", # ── Debt: Banking and PSU ───────────────────────────────────────────────── "hdfc banking and psu debt fund growth option": "128628", "icici prudential banking and psu debt fund growth": "112342", "kotak banking and psu debt growth": "123690", "invesco india banking and psu fund growth option": "118232", "sundaram banking psu fund formerly known as sundaram banking and psu debt fund regular plan growth": "100784", "hsbc banking and psu debt fund regular growth": "151104", "iti banking psu debt fund regular plan growth option": "148535", # ── Debt: Liquid ────────────────────────────────────────────────────────── "dsp liquidity fund regular plan growth": "119120", "invesco india liquid fund growth": "104488", "invesco india liquid fund regular growth": "118769", "union liquid fund growth option": "115398", "parag parikh liquid fund regular plan growth": "149038", "motilal oswal liquid fund regular growth": "147622", "iti liquid fund regular plan growth option": "147153", "quantum liquid fund regular plan growth option": "103504", "lic mf liquid fund regular plan growth": "120716", "icici prudential liquid fund growth": "120593", "aditya birla sun life liquid fund retail growth": "100042", "aditya birla sun life liquid fund growth": "100047", "edelweiss liquid fund regular plan growth option": "140182", "edelweiss liquid fund retail plan growth option": "119114", "axis liquid fund retail plan growth option": "112090", "sbi liquid fund regular plan growth": "119822", "nippon india liquid fund retail option growth plan": "100837", # ── Debt: Overnight ─────────────────────────────────────────────────────── "uti overnight fund regular plan growth option": "100814", "canara robeco overnight fund regular plan growth option": "147534", "dsp overnight fund regular plan growth": "146061", "franklin india overnight fund growth": "146210", "bandhan overnight fund regular plan growth": "146187", "iti overnight fund regular plan growth option": "148529", "union overnight fund regular plan growth option": "146997", "icici prudential overnight fund growth": "145811", "edelweiss overnight fund regular plan growth": "147569", "lic mf overnight fund regular plan growth": "146065", "hdfc overnight fund growth option": "145822", # ── Debt: Ultra Short Duration ──────────────────────────────────────────── "icici prudential ultra short term fund growth": "120505", "invesco india ultra short duration fund growth": "117825", "uti ultra short duration fund regular plan growth option": "102532", "aditya birla sun life savings fund growth regular plan": "119293", "aditya birla sun life savings fund retail growth": "119293", "hdfc ultra short term fund growth option": "145539", "aditya birla sun life savings fund discipline advantage plan": "112016", "pgim india ultra short duration fund growth": "100474", "iti ultra short duration fund regular plan growth option": "148533", "motilal oswal ultra short term fund mofustf regular plan growth": "124233", "tata ultra short term fund regular plan growth": "146070", "kotak savings fund growth": "119270", "lic mf ultra short duration fund regular plan growth": "147770", "canara robeco ultra short term fund regular plan growth option": "119671", "sundaram ultra short duration fund formerly known as principal ultra short term fund growth option": "120826", "bank of india ultra short duration fund regular plan growth": "109269", # ── Debt: Short Duration ────────────────────────────────────────────────── "hdfc short term debt fund growth option": "119247", "icici prudential short term fund growth option": "101758", "sbi short horizon debt fund short term fund retail growth": "106227", "sbi short term debt fund regular plan growth": "119831", "kotak bond short term plan growth": "101373", "dsp short term fund regular plan growth": "119598", "lic mf short duration fund regular plan growth": "145952", "mirae asset short duration fund regular plan growth": "148416", "invesco india short duration fund growth": "105185", "canara robeco short duration fund regular plan growth option": "119675", "groww short duration fund formerly known as indiabulls short term fund regular plan growth option": "123708", "tata short term bond fund regular plan growth option": "119802", # ── Debt: Medium Duration ───────────────────────────────────────────────── "aditya birla sun life medium term plan growth regular plan": "111803", "axis strategic bond fund regular plan growth option": "116894", "icici prudential medium term bond fund growth": "120841", "hdfc medium term debt fund growth option": "119238", "kotak medium term fund growth": "119281", "dsp bond fund growth": "100078", "sundaram medium duration fund formerly known as sundaram medium term bond fund regular plan growth": "100603", # ── ETFs ────────────────────────────────────────────────────────────────── "hdfc nifty100 low volatility 30 etf growth option": "145748", "hdfc nifty200 momentum 30 etf growth option": "146058", "hdfc nifty it etf growth option": "120493", "hdfc nifty private bank etf growth option": "145696", # ── Index Funds ─────────────────────────────────────────────────────────── "dsp nifty next 50 index fund regular plan growth": "143669", "uti nifty next 50 index fund regular plan growth option": "120713", "motilal oswal nifty smallcap 250 index regular plan": "147960", "icici prudential nifty pharma index fund growth": "143874", "dsp nifty 50 index fund regular plan growth": "143537", "motilal oswal nifty midcap 150 index fund regular plan": "147068", "sbi nifty index fund regular plan growth": "135818", "motilal oswal nifty bank index regular plan": "145552", } def _normalize(name: str) -> str: """Convert hyphenated CSV name to a clean lowercase string.""" return re.sub(r"[-_]+", " ", name).strip().lower() def _search_query(name: str) -> str: """Take first 6 tokens for a focused search query.""" return " ".join(_normalize(name).split()[:6]) def _search_mfapi(query: str) -> list[dict]: try: resp = requests.get(MFAPI_SEARCH, params={"q": query}, timeout=15) resp.raise_for_status() return resp.json() except Exception as exc: print(f" [resolver] search error for '{query}': {exc}") return [] def _best_match(candidates: list[dict], target_name: str) -> dict | None: if not candidates: return None target = _normalize(target_name) best_score = 0.0 best_item = None for item in candidates: candidate = _normalize(item.get("schemeName", "")) score = difflib.SequenceMatcher(None, target, candidate).ratio() if score > best_score: best_score = score best_item = item if best_score >= MATCH_CUTOFF: return best_item return None def _is_valid_scheme_code(code: str) -> bool: """AMFI scheme codes are purely numeric (e.g. 120586). Platform codes like GROWWEH are invalid.""" return bool(code and code.isdigit()) def resolve_scheme_code_for_fund_name( fund_name: str, ) -> tuple[str | None, str | None]: """ Resolve a scheme code for one fund name. Resolution order: 1. Exact normalized-name override from SCHEME_OVERRIDES 2. mfapi search + fuzzy best-match """ norm = _normalize(fund_name) override_code = SCHEME_OVERRIDES.get(norm) if override_code: return override_code, "override" query = _search_query(fund_name) candidates = _search_mfapi(query) match = _best_match(candidates, fund_name) if match: return str(match["schemeCode"]), match.get("schemeName", "") return None, None def resolve_missing_scheme_codes( rows: list[dict[str, str]], *, verbose: bool = True, ) -> tuple[list[dict[str, str]], dict[str, str]]: """ Resolve blank scheme codes and also correct any exact-name rows whose current numeric code disagrees with SCHEME_OVERRIDES. Blank/invalid codes are resolved via SCHEME_OVERRIDES (O(1) dict lookup) first, then mfapi search in parallel. Complexity: O(N) time, O(N) space where N = funds with missing codes. Network I/O parallelised with ThreadPoolExecutor(20) — pure I/O bound. """ from concurrent.futures import ThreadPoolExecutor, as_completed resolved: dict[str, str] = {} corrected_existing = 0 # ── Collect rows that need resolution ───────────────────────────────────── target_rows: list[dict[str, str]] = [] for row in rows: fund_name = (row.get("Fund") or "").strip() if not fund_name or fund_name.count("-") < 2 or ":" in fund_name: continue norm = _normalize(fund_name) raw_code = (row.get("Scheme Code") or "").strip() override_code = SCHEME_OVERRIDES.get(norm) # Future-proofing: if we know the canonical code for this exact fund name, # correct it even when the CSV already contains a numeric but stale code. if override_code and raw_code != override_code: row["Scheme Code"] = override_code resolved[fund_name] = override_code corrected_existing += 1 continue if _is_valid_scheme_code(raw_code): continue if raw_code and not _is_valid_scheme_code(raw_code): row["Scheme Code"] = "" # clear invalid platform codes e.g. GROWWEH target_rows.append(row) total_missing = len(target_rows) if total_missing == 0: if verbose: if corrected_existing: print(f"[resolver] Corrected {corrected_existing} existing scheme codes via override table.") else: print("[resolver] No missing scheme codes found.") return rows, resolved if verbose: print(f"[resolver] Resolving {total_missing} missing scheme codes (parallel)…") # ── Phase A: Override table — O(1) per fund, no network ─────────────────── mfapi_needed: list[dict[str, str]] = [] override_count = 0 for row in target_rows: fund_name = (row.get("Fund") or "").strip() norm = _normalize(fund_name) code = SCHEME_OVERRIDES.get(norm) if code: row["Scheme Code"] = code resolved[fund_name] = code override_count += 1 else: mfapi_needed.append(row) if verbose and override_count: print(f" [resolver] {override_count} resolved via override table (instant)") if verbose and corrected_existing: print(f" [resolver] {corrected_existing} existing codes corrected via override table") # ── Phase B: mfapi search — parallel ThreadPoolExecutor ─────────────────── if not mfapi_needed: if verbose: print(f"[resolver] Done. {len(resolved)}/{total_missing} resolved.") return rows, resolved lock = __import__("threading").Lock() completed = [0] def _resolve_one(row: dict[str, str]) -> tuple[str, str | None, str | None]: """Returns (fund_name, scheme_code_or_None, matched_name_or_None).""" fund_name = (row.get("Fund") or "").strip() query = _search_query(fund_name) candidates = _search_mfapi(query) match = _best_match(candidates, fund_name) if match: return fund_name, str(match["schemeCode"]), match.get("schemeName", "") return fund_name, None, None # 20 workers: mfapi is pure REST, stateless, handles concurrency fine with ThreadPoolExecutor(max_workers=20) as executor: future_to_row = {executor.submit(_resolve_one, row): row for row in mfapi_needed} for future in as_completed(future_to_row): row = future_to_row[future] fund_name = (row.get("Fund") or "").strip() try: _, code, matched_name = future.result() except Exception: code = matched_name = None with lock: completed[0] += 1 n = completed[0] total_mfapi = len(mfapi_needed) if code: row["Scheme Code"] = code resolved[fund_name] = code if verbose: print(f" [{n}/{total_mfapi}] OK {fund_name[:55]}") print(f" -> [{code}] {(matched_name or '')[:55]}") else: if verbose: print(f" [{n}/{total_mfapi}] NO {fund_name[:55]} -- no match") if verbose: print(f"[resolver] Done. {len(resolved)}/{total_missing} resolved " f"({override_count} overrides + {len(resolved)-override_count-corrected_existing} mfapi" f"{f', {corrected_existing} corrected existing codes' if corrected_existing else ''}).") return rows, resolved