Spaces:
Running
Running
| """Scheme Code Resolver | |
| ====================== | |
| Resolves missing AMFI scheme codes by fuzzy-matching the fund name from the | |
| CSV against mfapi.in's /mf/search endpoint. | |
| This runs as a PRE-TRIAGE step so that the NAV engine can fire for funds whose | |
| scheme code was absent from the CSV. | |
| """ | |
| from __future__ import annotations | |
| import difflib | |
| import re | |
| import time | |
| import requests | |
# Endpoint queried with a ``q`` parameter to search schemes by name.
MFAPI_SEARCH = "https://api.mfapi.in/mf/search"
# Minimum difflib.SequenceMatcher ratio a candidate must reach to be accepted.
MATCH_CUTOFF = 0.52
# Seconds between API calls (polite rate limit).
# NOTE(review): no function visible in this file reads SLEEP_BETWEEN; the
# lookups run concurrently without sleeping. Confirm whether it is still needed.
SLEEP_BETWEEN = 0.25
# Manual overrides for schemes that mfapi's search endpoint does not
# currently return, but whose AMFI codes are known and stable. Keys are
# normalized fund names (see _normalize): lowercase, with hyphens and
# underscores replaced by spaces. Values are AMFI scheme codes as strings.
SCHEME_OVERRIDES: dict[str, str] = {
    # -- Pre-verified from AMFI NAV master (portal.amfiindia.com) --
    # These funds have empty scheme codes in source CSV and cannot be reliably
    # resolved via mfapi fuzzy search. Codes are Regular Plan - Growth only.
    # Existing override
    "kotak tax saver scheme growth": "109234",
    # -- Debt: Banking and PSU --
    "hdfc banking and psu debt fund growth option": "128628",
    "icici prudential banking and psu debt fund growth": "112342",
    "kotak banking and psu debt growth": "123690",
    "invesco india banking and psu fund growth option": "118232",
    "sundaram banking psu fund formerly known as sundaram banking and psu debt fund regular plan growth": "100784",
    "hsbc banking and psu debt fund regular growth": "151104",
    "iti banking psu debt fund regular plan growth option": "148535",
    # -- Debt: Liquid --
    "dsp liquidity fund regular plan growth": "119120",
    "invesco india liquid fund growth": "104488",
    "invesco india liquid fund regular growth": "118769",
    "union liquid fund growth option": "115398",
    "parag parikh liquid fund regular plan growth": "149038",
    "motilal oswal liquid fund regular growth": "147622",
    "iti liquid fund regular plan growth option": "147153",
    "quantum liquid fund regular plan growth option": "103504",
    "lic mf liquid fund regular plan growth": "120716",
    "icici prudential liquid fund growth": "120593",
    "aditya birla sun life liquid fund retail growth": "100042",
    "aditya birla sun life liquid fund growth": "100047",
    "edelweiss liquid fund regular plan growth option": "140182",
    "edelweiss liquid fund retail plan growth option": "119114",
    "axis liquid fund retail plan growth option": "112090",
    "sbi liquid fund regular plan growth": "119822",
    "nippon india liquid fund retail option growth plan": "100837",
    # -- Debt: Overnight --
    "uti overnight fund regular plan growth option": "100814",
    "canara robeco overnight fund regular plan growth option": "147534",
    "dsp overnight fund regular plan growth": "146061",
    "franklin india overnight fund growth": "146210",
    "bandhan overnight fund regular plan growth": "146187",
    "iti overnight fund regular plan growth option": "148529",
    "union overnight fund regular plan growth option": "146997",
    "icici prudential overnight fund growth": "145811",
    "edelweiss overnight fund regular plan growth": "147569",
    "lic mf overnight fund regular plan growth": "146065",
    "hdfc overnight fund growth option": "145822",
    # -- Debt: Ultra Short Duration --
    "icici prudential ultra short term fund growth": "120505",
    "invesco india ultra short duration fund growth": "117825",
    "uti ultra short duration fund regular plan growth option": "102532",
    # NOTE(review): the next two keys (regular plan and retail) both map to
    # 119293 -- confirm the retail entry is meant to share the regular code.
    "aditya birla sun life savings fund growth regular plan": "119293",
    "aditya birla sun life savings fund retail growth": "119293",
    "hdfc ultra short term fund growth option": "145539",
    "aditya birla sun life savings fund discipline advantage plan": "112016",
    "pgim india ultra short duration fund growth": "100474",
    "iti ultra short duration fund regular plan growth option": "148533",
    "motilal oswal ultra short term fund mofustf regular plan growth": "124233",
    "tata ultra short term fund regular plan growth": "146070",
    "kotak savings fund growth": "119270",
    "lic mf ultra short duration fund regular plan growth": "147770",
    "canara robeco ultra short term fund regular plan growth option": "119671",
    "sundaram ultra short duration fund formerly known as principal ultra short term fund growth option": "120826",
    "bank of india ultra short duration fund regular plan growth": "109269",
    # -- Debt: Short Duration --
    "hdfc short term debt fund growth option": "119247",
    "icici prudential short term fund growth option": "101758",
    "sbi short horizon debt fund short term fund retail growth": "106227",
    "sbi short term debt fund regular plan growth": "119831",
    "kotak bond short term plan growth": "101373",
    "dsp short term fund regular plan growth": "119598",
    "lic mf short duration fund regular plan growth": "145952",
    "mirae asset short duration fund regular plan growth": "148416",
    "invesco india short duration fund growth": "105185",
    "canara robeco short duration fund regular plan growth option": "119675",
    "groww short duration fund formerly known as indiabulls short term fund regular plan growth option": "123708",
    "tata short term bond fund regular plan growth option": "119802",
    # -- Debt: Medium Duration --
    "aditya birla sun life medium term plan growth regular plan": "111803",
    "axis strategic bond fund regular plan growth option": "116894",
    "icici prudential medium term bond fund growth": "120841",
    "hdfc medium term debt fund growth option": "119238",
    "kotak medium term fund growth": "119281",
    "dsp bond fund growth": "100078",
    "sundaram medium duration fund formerly known as sundaram medium term bond fund regular plan growth": "100603",
    # -- ETFs --
    "hdfc nifty100 low volatility 30 etf growth option": "145748",
    "hdfc nifty200 momentum 30 etf growth option": "146058",
    "hdfc nifty it etf growth option": "120493",
    "hdfc nifty private bank etf growth option": "145696",
    # -- Index Funds --
    "dsp nifty next 50 index fund regular plan growth": "143669",
    "uti nifty next 50 index fund regular plan growth option": "120713",
    "motilal oswal nifty smallcap 250 index regular plan": "147960",
    "icici prudential nifty pharma index fund growth": "143874",
    "dsp nifty 50 index fund regular plan growth": "143537",
    "motilal oswal nifty midcap 150 index fund regular plan": "147068",
    "sbi nifty index fund regular plan growth": "135818",
    "motilal oswal nifty bank index regular plan": "145552",
}
| def _normalize(name: str) -> str: | |
| """Convert hyphenated CSV name to a clean lowercase string.""" | |
| return re.sub(r"[-_]+", " ", name).strip().lower() | |
def _search_query(name: str) -> str:
    """Build a focused search string from the first six words of *name*.

    The name is normalized first, then split on whitespace; names with fewer
    than six words are returned whole.
    """
    words = _normalize(name).split()
    leading_words = words[:6]
    return " ".join(leading_words)
def _search_mfapi(query: str) -> list[dict]:
    """Query the mfapi search endpoint and return its candidate schemes.

    Args:
        query: Search text sent as the ``q`` request parameter.

    Returns:
        The dict items of the JSON array returned by the endpoint. An empty
        list is returned when the request fails, the body is not valid JSON,
        or the decoded payload is not a list.

    This is a best-effort lookup: every failure is reported on stdout and
    swallowed so that one bad request cannot abort a batch resolution.
    """
    try:
        resp = requests.get(MFAPI_SEARCH, params={"q": query}, timeout=15)
        resp.raise_for_status()
        payload = resp.json()
    except Exception as exc:  # deliberate catch-all: lookup is best-effort
        print(f" [resolver] search error for '{query}': {exc}")
        return []

    # Callers iterate the result and call ``.get`` on each item, so anything
    # other than a list of dicts (e.g. an error object) must not leak through.
    if not isinstance(payload, list):
        print(
            f" [resolver] search error for '{query}': "
            f"unexpected response type {type(payload).__name__}"
        )
        return []
    return [item for item in payload if isinstance(item, dict)]
| def _best_match(candidates: list[dict], target_name: str) -> dict | None: | |
| if not candidates: | |
| return None | |
| target = _normalize(target_name) | |
| best_score = 0.0 | |
| best_item = None | |
| for item in candidates: | |
| candidate = _normalize(item.get("schemeName", "")) | |
| score = difflib.SequenceMatcher(None, target, candidate).ratio() | |
| if score > best_score: | |
| best_score = score | |
| best_item = item | |
| if best_score >= MATCH_CUTOFF: | |
| return best_item | |
| return None | |
| def _is_valid_scheme_code(code: str) -> bool: | |
| """AMFI scheme codes are purely numeric (e.g. 120586). Platform codes like GROWWEH are invalid.""" | |
| return bool(code and code.isdigit()) | |
def resolve_scheme_code_for_fund_name(
    fund_name: str,
) -> tuple[str | None, str | None]:
    """Look up the scheme code for a single fund name.

    The override table is consulted first using the normalized name; only
    when it has no entry is the mfapi search endpoint queried and the best
    fuzzy match taken.

    Returns:
        ``(code, "override")`` for an override hit, ``(code, matched scheme
        name)`` for a search hit, or ``(None, None)`` when nothing matched.
    """
    known_code = SCHEME_OVERRIDES.get(_normalize(fund_name))
    if known_code:
        return known_code, "override"

    hit = _best_match(_search_mfapi(_search_query(fund_name)), fund_name)
    if not hit:
        return None, None
    return str(hit["schemeCode"]), hit.get("schemeName", "")
def resolve_missing_scheme_codes(
    rows: list[dict[str, str]],
    *,
    verbose: bool = True,
    max_workers: int = 20,
) -> tuple[list[dict[str, str]], dict[str, str]]:
    """Fill in missing scheme codes on *rows*, mutating the rows in place.

    A row is considered only if its ``"Fund"`` value is non-empty, contains at
    least two hyphens and no colon. For each such row:

    1. If SCHEME_OVERRIDES has an entry for the normalized fund name and the
       row's code differs, the override is written. This counts as a
       *correction* when the row already held a numeric code and as an
       *override resolution* when the code was blank or invalid.
    2. Otherwise, rows with a valid numeric code are left alone.
    3. Remaining rows have any invalid code cleared and are resolved through
       the mfapi search, one lookup per distinct fund name, run concurrently
       on a thread pool.

    Args:
        rows: CSV rows as dicts with ``"Fund"`` and ``"Scheme Code"`` keys.
        verbose: Print progress and a summary to stdout.
        max_workers: Upper bound on concurrent mfapi lookups.

    Returns:
        ``(rows, resolved)`` where ``rows`` is the same list object and
        ``resolved`` maps each fund name whose code was written to that code.
    """
    from concurrent.futures import ThreadPoolExecutor, as_completed

    resolved: dict[str, str] = {}
    corrected_existing = 0  # rows whose numeric code was replaced by an override
    override_count = 0  # rows whose missing code came from the override table
    # Fund name -> rows still needing a network lookup. Grouping by name means
    # a fund that appears on several rows is searched only once.
    pending: dict[str, list[dict[str, str]]] = {}

    for row in rows:
        fund_name = (row.get("Fund") or "").strip()
        if not fund_name or fund_name.count("-") < 2 or ":" in fund_name:
            continue
        raw_code = (row.get("Scheme Code") or "").strip()
        has_numeric_code = _is_valid_scheme_code(raw_code)
        override_code = SCHEME_OVERRIDES.get(_normalize(fund_name))

        if override_code:
            # The override table is authoritative for an exact name match,
            # even when the CSV already contains a numeric but stale code.
            if raw_code != override_code:
                row["Scheme Code"] = override_code
                resolved[fund_name] = override_code
                if has_numeric_code:
                    corrected_existing += 1
                else:
                    override_count += 1
            continue
        if has_numeric_code:
            continue
        if raw_code:
            row["Scheme Code"] = ""  # clear invalid platform codes e.g. GROWWEH
        pending.setdefault(fund_name, []).append(row)

    pending_rows = sum(len(group) for group in pending.values())
    total_missing = override_count + pending_rows

    if total_missing == 0:
        if verbose:
            if corrected_existing:
                print(f"[resolver] Corrected {corrected_existing} existing scheme codes via override table.")
            else:
                print("[resolver] No missing scheme codes found.")
        return rows, resolved

    if verbose:
        print(f"[resolver] Resolving {total_missing} missing scheme codes (parallel)…")
        if override_count:
            print(f" [resolver] {override_count} resolved via override table (instant)")
        if corrected_existing:
            print(f" [resolver] {corrected_existing} existing codes corrected via override table")

    if not pending:
        if verbose:
            print(f"[resolver] Done. {override_count}/{total_missing} resolved.")
        return rows, resolved

    mfapi_count = 0  # rows filled from an mfapi search match
    total_lookups = len(pending)
    workers = max(1, min(max_workers, total_lookups))
    with ThreadPoolExecutor(max_workers=workers) as executor:
        future_to_name = {
            executor.submit(resolve_scheme_code_for_fund_name, name): name
            for name in pending
        }
        # Results are consumed on the calling thread, so the counters and the
        # row updates below need no locking.
        for n, future in enumerate(as_completed(future_to_name), start=1):
            fund_name = future_to_name[future]
            reason = "no match"
            try:
                code, matched_name = future.result()
            except Exception as exc:  # one failed lookup must not abort the batch
                code = matched_name = None
                reason = f"lookup failed: {exc}"

            if not code:
                if verbose:
                    print(f" [{n}/{total_lookups}] NO {fund_name[:55]} -- {reason}")
                continue

            for row in pending[fund_name]:
                row["Scheme Code"] = code
            resolved[fund_name] = code
            mfapi_count += len(pending[fund_name])
            if verbose:
                print(f" [{n}/{total_lookups}] OK {fund_name[:55]}")
                print(f" -> [{code}] {(matched_name or '')[:55]}")

    if verbose:
        corrected_note = (
            f", {corrected_existing} corrected existing codes"
            if corrected_existing
            else ""
        )
        print(
            f"[resolver] Done. {override_count + mfapi_count}/{total_missing} resolved "
            f"({override_count} overrides + {mfapi_count} mfapi{corrected_note})."
        )
    return rows, resolved