File size: 16,595 Bytes
b0e15c1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
"""Scheme Code Resolver

======================

Resolves missing AMFI scheme codes by fuzzy-matching the fund name from the
CSV against mfapi.in's /mf/search endpoint.

This runs as a PRE-TRIAGE step so that the NAV engine can fire for funds whose
scheme code was absent from the CSV.

"""

from __future__ import annotations

import difflib
import re
import time

import requests


# Search endpoint of mfapi.in; the search text is sent as the ``q`` query
# parameter and the response is decoded as JSON.
MFAPI_SEARCH = "https://api.mfapi.in/mf/search"
# A fuzzy candidate is accepted only when its difflib.SequenceMatcher ratio
# against the normalized fund name is at least this value.
MATCH_CUTOFF  = 0.52    # minimum SequenceMatcher ratio to accept
# NOTE(review): SLEEP_BETWEEN is not referenced by any code visible in this
# module (lookups are fanned out over a thread pool) -- confirm whether it is
# still needed.
SLEEP_BETWEEN = 0.25    # seconds between API calls (polite rate limit)

# Manual overrides: normalized fund name -> AMFI scheme code (as a string).
# Keys must be written exactly as _normalize() would produce them (lowercase,
# hyphens/underscores replaced by spaces), because lookups are exact-match
# dict lookups on the normalized name. An entry here takes precedence over
# the mfapi fuzzy search, and is also used to correct a numeric code in the
# CSV that disagrees with it.
SCHEME_OVERRIDES: dict[str, str] = {
    # ── Pre-verified from AMFI NAV master (portal.amfiindia.com) ──────────────
    # These funds have empty scheme codes in source CSV and cannot be reliably
    # resolved via mfapi fuzzy search. Codes are Regular Plan - Growth only.

    # Override that predates the pre-verified batch below
    "kotak tax saver scheme growth": "109234",

    # ── Debt: Banking and PSU ─────────────────────────────────────────────────
    "hdfc banking and psu debt fund growth option":         "128628",
    "icici prudential banking and psu debt fund growth":    "112342",
    "kotak banking and psu debt growth":                    "123690",
    "invesco india banking and psu fund growth option":     "118232",
    "sundaram banking psu fund formerly known as sundaram banking and psu debt fund regular plan growth": "100784",
    "hsbc banking and psu debt fund regular growth":        "151104",
    "iti banking psu debt fund regular plan growth option": "148535",

    # ── Debt: Liquid ──────────────────────────────────────────────────────────
    "dsp liquidity fund regular plan growth":               "119120",
    "invesco india liquid fund growth":                     "104488",
    "invesco india liquid fund regular growth":             "118769",
    "union liquid fund growth option":                      "115398",
    "parag parikh liquid fund regular plan growth":         "149038",
    "motilal oswal liquid fund regular growth":             "147622",
    "iti liquid fund regular plan growth option":           "147153",
    "quantum liquid fund regular plan growth option":       "103504",
    "lic mf liquid fund regular plan growth":               "120716",
    "icici prudential liquid fund growth":                  "120593",
    "aditya birla sun life liquid fund retail growth":      "100042",
    "aditya birla sun life liquid fund growth":             "100047",
    "edelweiss liquid fund regular plan growth option":     "140182",
    "edelweiss liquid fund retail plan growth option":      "119114",
    "axis liquid fund retail plan growth option":           "112090",
    "sbi liquid fund regular plan growth":                  "119822",
    "nippon india liquid fund retail option growth plan":   "100837",

    # ── Debt: Overnight ───────────────────────────────────────────────────────
    "uti overnight fund regular plan growth option":            "100814",
    "canara robeco overnight fund regular plan growth option":  "147534",
    "dsp overnight fund regular plan growth":                   "146061",
    "franklin india overnight fund growth":                     "146210",
    "bandhan overnight fund regular plan growth":               "146187",
    "iti overnight fund regular plan growth option":            "148529",
    "union overnight fund regular plan growth option":          "146997",
    "icici prudential overnight fund growth":                   "145811",
    "edelweiss overnight fund regular plan growth":             "147569",
    "lic mf overnight fund regular plan growth":                "146065",
    "hdfc overnight fund growth option":                        "145822",

    # ── Debt: Ultra Short Duration ────────────────────────────────────────────
    "icici prudential ultra short term fund growth":                    "120505",
    "invesco india ultra short duration fund growth":                   "117825",
    "uti ultra short duration fund regular plan growth option":         "102532",
    # NOTE(review): the next two names map to the same code (119293) --
    # confirm that the regular and retail variants really share one scheme.
    "aditya birla sun life savings fund growth regular plan":           "119293",
    "aditya birla sun life savings fund retail growth":                 "119293",
    "hdfc ultra short term fund growth option":                         "145539",
    "aditya birla sun life savings fund discipline advantage plan":     "112016",
    "pgim india ultra short duration fund growth":                      "100474",
    "iti ultra short duration fund regular plan growth option":         "148533",
    "motilal oswal ultra short term fund mofustf regular plan growth":  "124233",
    "tata ultra short term fund regular plan growth":                   "146070",
    "kotak savings fund growth":                                        "119270",
    "lic mf ultra short duration fund regular plan growth":             "147770",
    "canara robeco ultra short term fund regular plan growth option":   "119671",
    "sundaram ultra short duration fund formerly known as principal ultra short term fund growth option": "120826",
    "bank of india ultra short duration fund regular plan growth":      "109269",

    # ── Debt: Short Duration ──────────────────────────────────────────────────
    "hdfc short term debt fund growth option":                    "119247",
    "icici prudential short term fund growth option":             "101758",
    "sbi short horizon debt fund short term fund retail growth":  "106227",
    "sbi short term debt fund regular plan growth":               "119831",
    "kotak bond short term plan growth":                          "101373",
    "dsp short term fund regular plan growth":                    "119598",
    "lic mf short duration fund regular plan growth":             "145952",
    "mirae asset short duration fund regular plan growth":        "148416",
    "invesco india short duration fund growth":                   "105185",
    "canara robeco short duration fund regular plan growth option": "119675",
    "groww short duration fund formerly known as indiabulls short term fund regular plan growth option": "123708",
    "tata short term bond fund regular plan growth option":       "119802",

    # ── Debt: Medium Duration ─────────────────────────────────────────────────
    "aditya birla sun life medium term plan growth regular plan": "111803",
    "axis strategic bond fund regular plan growth option":        "116894",
    "icici prudential medium term bond fund growth":              "120841",
    "hdfc medium term debt fund growth option":                   "119238",
    "kotak medium term fund growth":                              "119281",
    "dsp bond fund growth":                                       "100078",
    "sundaram medium duration fund formerly known as sundaram medium term bond fund regular plan growth": "100603",

    # ── ETFs ──────────────────────────────────────────────────────────────────
    "hdfc nifty100 low volatility 30 etf growth option":  "145748",
    "hdfc nifty200 momentum 30 etf growth option":        "146058",
    "hdfc nifty it etf growth option":                    "120493",
    "hdfc nifty private bank etf growth option":          "145696",

    # ── Index Funds ───────────────────────────────────────────────────────────
    "dsp nifty next 50 index fund regular plan growth":         "143669",
    "uti nifty next 50 index fund regular plan growth option":  "120713",
    "motilal oswal nifty smallcap 250 index regular plan":      "147960",
    "icici prudential nifty pharma index fund growth":          "143874",
    "dsp nifty 50 index fund regular plan growth":              "143537",
    "motilal oswal nifty midcap 150 index fund regular plan":   "147068",
    "sbi nifty index fund regular plan growth":                 "135818",
    "motilal oswal nifty bank index regular plan":              "145552",
}


def _normalize(name: str) -> str:
    """Convert hyphenated CSV name to a clean lowercase string."""
    return re.sub(r"[-_]+", " ", name).strip().lower()


def _search_query(name: str) -> str:
    """Build the search text: the first six whitespace-separated tokens of the normalized name."""
    tokens = _normalize(name).split()
    leading = tokens[:6]
    return " ".join(leading)


def _search_mfapi(query: str) -> list[dict]:
    """Query the mfapi search endpoint and return the candidate scheme records.

    Best-effort by design: any failure while fetching or decoding the
    response is reported on stdout and yields an empty list, so one failed
    lookup never aborts the caller.

    Args:
        query: Search text sent as the ``q`` query parameter.

    Returns:
        The dict entries of the decoded JSON array. ``[]`` when the request
        fails, the body cannot be decoded, or the payload is not a JSON array.
    """
    try:
        resp = requests.get(MFAPI_SEARCH, params={"q": query}, timeout=15)
        resp.raise_for_status()
        payload = resp.json()
    except Exception as exc:
        # Deliberately broad: this is the best-effort boundary for network I/O.
        print(f"  [resolver] search error for '{query}': {exc}")
        return []

    # A successful response is not guaranteed to be an array (e.g. an error
    # object). Returning it verbatim would break callers that iterate the
    # result and call .get() on each entry.
    if not isinstance(payload, list):
        print(
            f"  [resolver] search error for '{query}': "
            f"unexpected payload type {type(payload).__name__}"
        )
        return []
    return [item for item in payload if isinstance(item, dict)]


def _best_match(candidates: list[dict], target_name: str) -> dict | None:
    if not candidates:
        return None
    target = _normalize(target_name)
    best_score = 0.0
    best_item  = None
    for item in candidates:
        candidate = _normalize(item.get("schemeName", ""))
        score = difflib.SequenceMatcher(None, target, candidate).ratio()
        if score > best_score:
            best_score = score
            best_item  = item
    if best_score >= MATCH_CUTOFF:
        return best_item
    return None


def _is_valid_scheme_code(code: str) -> bool:
    """AMFI scheme codes are purely numeric (e.g. 120586). Platform codes like GROWWEH are invalid."""
    return bool(code and code.isdigit())


def resolve_scheme_code_for_fund_name(
    fund_name: str,
) -> tuple[str | None, str | None]:
    """Look up the scheme code for a single fund name.

    The exact normalized name is tried against SCHEME_OVERRIDES first; only
    when that misses is mfapi searched and the best fuzzy match taken.

    Returns:
        ``(code, "override")`` for an override hit,
        ``(code, matched scheme name)`` for an mfapi hit, or
        ``(None, None)`` when nothing matched.
    """
    pinned = SCHEME_OVERRIDES.get(_normalize(fund_name))
    if pinned:
        return pinned, "override"

    found = _search_mfapi(_search_query(fund_name))
    hit = _best_match(found, fund_name)
    if not hit:
        return None, None
    return str(hit["schemeCode"]), hit.get("schemeName", "")


def resolve_missing_scheme_codes(
    rows: list[dict[str, str]],
    *,
    verbose: bool = True,
) -> tuple[list[dict[str, str]], dict[str, str]]:
    """Fill in missing scheme codes and correct stale ones, mutating *rows*.

    Only rows whose ``Fund`` value is non-empty, contains at least two
    hyphens and no colon are considered; every other row is left untouched.

    Resolution order for a considered row:

    1. A valid numeric code that disagrees with the exact-name entry in
       SCHEME_OVERRIDES is replaced by the override ("corrected existing").
    2. A blank or invalid code (e.g. a platform code such as GROWWEH) is
       cleared, then filled from SCHEME_OVERRIDES when the name is listed.
    3. Anything still unresolved is looked up through mfapi search, fanned
       out over a 20-worker thread pool because the work is network-bound.

    Args:
        rows: CSV rows as dicts. ``"Fund"`` and ``"Scheme Code"`` are read and
            ``"Scheme Code"`` is rewritten in place.
        verbose: Print progress and a summary to stdout when true. A lookup
            that raises is reported regardless of this flag.

    Returns:
        ``(rows, resolved)``: *rows* is the same list object that was passed
        in, and *resolved* maps each fund name whose code was set or
        corrected to the code written to its row.
    """
    from concurrent.futures import ThreadPoolExecutor, as_completed

    resolved: dict[str, str] = {}
    corrected_existing = 0

    # -- Collect rows that need resolution ------------------------------------
    target_rows: list[dict[str, str]] = []
    for row in rows:
        fund_name = (row.get("Fund") or "").strip()
        if not fund_name or fund_name.count("-") < 2 or ":" in fund_name:
            continue
        raw_code = (row.get("Scheme Code") or "").strip()

        if _is_valid_scheme_code(raw_code):
            # If we know the canonical code for this exact fund name, correct
            # it even when the CSV already contains a numeric but stale code.
            override_code = SCHEME_OVERRIDES.get(_normalize(fund_name))
            if override_code and raw_code != override_code:
                row["Scheme Code"] = override_code
                resolved[fund_name] = override_code
                corrected_existing += 1
            continue

        # Blank or invalid code: this row is genuinely "missing" and is
        # counted as such, even if the override table can fill it in Phase A.
        if raw_code:
            row["Scheme Code"] = ""   # clear invalid platform codes e.g. GROWWEH
        target_rows.append(row)

    total_missing = len(target_rows)
    if total_missing == 0:
        if verbose:
            if corrected_existing:
                print(f"[resolver] Corrected {corrected_existing} existing scheme codes via override table.")
            else:
                print("[resolver] No missing scheme codes found.")
        return rows, resolved

    if verbose:
        print(f"[resolver] Resolving {total_missing} missing scheme codes (parallel)…")

    # -- Phase A: override table - one dict lookup per fund, no network -------
    mfapi_needed: list[dict[str, str]] = []
    override_count = 0

    for row in target_rows:
        fund_name = (row.get("Fund") or "").strip()
        code = SCHEME_OVERRIDES.get(_normalize(fund_name))
        if code:
            row["Scheme Code"] = code
            resolved[fund_name] = code
            override_count += 1
        else:
            mfapi_needed.append(row)

    if verbose and override_count:
        print(f"  [resolver] {override_count} resolved via override table (instant)")
    if verbose and corrected_existing:
        print(f"  [resolver] {corrected_existing} existing codes corrected via override table")

    # -- Phase B: mfapi search - parallel ThreadPoolExecutor ------------------
    if not mfapi_needed:
        if verbose:
            print(f"[resolver] Done. {override_count}/{total_missing} resolved.")
        return rows, resolved

    total_mfapi = len(mfapi_needed)
    mfapi_count = 0

    with ThreadPoolExecutor(max_workers=20) as executor:
        # Rows reaching this point have no override entry, so the shared
        # single-name resolver goes straight to the mfapi search.
        future_to_row = {
            executor.submit(
                resolve_scheme_code_for_fund_name,
                (row.get("Fund") or "").strip(),
            ): row
            for row in mfapi_needed
        }
        # as_completed() is consumed on the calling thread only, so the row
        # and `resolved` mutations below need no lock.
        for n, future in enumerate(as_completed(future_to_row), start=1):
            row = future_to_row[future]
            fund_name = (row.get("Fund") or "").strip()
            try:
                code, matched_name = future.result()
            except Exception as exc:
                # Keep the batch best-effort, but do not hide why a lookup died.
                print(f"  [resolver] lookup failed for '{fund_name}': {exc}")
                code = matched_name = None

            if code:
                row["Scheme Code"] = code
                resolved[fund_name] = code
                mfapi_count += 1
                if verbose:
                    print(f"  [{n}/{total_mfapi}] OK  {fund_name[:55]}")
                    print(f"       -> [{code}] {(matched_name or '')[:55]}")
            elif verbose:
                print(f"  [{n}/{total_mfapi}] NO  {fund_name[:55]} -- no match")

    if verbose:
        # Counts are tracked per row rather than derived from len(resolved):
        # `resolved` is keyed by fund name and also holds corrected rows, so
        # it does not measure how many of the missing rows were filled.
        corrected_note = (
            f", {corrected_existing} corrected existing codes" if corrected_existing else ""
        )
        print(f"[resolver] Done. {override_count + mfapi_count}/{total_missing} resolved "
              f"({override_count} overrides + {mfapi_count} mfapi"
              f"{corrected_note}).")
    return rows, resolved