File size: 16,953 Bytes
b0e15c1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
"""

Weightage scoring algorithm for mutual fund schemes.



Scoring method: Sum of column weights where cell qualifies for Light Green (Top/Bottom 10)

AND is NOT overridden by Light Red fill (threshold violations).



Weight Distribution (Advisor-revised, March 2026):

1.  Sortino Ratio:        1.300  (Top 10, higher is better)

2.  Sharpe Ratio:         1.200  (Top 10, higher is better)

3.  Information Ratio:    1.000  (Top 10, higher is better, Light Red if < 0)

4.  Alpha:                1.000  (Top 10, higher is better, Light Red if < 1)

5.  Maximum Drawdown:     1.350  (Top 10, closest to 0 is better)

6.  Down Market Capture:  1.000  (Bottom 10, lower is better)

7.  Standard Deviation:   1.000  (Bottom 10, lower is better)

8.  10 Years CAGR:        0.750  (Top 10, higher is better, Light Red if < Category Avg)

9.  5 Years CAGR:         0.600  (Top 10, higher is better, Light Red if < Category Avg)

10. 3 Years CAGR:         0.400  (Top 10, higher is better, Light Red if < Category Avg)

11. P/E Ratio:            0.150  (Bottom 10, lower is better)

12. TER:                  0.150  (Bottom 10, lower is better)

13. Turnover (%):         0.100  (Bottom 10, lower is better)



Total: 10.000

"""

import math
from typing import List, Optional, Dict
from src.models import Fund


# ─── Weight map (Advisor-revised March 2026) ─────────────────────────────────
# Column weight added to a fund's score when the cell qualifies for Light
# Green (Top/Bottom 10) and is NOT overridden by a Light Red rule.
WEIGHTS: Dict[str, float] = {
    "sortino":      1.30,
    "sharpe":       1.20,
    "info_ratio":   1.00,
    "alpha":        1.00,
    "max_drawdown": 1.35,
    "down_capture": 1.00,
    "std_dev":      1.00,
    "cagr_10y":     0.75,
    "cagr_5y":      0.60,
    "cagr_3y":      0.40,
    "pe_ratio":     0.15,
    "ter":          0.15,
    "turnover":     0.10,
}

# Sanity-check: total must equal 10.000 (the model's score scale).
# Raise explicitly instead of `assert` so the check survives `python -O`.
_TOTAL = round(sum(WEIGHTS.values()), 3)
if _TOTAL != 10.000:
    raise ValueError(f"WEIGHTS do not sum to 10.000 — got {_TOTAL}")

# Metrics where higher is better → ranked by Top 10 membership
TOP_10_METRICS = [
    "sharpe", "sortino", "alpha",
    "info_ratio", "max_drawdown",
    "cagr_3y", "cagr_5y", "cagr_10y",
]

# Metrics where lower is better → ranked by Bottom 10 membership
BOTTOM_10_METRICS = [
    "ter", "turnover", "std_dev",
    "down_capture", "pe_ratio",
]

# Dual-condition metrics: a Light Green cell may still be overridden to
# Light Red (weight contribution zeroed) when the rule below fires.
DUAL_CONDITION_RULES: Dict[str, tuple] = {
    "alpha":      ("below_value",        1),    # Light Red if alpha < 1%
    "info_ratio": ("below_value",        0),    # Light Red if IR < 0
    "cagr_3y":    ("below_category_avg", None), # Light Red if < category avg
    "cagr_5y":    ("below_category_avg", None),
    "cagr_10y":   ("below_category_avg", None),
}


# ─── Value helpers ────────────────────────────────────────────────────────────

def _is_valid(v) -> bool:
    """True if v is a real, non-zero, non-NaN number."""
    if v is None:
        return False
    if isinstance(v, float) and (v != v):   # NaN check
        return False
    # 0.0 is treated as missing/not-applicable for risk metrics
    if v == 0:
        return False
    return True


def _is_valid_drawdown(v) -> bool:
    """

    For Maximum Drawdown specifically: 0.0 is a genuine data-quality gap

    (overnight/liquid funds sometimes publish 0 when the real figure was never

    fetched).  Treat 0 as invalid so that only funds with a real (negative)

    drawdown value compete in the ranking.

    """
    if v is None:
        return False
    if isinstance(v, float) and v != v:     # NaN
        return False
    if v == 0:
        return False   # ← exact zero excluded; see drawdown_zero_fix() below
    return True


# ─── Ranking helpers ──────────────────────────────────────────────────────────

def _top_n(fund: Fund, peers: List[Fund], metric: str, n: int = 10) -> bool:
    """Return True if *fund* ranks in the top-N (highest values) for *metric*.

    Special case: for Information Ratio a value of exactly 0.0 participates
    in the ranking (Excel treats 0 as a valid value; only < 0 is "red").
    """
    if metric == "info_ratio":
        # 0 is a real value here; only None/NaN are excluded.
        def usable(v):
            return v is not None and not (isinstance(v, float) and v != v)
    else:
        usable = _is_valid

    fund_val = getattr(fund, metric, None)
    if not usable(fund_val):
        return False

    pool = sorted(
        (getattr(f, metric, None) for f in peers
         if usable(getattr(f, metric, None))),
        reverse=True,
    )
    if len(pool) < 2:
        return False

    # Mirror Excel's "Top N items" conditional formatting,
    # with N capped at the number of valid funds.
    cutoff = pool[min(n, len(pool)) - 1]
    return fund_val >= cutoff


def _top_n_drawdown(fund: Fund, peers: List[Fund], n: int = 10) -> bool:
    """Top-N membership test specialised for Maximum Drawdown.

    "Closest to 0" wins: -5% beats -20%, so the ranking is still descending.
    Only non-zero, non-None values participate (see _is_valid_drawdown),
    and strict-N is used (no 50% fallback) so a lone liquid fund with a
    real drawdown does not qualify merely because the category is small.
    """
    value = getattr(fund, "max_drawdown", None)
    if not _is_valid_drawdown(value):
        return False

    ranked = sorted(
        (getattr(f, "max_drawdown", None) for f in peers
         if _is_valid_drawdown(getattr(f, "max_drawdown", None))),
        reverse=True,              # -5 > -20  →  -5 is rank-1
    )
    if not ranked:
        return False

    return value >= ranked[min(n, len(ranked)) - 1]


def _bottom_n(fund: Fund, peers: List[Fund], metric: str, n: int = 10) -> bool:
    """Return True if *fund* ranks in the bottom-N (lowest values) for *metric*."""
    value = getattr(fund, metric, None)
    if not _is_valid(value):
        return False

    pool = sorted(
        v for v in (getattr(f, metric, None) for f in peers)
        if _is_valid(v)
    )
    if len(pool) < 2:
        return False

    # Mirror Excel's "Bottom N items" conditional formatting,
    # with N capped at the number of valid funds.
    return value <= pool[min(n, len(pool)) - 1]


def _category_avg(peers: List[Fund], metric: str) -> Optional[float]:
    """Arithmetic mean of valid *metric* values across *peers* (None if none)."""
    total = 0.0
    count = 0
    for f in peers:
        v = getattr(f, metric, None)
        if _is_valid(v):
            total += v
            count += 1
    return total / count if count else None


def _light_red(fund: Fund, metric: str, cat_avg: Optional[float]) -> bool:
    """Return True if *metric* triggers a Light Red override for this fund."""
    rule = DUAL_CONDITION_RULES.get(metric)
    if rule is None:
        return False

    val = getattr(fund, metric, None)
    if not _is_valid(val):
        return False

    kind, threshold = rule
    if kind == "below_value":
        return val < threshold
    if kind == "below_category_avg":
        return cat_avg is not None and val < cat_avg
    return False


# ─── Drawdown zero-cell fix ───────────────────────────────────────────────────

def drawdown_zero_fix(
    funds: List[Fund],
    *,
    verbose: bool = True,
) -> int:
    """
    Detect funds whose max_drawdown is exactly 0 (data-quality gap) and
    recompute it from live NAV history via the NAV engine.

    Strategy
    --------
    1. Collect every fund whose max_drawdown is 0 or None, that is not a
       debt-style fund (tiny/no drawdown), that is old enough to have 3Y
       NAV history, and that the csv_enrichment NAV phase has not already
       attempted (a second identical pass won't succeed where it failed).
    2. For each such fund, call compute_nav_metrics_for_scheme() requesting
       only ["Maximum Drawdown"], fanned out over a thread pool.
    3. If a real negative value comes back, write it to fund.max_drawdown.

    Parameters
    ----------
    funds   : full fund list; candidates are filtered internally.
    verbose : keyword-only; print per-fund progress when True.

    Returns the count of cells successfully fixed.

    NOTE: This function requires network access (mfapi.in + yfinance).
          It is intentionally separated from compute_scores() so callers
          can opt in only when enrichment is desired.
    """
    # Import here to avoid circular dependency at module level
    try:
        from src.nav_metrics_engine import NavEngineCache, compute_nav_metrics_for_scheme
    except ImportError:
        if verbose:
            print("[drawdown_fix] nav_metrics_engine not available — skipping.")
        return 0

    DEBT_PREFIXES = ("debt", "liquid", "overnight", "money market", "gilt",
                     "fixed maturity", "interval", "fmp")

    from datetime import datetime as _dt
    _now = _dt.now()

    def _fund_age_years(f) -> float | None:
        # None when no launch date is available — we can't rule the fund out.
        ld = getattr(f, "_launch_date", None)
        if not isinstance(ld, _dt):
            return None
        return (_now - ld).days / 365.25

    # Funds already attempted by the csv_enrichment NAV phase: if that pass
    # couldn't fill MDD, re-running the same fetch here won't either.
    try:
        from src.csv_enrichment import _NAV_ATTEMPTED_FUNDS as _nav_attempted
    except Exception:
        _nav_attempted = set()

    def _needs_fix(f: Fund) -> bool:
        """Candidate predicate; computes fund age exactly once per fund."""
        # Only target funds where drawdown is truly missing (0 or None).
        if not (f.max_drawdown == 0 or f.max_drawdown is None):
            return False
        # Debt funds have tiny/no drawdown — skip them.
        # (`or ""` guards against a missing category; TODO confirm upstream
        # always sets it.)
        if any((f.category or "").lower().startswith(pfx) for pfx in DEBT_PREFIXES):
            return False
        # Funds younger than 3 years can't have 3Y NAV history.
        age = _fund_age_years(f)
        if age is not None and age < 3.0:
            return False
        return f.name not in _nav_attempted

    zero_funds = [f for f in funds if _needs_fix(f)]

    if not zero_funds:
        if verbose:
            print("[drawdown_fix] No zero/missing drawdown cells found.")
        return 0

    if verbose:
        print(f"[drawdown_fix] Attempting to fix {len(zero_funds)} drawdown cells …")

    from concurrent.futures import ThreadPoolExecutor, as_completed as _as_completed
    import threading as _threading

    # Bulk-preload cache before parallel workers start (2 SQL queries instead of N)
    try:
        from src.nav_metrics_engine import _bulk_preload_cache, resolve_benchmark_ticker
        _scheme_codes  = [getattr(f, "_scheme_code", None) or "" for f in zero_funds]
        _bench_tickers = [resolve_benchmark_ticker(getattr(f, "benchmark", "") or "")
                         for f in zero_funds]
        _bulk_preload_cache(_scheme_codes, _bench_tickers)
    except Exception:
        pass  # graceful degradation — workers will fall back to per-query

    cache = NavEngineCache()
    fixed = 0
    _lock = _threading.Lock()

    # Partition: only funds with a scheme code can be looked up.
    with_code = [
        (f, getattr(f, "_scheme_code", None) or "", getattr(f, "benchmark", "") or "")
        for f in zero_funds
        if (getattr(f, "_scheme_code", None) or "").strip()
    ]
    no_code = [f for f in zero_funds if not (getattr(f, "_scheme_code", None) or "").strip()]

    if verbose:
        for f in no_code:
            print(f"  SKIP  {f.name[:55]} — no scheme code available")

    def _fix_one(args):
        """Worker: fetch Maximum Drawdown for one (fund, scheme_code, benchmark)."""
        fund, scheme_code, benchmark = args
        metrics, skip = compute_nav_metrics_for_scheme(
            scheme_code=scheme_code,
            benchmark_type=benchmark,
            needed_metrics=["Maximum Drawdown"],
            cache=cache,
        )
        mdd    = metrics.get("Maximum Drawdown")
        reason = skip.get("Maximum Drawdown", "unknown")
        return fund, mdd, reason

    with ThreadPoolExecutor(max_workers=12) as executor:
        futures = {executor.submit(_fix_one, item): item for item in with_code}
        for fut in _as_completed(futures):
            try:
                fund, mdd, reason = fut.result()
            except Exception as e:
                # BUGFIX: was a silent `continue` with an unused `e` — surface
                # the failure so a broken engine doesn't look like "no data".
                if verbose:
                    failed_fund = futures[fut][0]
                    print(f"  ERROR {failed_fund.name[:55]} — {e}")
                continue
            if mdd is not None and mdd != 0:
                with _lock:
                    fund.max_drawdown = mdd
                    fixed += 1
                if verbose:
                    print(f"  FIXED {fund.name[:55]}  →  MDD = {mdd:.3f}%")
            else:
                if verbose:
                    print(f"  MISS  {fund.name[:55]} — {reason}")

    if verbose:
        print(f"[drawdown_fix] Done. Fixed {fixed}/{len(zero_funds)} cells.")

    return fixed


# ─── Main scoring engine ──────────────────────────────────────────────────────

def compute_scores(funds: List[Fund]) -> List[Fund]:
    """
    Score and rank all funds within their categories.

    Algorithm
    ---------
    For every metric that carries a weight:
      1. Top-N / Bottom-N membership (as appropriate) within the fund's
         category peer group earns "Light Green".
      2. If Light Green AND a dual-condition rule fires, the cell becomes
         "Light Red" and its weight contribution is zeroed.
      3. Light Green without Light Red adds the metric's weight.

    fund.score is capped at 10.0 (model scale).

    Also sets:
      fund.rank_in_category – 1 = best within category
      fund.is_top_quartile  – True for the top ⌈N/4⌉ funds

    Returns the same list (mutated in-place) for convenience.
    """
    # Bucket funds by category.
    by_category: Dict[str, List[Fund]] = {}
    for f in funds:
        by_category.setdefault(f.category, []).append(f)

    for peers in by_category.values():

        # Category averages feed the CAGR dual-condition rules.
        averages = {
            m: _category_avg(peers, m)
            for m in ("cagr_3y", "cagr_5y", "cagr_10y")
        }

        def _qualifies(fund: Fund, metric: str) -> bool:
            """Light Green check, then the Light Red override."""
            if metric == "max_drawdown":
                green = _top_n_drawdown(fund, peers)
            elif metric in TOP_10_METRICS:
                green = _top_n(fund, peers, metric)
            elif metric in BOTTOM_10_METRICS:
                green = _bottom_n(fund, peers, metric)
            else:
                green = False

            if green and metric in DUAL_CONDITION_RULES:
                if _light_red(fund, metric, averages.get(metric)):
                    green = False   # contribution zeroed by the override
            return green

        for fund in peers:
            total = sum(
                weight for metric, weight in WEIGHTS.items()
                if _qualifies(fund, metric)
            )
            fund.score = round(min(total, 10.0), 3)

        # Rank within the category: score desc, then name, then input order.
        ordered = sorted(
            peers,
            key=lambda f: (-(f.score or 0), (f.name or "").lower(), getattr(f, "order", 0)),
        )
        quartile_cutoff = max(1, math.ceil(len(ordered) / 4))

        for position, fund in enumerate(ordered, start=1):
            fund.rank_in_category = position
            fund.is_top_quartile  = (position <= quartile_cutoff)

    return funds