"""
Weightage scoring algorithm for mutual fund schemes.

Scoring method: Sum of column weights where cell qualifies for Light Green
(Top/Bottom 10) AND is NOT overridden by Light Red fill (threshold violations).

Weight Distribution (Advisor-revised, March 2026):
   1. Sortino Ratio:        1.300  (Top 10, higher is better)
   2. Sharpe Ratio:         1.200  (Top 10, higher is better)
   3. Information Ratio:    1.000  (Top 10, higher is better, Light Red if < 0)
   4. Alpha:                1.000  (Top 10, higher is better, Light Red if < 1)
   5. Maximum Drawdown:     1.350  (Top 10, closest to 0 is better)
   6. Down Market Capture:  1.000  (Bottom 10, lower is better)
   7. Standard Deviation:   1.000  (Bottom 10, lower is better)
   8. 10 Years CAGR:        0.750  (Top 10, higher is better, Light Red if < Category Avg)
   9. 5 Years CAGR:         0.600  (Top 10, higher is better, Light Red if < Category Avg)
  10. 3 Years CAGR:         0.400  (Top 10, higher is better, Light Red if < Category Avg)
  11. P/E Ratio:            0.150  (Bottom 10, lower is better)
  12. TER:                  0.150  (Bottom 10, lower is better)
  13. Turnover (%):         0.100  (Bottom 10, lower is better)
      Total:               10.000
"""

import math
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime
from typing import List, Optional, Dict

from src.models import Fund

# ─── Weight map (Advisor-revised March 2026) ─────────────────────────────────

WEIGHTS: Dict[str, float] = {
    "sortino": 1.30,
    "sharpe": 1.20,
    "info_ratio": 1.00,
    "alpha": 1.00,
    "max_drawdown": 1.35,
    "down_capture": 1.00,
    "std_dev": 1.00,
    "cagr_10y": 0.75,
    "cagr_5y": 0.60,
    "cagr_3y": 0.40,
    "pe_ratio": 0.15,
    "ter": 0.15,
    "turnover": 0.10,
}

# Sanity-check: total should equal 10.000
_TOTAL = round(sum(WEIGHTS.values()), 3)
assert _TOTAL == 10.000, f"WEIGHTS do not sum to 10.000 — got {_TOTAL}"

# Metrics where higher is better → Top 10
TOP_10_METRICS = [
    "sharpe",
    "sortino",
    "alpha",
    "info_ratio",
    "max_drawdown",
    "cagr_3y",
    "cagr_5y",
    "cagr_10y",
]

# Metrics where lower is better → Bottom 10
BOTTOM_10_METRICS = [
    "ter",
    "turnover",
    "std_dev",
    "down_capture",
    "pe_ratio",
]

# Dual-condition metrics: qualifies for green AND may trigger light-red override
DUAL_CONDITION_RULES: Dict[str, tuple] = {
    "alpha": ("below_value", 1),              # Light Red if alpha < 1%
    "info_ratio": ("below_value", 0),         # Light Red if IR < 0
    "cagr_3y": ("below_category_avg", None),  # Light Red if < category avg
    "cagr_5y": ("below_category_avg", None),
    "cagr_10y": ("below_category_avg", None),
}


# ─── Value helpers ────────────────────────────────────────────────────────────

def _is_valid(v) -> bool:
    """Return True if ``v`` is usable as a metric value.

    A value is rejected when it is ``None``, a float NaN, or equal to zero.
    """
    if v is None:
        return False
    if isinstance(v, float) and math.isnan(v):
        return False
    # 0.0 is treated as missing/not-applicable for risk metrics
    if v == 0:
        return False
    return True


def _is_valid_drawdown(v) -> bool:
    """Return True if ``v`` is a usable Maximum Drawdown value.

    For Maximum Drawdown specifically, 0.0 is treated as a data-quality gap
    (overnight/liquid funds sometimes publish 0 when the real figure was never
    fetched), so an exact zero is excluded from ranking.  The rule set is the
    same as ``_is_valid`` (None, NaN and 0 are rejected); it is kept as a
    separate name so the drawdown policy can diverge later without touching
    callers.
    """
    return _is_valid(v)


# ─── Ranking helpers ──────────────────────────────────────────────────────────

def _top_n(fund: Fund, peers: List[Fund], metric: str, n: int = 10) -> bool:
    """Return True if ``fund`` is in the top-N (highest values) for ``metric``.

    Args:
        fund: The fund being tested; the metric is read with ``getattr``.
        peers: Peer group the fund is ranked against.
        metric: Attribute name of the metric on each fund.
        n: Size of the "top" bucket; capped at the number of valid peers.

    Returns:
        False when the fund's own value is invalid or fewer than two peers
        carry a valid value; otherwise whether the fund's value is at least
        the N-th highest valid value (ties at the cutoff all qualify).

    Special case: for Information Ratio a value of exactly 0.0 participates in
    ranking (Excel treats 0 as a valid value; only < 0 is "red").
    """
    fund_val = getattr(fund, metric, None)

    def _valid_for_rank(v) -> bool:
        if metric == "info_ratio":
            # Treat 0 as a real value; only None/NaN are invalid here.
            if v is None:
                return False
            if isinstance(v, float) and math.isnan(v):
                return False
            return True
        return _is_valid(v)

    if not _valid_for_rank(fund_val):
        return False

    valid = [
        getattr(f, metric, None)
        for f in peers
        if _valid_for_rank(getattr(f, metric, None))
    ]
    if len(valid) < 2:
        return False

    # Match Excel's TOP 10 conditional formatting:
    # "Top N items", with N capped at the number of valid funds.
    effective_n = min(n, len(valid))
    valid.sort(reverse=True)
    return fund_val >= valid[effective_n - 1]


def _top_n_drawdown(fund: Fund, peers: List[Fund], n: int = 10) -> bool:
    """Return True if ``fund`` is in the top-N for Maximum Drawdown.

    "Closest to 0" means the highest value among negatives: -5% is better than
    -20%, so values are still sorted descending.  Only values accepted by
    ``_is_valid_drawdown`` participate.  N is capped at the number of valid
    peers (strict-N, no percentage fallback).

    NOTE(review): unlike ``_top_n`` this only requires at least ONE valid peer
    value, so a category with a single valid drawdown marks that fund as
    qualifying.  Confirm whether that is intended.
    """
    fund_val = getattr(fund, "max_drawdown", None)
    if not _is_valid_drawdown(fund_val):
        return False

    valid = [
        getattr(f, "max_drawdown", None)
        for f in peers
        if _is_valid_drawdown(getattr(f, "max_drawdown", None))
    ]
    if not valid:
        return False

    effective_n = min(n, len(valid))
    valid.sort(reverse=True)  # -5 > -20 → -5 is rank-1
    return fund_val >= valid[effective_n - 1]


def _bottom_n(fund: Fund, peers: List[Fund], metric: str, n: int = 10) -> bool:
    """Return True if ``fund`` is in the bottom-N (lowest values) for ``metric``.

    Returns False when the fund's own value is invalid (see ``_is_valid``) or
    fewer than two peers carry a valid value.  Ties at the cutoff all qualify.
    """
    fund_val = getattr(fund, metric, None)
    if not _is_valid(fund_val):
        return False

    valid = [
        getattr(f, metric, None)
        for f in peers
        if _is_valid(getattr(f, metric, None))
    ]
    if len(valid) < 2:
        return False

    # Match Excel's BOTTOM 10 conditional formatting:
    # "Bottom N items", with N capped at the number of valid funds.
    effective_n = min(n, len(valid))
    valid.sort()
    return fund_val <= valid[effective_n - 1]


def _category_avg(peers: List[Fund], metric: str) -> Optional[float]:
    """Return the arithmetic mean of valid ``metric`` values across ``peers``.

    Returns None when no peer has a valid value.
    """
    vals = [
        getattr(f, metric, None)
        for f in peers
        if _is_valid(getattr(f, metric, None))
    ]
    return sum(vals) / len(vals) if vals else None


def _light_red(fund: Fund, metric: str, cat_avg: Optional[float]) -> bool:
    """Return True if ``metric`` triggers a Light Red override for ``fund``.

    Only metrics listed in ``DUAL_CONDITION_RULES`` can trigger.  A value that
    fails ``_is_valid`` (None, NaN or exactly 0) never triggers the override.
    For category-average rules, a missing ``cat_avg`` never triggers either.
    """
    if metric not in DUAL_CONDITION_RULES:
        return False

    rule_type, threshold = DUAL_CONDITION_RULES[metric]
    val = getattr(fund, metric, None)
    if not _is_valid(val):
        return False

    if rule_type == "below_value":
        return val < threshold
    if rule_type == "below_category_avg":
        return (cat_avg is not None) and (val < cat_avg)
    return False


# ─── Drawdown zero-cell fix ───────────────────────────────────────────────────

def drawdown_zero_fix(
    funds: List[Fund],
    *,
    verbose: bool = True,
) -> int:
    """Recompute missing Maximum Drawdown values via the NAV engine.

    Strategy
    --------
    1. Select every fund whose ``max_drawdown`` is 0 or None, whose category
       does not start with a debt-like prefix, whose age (from the optional
       ``_launch_date`` attribute) is unknown or at least 3 years, and whose
       name is not in ``csv_enrichment._NAV_ATTEMPTED_FUNDS``.
    2. For each selected fund carrying a non-blank ``_scheme_code`` attribute,
       call ``compute_nav_metrics_for_scheme()`` requesting only
       ``["Maximum Drawdown"]``.  Calls run in a thread pool.
    3. If a usable (non-None, non-NaN, non-zero) value comes back, write it to
       ``fund.max_drawdown``.

    Args:
        funds: Funds to inspect; fixed funds are mutated in place.
        verbose: When True, progress and per-fund outcomes are printed.

    Returns:
        The count of cells successfully fixed.

    NOTE: per the original author's note the NAV engine needs network access
    (mfapi.in + yfinance).  This function is intentionally separate from
    ``compute_scores()`` so callers can opt in only when enrichment is desired.
    """
    # Import here to avoid circular dependency at module level
    try:
        from src.nav_metrics_engine import NavEngineCache, compute_nav_metrics_for_scheme
    except ImportError:
        if verbose:
            print("[drawdown_fix] nav_metrics_engine not available — skipping.")
        return 0

    DEBT_PREFIXES = ("debt", "liquid", "overnight", "money market", "gilt",
                     "fixed maturity", "interval", "fmp")

    _now = datetime.now()

    def _fund_age_years(f) -> Optional[float]:
        # Age is only known when the fund carries a datetime launch date.
        ld = getattr(f, "_launch_date", None)
        if not isinstance(ld, datetime):
            return None
        return (_now - ld).days / 365.25

    # Import the set of funds already attempted by csv_enrichment NAV phase.
    # Best-effort: any failure to obtain it just means nothing is skipped.
    try:
        from src.csv_enrichment import _NAV_ATTEMPTED_FUNDS as _nav_attempted
    except Exception:
        _nav_attempted = set()

    def _needs_fix(f) -> bool:
        # Only target funds where drawdown is truly missing (0 or None)
        if not (f.max_drawdown == 0 or f.max_drawdown is None):
            return False
        # AND only equity/hybrid — debt funds have tiny/no drawdown, skip them.
        # A missing category is treated as "not debt" instead of crashing.
        if (f.category or "").lower().startswith(DEBT_PREFIXES):
            return False
        # AND fund must be ≥3 years old — younger funds can't have 3Y NAV history
        age = _fund_age_years(f)
        if age is not None and age < 3.0:
            return False
        # AND skip funds already attempted by csv_enrichment NAV phase —
        # if enrichment couldn't fill MDD, a second pass won't either
        return f.name not in _nav_attempted

    zero_funds = [f for f in funds if _needs_fix(f)]

    if not zero_funds:
        if verbose:
            print("[drawdown_fix] No zero/missing drawdown cells found.")
        return 0

    if verbose:
        print(f"[drawdown_fix] Attempting to fix {len(zero_funds)} drawdown cells …")

    # Bulk-preload cache before parallel workers start (2 SQL queries instead of N)
    try:
        from src.nav_metrics_engine import _bulk_preload_cache, resolve_benchmark_ticker
        _scheme_codes = [getattr(f, "_scheme_code", None) or "" for f in zero_funds]
        _bench_tickers = [
            resolve_benchmark_ticker(getattr(f, "benchmark", "") or "")
            for f in zero_funds
        ]
        _bulk_preload_cache(_scheme_codes, _bench_tickers)
    except Exception as exc:
        # Graceful degradation — workers will fall back to per-query lookups.
        if verbose:
            print(f"[drawdown_fix] Cache preload skipped — {exc!r}")

    cache = NavEngineCache()

    # Split the candidates by whether a scheme code is available.
    with_code = []
    no_code = []
    for f in zero_funds:
        code = getattr(f, "_scheme_code", None) or ""
        if code.strip():
            with_code.append((f, code, getattr(f, "benchmark", "") or ""))
        else:
            no_code.append(f)

    if verbose:
        for f in no_code:
            print(f" SKIP {f.name[:55]} — no scheme code available")

    def _fix_one(args):
        # Worker: fetch only the drawdown metric for one scheme.
        fund, scheme_code, benchmark = args
        metrics, skip = compute_nav_metrics_for_scheme(
            scheme_code=scheme_code,
            benchmark_type=benchmark,
            needed_metrics=["Maximum Drawdown"],
            cache=cache,
        )
        mdd = metrics.get("Maximum Drawdown")
        reason = skip.get("Maximum Drawdown", "unknown")
        return fund, mdd, reason

    fixed = 0
    with ThreadPoolExecutor(max_workers=12) as executor:
        futures = {executor.submit(_fix_one, item): item for item in with_code}
        # Results are consumed in this (calling) thread, so mutating the fund
        # and the counter here needs no lock.
        for fut in as_completed(futures):
            try:
                fund, mdd, reason = fut.result()
            except Exception as exc:
                # A failed worker must not abort the batch, but it should not
                # vanish silently either.
                if verbose:
                    failed_fund = futures[fut][0]
                    print(f" MISS {failed_fund.name[:55]} — worker error: {exc!r}")
                continue

            # Reject None, NaN and 0 so a junk value is never counted as a fix.
            if _is_valid_drawdown(mdd):
                fund.max_drawdown = mdd
                fixed += 1
                if verbose:
                    print(f" FIXED {fund.name[:55]} → MDD = {mdd:.3f}%")
            elif verbose:
                print(f" MISS {fund.name[:55]} — {reason}")

    if verbose:
        print(f"[drawdown_fix] Done. Fixed {fixed}/{len(zero_funds)} cells.")

    return fixed


# ─── Main scoring engine ──────────────────────────────────────────────────────

def compute_scores(funds: List[Fund]) -> List[Fund]:
    """Score and rank all funds within their categories.

    Algorithm
    ---------
    For every metric that carries a weight:
      1. Check if the fund is in Top-N or Bottom-N (as appropriate) within its
         category peer group → "Light Green".
      2. If Light Green AND a dual-condition rule fires → "Light Red" override:
         weight contribution = 0.
      3. Otherwise if Light Green and NOT Light Red → add weight.

    ``fund.score`` is capped at 10.0 (model scale) and rounded to 3 decimals.

    Also sets:
      fund.rank_in_category – 1 = best within category (ties broken by
                              lower-cased name, then the optional ``order``
                              attribute)
      fund.is_top_quartile  – True for the top max(1, ⌈N/4⌉) funds

    Returns the same list (mutated in-place) for convenience.
    """
    # Group by category
    categories: Dict[str, List[Fund]] = {}
    for fund in funds:
        categories.setdefault(fund.category, []).append(fund)

    for cat_funds in categories.values():
        # Pre-compute category averages for CAGR dual-condition rules
        cat_averages = {
            metric: _category_avg(cat_funds, metric)
            for metric in ("cagr_3y", "cagr_5y", "cagr_10y")
        }

        for fund in cat_funds:
            score = 0.0

            for metric, weight in WEIGHTS.items():
                is_green = False

                # ── Green check ──────────────────────────────────────────
                # max_drawdown is tested first: it is also listed in
                # TOP_10_METRICS but needs its own validity rule.
                if metric == "max_drawdown":
                    is_green = _top_n_drawdown(fund, cat_funds)
                elif metric in TOP_10_METRICS:
                    is_green = _top_n(fund, cat_funds, metric)
                elif metric in BOTTOM_10_METRICS:
                    is_green = _bottom_n(fund, cat_funds, metric)

                # ── Light Red override ───────────────────────────────────
                if is_green and metric in DUAL_CONDITION_RULES:
                    cat_avg = cat_averages.get(metric)
                    if _light_red(fund, metric, cat_avg):
                        is_green = False  # zeroed by override

                if is_green:
                    score += weight

            fund.score = round(min(score, 10.0), 3)

        # ── Rank within category ─────────────────────────────────────────
        sorted_funds = sorted(
            cat_funds,
            key=lambda f: (
                -(f.score or 0),
                (f.name or "").lower(),
                getattr(f, "order", 0),
            ),
        )
        top_quartile_cutoff = max(1, math.ceil(len(sorted_funds) / 4))

        for rank, fund in enumerate(sorted_funds, start=1):
            fund.rank_in_category = rank
            fund.is_top_quartile = (rank <= top_quartile_cutoff)

    return funds