Spaces:
Running
Running
| """ | |
| Weightage scoring algorithm for mutual fund schemes. | |
| Scoring method: Sum of column weights where cell qualifies for Light Green (Top/Bottom 10) | |
| AND is NOT overridden by Light Red fill (threshold violations). | |
| Weight Distribution (Advisor-revised, March 2026): | |
| 1. Sortino Ratio: 1.300 (Top 10, higher is better) | |
| 2. Sharpe Ratio: 1.200 (Top 10, higher is better) | |
| 3. Information Ratio: 1.000 (Top 10, higher is better, Light Red if < 0) | |
| 4. Alpha: 1.000 (Top 10, higher is better, Light Red if < 1) | |
| 5. Maximum Drawdown: 1.350 (Top 10, closest to 0 is better) | |
| 6. Down Market Capture: 1.000 (Bottom 10, lower is better) | |
| 7. Standard Deviation: 1.000 (Bottom 10, lower is better) | |
| 8. 10 Years CAGR: 0.750 (Top 10, higher is better, Light Red if < Category Avg) | |
| 9. 5 Years CAGR: 0.600 (Top 10, higher is better, Light Red if < Category Avg) | |
| 10. 3 Years CAGR: 0.400 (Top 10, higher is better, Light Red if < Category Avg) | |
| 11. P/E Ratio: 0.150 (Bottom 10, lower is better) | |
| 12. TER: 0.150 (Bottom 10, lower is better) | |
| 13. Turnover (%): 0.100 (Bottom 10, lower is better) | |
| Total: 10.000 | |
| """ | |
| import math | |
| from typing import List, Optional, Dict | |
| from src.models import Fund | |
# ─── Weight map (Advisor-revised March 2026) ─────────────────────────────────
# Weight added to a fund's score for each metric whose cell is "Light Green"
# (Top/Bottom 10 in its category) and is not overridden by "Light Red".
WEIGHTS: Dict[str, float] = {
    "sortino": 1.30,
    "sharpe": 1.20,
    "info_ratio": 1.00,
    "alpha": 1.00,
    "max_drawdown": 1.35,
    "down_capture": 1.00,
    "std_dev": 1.00,
    "cagr_10y": 0.75,
    "cagr_5y": 0.60,
    "cagr_3y": 0.40,
    "pe_ratio": 0.15,
    "ter": 0.15,
    "turnover": 0.10,
}

# Sanity check: the weights must total exactly 10.000.
# Raised explicitly instead of via `assert` so the check still runs under
# `python -O`; AssertionError is kept so the failure type is unchanged.
_TOTAL = round(sum(WEIGHTS.values()), 3)
if _TOTAL != 10.000:
    raise AssertionError(f"WEIGHTS do not sum to 10.000 — got {_TOTAL}")
# Metrics ranked by highest value: a cell is "Light Green" when the fund is in
# the Top 10 of its category.
TOP_10_METRICS = [
    "sharpe",
    "sortino",
    "alpha",
    "info_ratio",
    "max_drawdown",
    "cagr_3y",
    "cagr_5y",
    "cagr_10y",
]

# Metrics ranked by lowest value: a cell is "Light Green" when the fund is in
# the Bottom 10 of its category.
BOTTOM_10_METRICS = [
    "ter",
    "turnover",
    "std_dev",
    "down_capture",
    "pe_ratio",
]

# Metrics that can be green AND still be cancelled by a "Light Red" override.
# Each entry is (rule_type, threshold):
#   "below_value"        -> red when the metric is < threshold
#   "below_category_avg" -> red when the metric is < its category average
#                           (threshold is unused, hence None)
DUAL_CONDITION_RULES: Dict[str, tuple] = {
    # Alpha below 1 is red.
    "alpha": ("below_value", 1),
    # Information Ratio below 0 is red.
    "info_ratio": ("below_value", 0),
    # CAGR columns are red when under the category average.
    "cagr_3y": ("below_category_avg", None),
    "cagr_5y": ("below_category_avg", None),
    "cagr_10y": ("below_category_avg", None),
}
# ─── Value helpers ────────────────────────────────────────────────────────────
def _is_valid(v) -> bool:
    """
    Tell whether v may take part in ranking and averaging.

    Returns False for None, for a float NaN, and for anything equal to 0
    (0.0 is treated as missing / not-applicable for risk metrics).
    Every other value is accepted.
    """
    if v is None:
        return False
    # NaN is the only float that differs from itself.
    is_nan = isinstance(v, float) and v != v
    return not (is_nan or v == 0)
def _is_valid_drawdown(v) -> bool:
    """
    Validity test used for Maximum Drawdown values.

    An exact 0 is regarded as a data-quality gap (per the original author,
    overnight/liquid funds sometimes publish 0 when the real figure was never
    fetched), so it is rejected together with None and float NaN.  Only the
    remaining values compete in the drawdown ranking; drawdown_zero_fix()
    is the function that targets the zero/None cells.
    """
    absent = v is None
    if absent or (isinstance(v, float) and v != v):  # None or NaN
        return False
    # Exact zero is excluded as "never fetched".
    return not (v == 0)
# ─── Ranking helpers ──────────────────────────────────────────────────────────
def _top_n(fund: Fund, peers: List[Fund], metric: str, n: int = 10) -> bool:
    """
    Decide whether `fund` sits among the N highest values of `metric`.

    Values are read with getattr(..., metric, None) from `fund` and from
    every entry of `peers`.  Only values passing the validity filter are
    ranked; ties with the N-th value count as being inside the top N.
    Returns False when the fund's own value is not rankable or when fewer
    than two rankable peer values exist.

    Special case: for "info_ratio" a value of exactly 0.0 is rankable
    (Excel treats 0 as a valid value; only < 0 is "red"), so only None
    and NaN are filtered out for that metric.
    """
    def _rankable(value) -> bool:
        if metric != "info_ratio":
            return _is_valid(value)
        # Information Ratio keeps zeros; drop only None and NaN.
        return value is not None and not (isinstance(value, float) and value != value)

    own = getattr(fund, metric, None)
    if not _rankable(own):
        return False

    peer_values = (getattr(peer, metric, None) for peer in peers)
    ranked = sorted((v for v in peer_values if _rankable(v)), reverse=True)
    if len(ranked) < 2:
        return False

    # Mirrors Excel's "Top N items" rule, with N capped at the number of
    # rankable values.
    cutoff = ranked[min(n, len(ranked)) - 1]
    return own >= cutoff
def _top_n_drawdown(fund: Fund, peers: List[Fund], n: int = 10) -> bool:
    """
    Special top-N for Maximum Drawdown.

    "Closest to 0" = highest value among negatives: -5% is better than
    -20%, so values are still sorted descending.  Only values accepted by
    _is_valid_drawdown (not None, not NaN, not exactly 0) participate.

    Uses strict-N (N capped at the number of valid values, no 50% fallback).
    At least two valid drawdown values are required, matching _top_n and
    _bottom_n, so that a single fund with a real drawdown does not qualify
    merely because it is the only one with data.

    Returns True when the fund's drawdown is >= the N-th highest valid
    value (ties with the N-th value qualify); False otherwise.
    """
    fund_val = getattr(fund, "max_drawdown", None)
    if not _is_valid_drawdown(fund_val):
        return False

    peer_values = (getattr(f, "max_drawdown", None) for f in peers)
    # Descending: -5 > -20, so -5 is rank 1.
    valid = sorted((v for v in peer_values if _is_valid_drawdown(v)), reverse=True)

    # A lone valid value would trivially be "top 1"; require a real ranking.
    if len(valid) < 2:
        return False

    effective_n = min(n, len(valid))
    return fund_val >= valid[effective_n - 1]
def _bottom_n(fund: Fund, peers: List[Fund], metric: str, n: int = 10) -> bool:
    """
    Decide whether `fund` sits among the N lowest values of `metric`.

    Values are read with getattr(..., metric, None); only those accepted by
    _is_valid are ranked.  Ties with the N-th lowest value qualify.
    Returns False when the fund's own value is invalid or when fewer than
    two valid peer values exist.
    """
    own = getattr(fund, metric, None)
    if not _is_valid(own):
        return False

    peer_values = (getattr(peer, metric, None) for peer in peers)
    ascending = sorted(v for v in peer_values if _is_valid(v))
    if len(ascending) < 2:
        return False

    # Mirrors Excel's "Bottom N items" rule, with N capped at the number of
    # valid values.
    cutoff = ascending[min(n, len(ascending)) - 1]
    return own <= cutoff
def _category_avg(peers: List[Fund], metric: str) -> Optional[float]:
    """
    Arithmetic mean of `metric` over the peers whose value passes _is_valid.

    Returns None when no peer has a valid value.
    """
    total = 0
    count = 0
    for peer in peers:
        value = getattr(peer, metric, None)
        if _is_valid(value):
            total += value
            count += 1
    if not count:
        return None
    return total / count
def _light_red(fund: Fund, metric: str, cat_avg: Optional[float]) -> bool:
    """
    Tell whether `metric` fires a Light Red override for `fund`.

    The rule for the metric is looked up in DUAL_CONDITION_RULES:
      - "below_value":        red when the fund's value is < the threshold
      - "below_category_avg": red when `cat_avg` is known and the fund's
                              value is < `cat_avg`
    Metrics without a rule, and values rejected by _is_valid, are never red.
    """
    rule = DUAL_CONDITION_RULES.get(metric)
    if rule is None:
        return False

    kind, limit = rule
    value = getattr(fund, metric, None)
    if not _is_valid(value):
        return False

    if kind == "below_value":
        return value < limit
    if kind == "below_category_avg":
        if cat_avg is None:
            return False
        return value < cat_avg
    return False
# ─── Drawdown zero-cell fix ───────────────────────────────────────────────────
def drawdown_zero_fix(
    funds: List[Fund],
    *,
    verbose: bool = True,
) -> int:
    """
    Detect funds whose max_drawdown is missing (exactly 0 or None) and
    recompute it via the NAV metrics engine.

    Strategy
    --------
    1. Select every fund where max_drawdown is 0 or None, AND whose
       category does not start with a debt-style prefix, AND whose age
       (derived from fund._launch_date when that is a datetime) is either
       unknown or at least 3 years, AND whose name is not in
       csv_enrichment's _NAV_ATTEMPTED_FUNDS set.
    2. For each selected fund carrying a non-blank fund._scheme_code, call
       compute_nav_metrics_for_scheme() requesting only
       ["Maximum Drawdown"].  Funds without a scheme code are skipped.
    3. If a non-zero, non-NaN value comes back, write it to
       fund.max_drawdown (the fund object is mutated in place).

    Parameters
    ----------
    funds   : funds to inspect; qualifying funds are mutated in place.
    verbose : when True, print progress plus one outcome line per fund
              (SKIP / FIXED / MISS / ERROR).

    Returns the count of cells successfully fixed.  Returns 0 without doing
    anything when src.nav_metrics_engine cannot be imported.

    NOTE: per the original author's note this requires network access
          (mfapi.in + yfinance).  It is intentionally separated from
          compute_scores() so callers can opt in only when enrichment is
          desired.
    """
    # Imported here to avoid a circular dependency at module level.
    try:
        from src.nav_metrics_engine import NavEngineCache, compute_nav_metrics_for_scheme
    except ImportError:
        if verbose:
            print("[drawdown_fix] nav_metrics_engine not available — skipping.")
        return 0

    from concurrent.futures import ThreadPoolExecutor, as_completed as _as_completed
    from datetime import datetime as _dt

    # Set of fund names already attempted by the csv_enrichment NAV phase.
    try:
        from src.csv_enrichment import _NAV_ATTEMPTED_FUNDS as _nav_attempted
    except Exception:
        _nav_attempted = set()

    DEBT_PREFIXES = ("debt", "liquid", "overnight", "money market", "gilt",
                     "fixed maturity", "interval", "fmp")
    _now = _dt.now()

    def _fund_age_years(f) -> Optional[float]:
        """Age of the fund in years, or None when no launch datetime is set."""
        ld = getattr(f, "_launch_date", None)
        if not isinstance(ld, _dt):
            return None
        return (_now - ld).days / 365.25

    def _scheme_code_of(f) -> str:
        """Scheme code carried on the fund, or '' when absent."""
        return getattr(f, "_scheme_code", None) or ""

    def _needs_fix(f) -> bool:
        """True when the fund's drawdown is missing and worth recomputing."""
        # Only target funds where drawdown is truly missing (0 or None).
        if not (f.max_drawdown == 0 or f.max_drawdown is None):
            return False
        # Only equity/hybrid: debt funds have tiny/no drawdown, skip them.
        # A missing category is treated as "not debt".
        if (f.category or "").lower().startswith(DEBT_PREFIXES):
            return False
        # Fund must be ≥3 years old (unknown age is allowed through):
        # younger funds can't have 3Y NAV history.
        age = _fund_age_years(f)
        if age is not None and age < 3.0:
            return False
        # Skip funds already attempted by the csv_enrichment NAV phase:
        # if enrichment couldn't fill MDD, a second pass won't either.
        return f.name not in _nav_attempted

    zero_funds = [f for f in funds if _needs_fix(f)]
    if not zero_funds:
        if verbose:
            print("[drawdown_fix] No zero/missing drawdown cells found.")
        return 0

    if verbose:
        print(f"[drawdown_fix] Attempting to fix {len(zero_funds)} drawdown cells …")

    # Bulk-preload the cache before parallel workers start
    # (2 SQL queries instead of N, per the original author's note).
    try:
        from src.nav_metrics_engine import _bulk_preload_cache, resolve_benchmark_ticker
        _scheme_codes = [_scheme_code_of(f) for f in zero_funds]
        _bench_tickers = [resolve_benchmark_ticker(getattr(f, "benchmark", "") or "")
                          for f in zero_funds]
        _bulk_preload_cache(_scheme_codes, _bench_tickers)
    except Exception:
        pass  # deliberate best-effort: workers fall back to per-query lookups

    cache = NavEngineCache()
    fixed = 0

    with_code = [
        (f, _scheme_code_of(f), getattr(f, "benchmark", "") or "")
        for f in zero_funds
        if _scheme_code_of(f).strip()
    ]
    no_code = [f for f in zero_funds if not _scheme_code_of(f).strip()]
    if verbose:
        for f in no_code:
            print(f" SKIP {f.name[:55]} — no scheme code available")

    def _fix_one(args):
        """Worker: fetch only Maximum Drawdown for one (fund, code, benchmark)."""
        fund, scheme_code, benchmark = args
        metrics, skip = compute_nav_metrics_for_scheme(
            scheme_code=scheme_code,
            benchmark_type=benchmark,
            needed_metrics=["Maximum Drawdown"],
            cache=cache,
        )
        mdd = metrics.get("Maximum Drawdown")
        reason = skip.get("Maximum Drawdown", "unknown")
        return fund, mdd, reason

    with ThreadPoolExecutor(max_workers=12) as executor:
        futures = {executor.submit(_fix_one, item): item for item in with_code}
        # Results are consumed here, in the calling thread, one at a time,
        # so writing to the fund and to `fixed` needs no lock.
        for fut in _as_completed(futures):
            try:
                fund, mdd, reason = fut.result()
            except Exception as exc:
                # A failed worker must not abort the batch, but it should
                # not disappear silently either.
                if verbose:
                    failed_fund = futures[fut][0]
                    print(f" ERROR {failed_fund.name[:55]} — {exc}")
                continue

            # Accept only a real value: not None, not NaN, not zero.
            if mdd is not None and mdd == mdd and mdd != 0:
                fund.max_drawdown = mdd
                fixed += 1
                if verbose:
                    print(f" FIXED {fund.name[:55]} → MDD = {mdd:.3f}%")
            elif verbose:
                print(f" MISS {fund.name[:55]} — {reason}")

    if verbose:
        print(f"[drawdown_fix] Done. Fixed {fixed}/{len(zero_funds)} cells.")
    return fixed
# ─── Main scoring engine ──────────────────────────────────────────────────────
def compute_scores(funds: List[Fund]) -> List[Fund]:
    """
    Score every fund and rank it against the other funds of its category.

    Algorithm
    ---------
    Funds are grouped by fund.category.  Within a group, for each metric in
    WEIGHTS:
      1. The cell is "Light Green" when the fund is in the Top-N
         (TOP_10_METRICS, with max_drawdown using its dedicated helper) or
         Bottom-N (BOTTOM_10_METRICS) of the group.
      2. A green cell whose metric has a rule in DUAL_CONDITION_RULES is
         cancelled when that rule fires ("Light Red" override).
      3. Each green cell that survives adds the metric's weight.

    fund.score is the sum, capped at 10.0 and rounded to 3 decimals.

    Also sets:
      fund.rank_in_category : 1 = best within category (score descending,
                              then lower-cased name, then `order`)
      fund.is_top_quartile  : True for the first ceil(N/4) funds of the
                              category, and always for at least one fund

    Returns the same list (funds are mutated in place) for convenience.
    """
    def _ranking_key(f):
        # Higher score first; ties broken by name, then by `order`.
        return (-(f.score or 0), (f.name or "").lower(), getattr(f, "order", 0))

    # Group funds by category, preserving first-seen order.
    by_category: Dict[str, List[Fund]] = {}
    for f in funds:
        if f.category not in by_category:
            by_category[f.category] = []
        by_category[f.category].append(f)

    for peers in by_category.values():
        # Category averages feed the CAGR "below category average" rules.
        cagr_averages = {}
        for cagr_metric in ("cagr_3y", "cagr_5y", "cagr_10y"):
            cagr_averages[cagr_metric] = _category_avg(peers, cagr_metric)

        for f in peers:
            total = 0.0
            for metric, weight in WEIGHTS.items():
                # Green check
                if metric == "max_drawdown":
                    green = _top_n_drawdown(f, peers)
                elif metric in TOP_10_METRICS:
                    green = _top_n(f, peers, metric)
                elif metric in BOTTOM_10_METRICS:
                    green = _bottom_n(f, peers, metric)
                else:
                    green = False
                if not green:
                    continue

                # Light Red override: a fired rule zeroes the contribution.
                if metric in DUAL_CONDITION_RULES and _light_red(
                    f, metric, cagr_averages.get(metric)
                ):
                    continue

                total += weight

            f.score = round(min(total, 10.0), 3)

        # Rank within category
        ordered = sorted(peers, key=_ranking_key)
        quartile_size = max(1, math.ceil(len(ordered) / 4))
        for position, f in enumerate(ordered, start=1):
            f.rank_in_category = position
            f.is_top_quartile = position <= quartile_size

    return funds