Spaces:

Parthiban97
/

MF

Running

File size: 16,953 Bytes

b0e15c1

"""

Weightage scoring algorithm for mutual fund schemes.



Scoring method: Sum of column weights where cell qualifies for Light Green (Top/Bottom 10)

AND is NOT overridden by Light Red fill (threshold violations).



Weight Distribution (Advisor-revised, March 2026):

1.  Sortino Ratio:        1.300  (Top 10, higher is better)

2.  Sharpe Ratio:         1.200  (Top 10, higher is better)

3.  Information Ratio:    1.000  (Top 10, higher is better, Light Red if < 0)

4.  Alpha:                1.000  (Top 10, higher is better, Light Red if < 1)

5.  Maximum Drawdown:     1.350  (Top 10, closest to 0 is better)

6.  Down Market Capture:  1.000  (Bottom 10, lower is better)

7.  Standard Deviation:   1.000  (Bottom 10, lower is better)

8.  10 Years CAGR:        0.750  (Top 10, higher is better, Light Red if < Category Avg)

9.  5 Years CAGR:         0.600  (Top 10, higher is better, Light Red if < Category Avg)

10. 3 Years CAGR:         0.400  (Top 10, higher is better, Light Red if < Category Avg)

11. P/E Ratio:            0.150  (Bottom 10, lower is better)

12. TER:                  0.150  (Bottom 10, lower is better)

13. Turnover (%):         0.100  (Bottom 10, lower is better)



Total: 10.000

"""

import math
from typing import List, Optional, Dict
from src.models import Fund


# ─── Weight map (Advisor-revised March 2026) ─────────────────────────────────
# Keys are the Fund attribute names that the ranking helpers read via
# getattr(); each value is the amount compute_scores() adds to a fund's score
# when that metric qualifies as "Light Green" and is not overridden.
WEIGHTS: Dict[str, float] = {
    "sortino":      1.30,
    "sharpe":       1.20,
    "info_ratio":   1.00,
    "alpha":        1.00,
    "max_drawdown": 1.35,
    "down_capture": 1.00,
    "std_dev":      1.00,
    "cagr_10y":     0.75,
    "cagr_5y":      0.60,
    "cagr_3y":      0.40,
    "pe_ratio":     0.15,
    "ter":          0.15,
    "turnover":     0.10,
}

# Sanity-check: total should equal 10.000
# The sum is rounded to 3 decimals first so binary floating-point noise cannot
# trip the comparison.  NOTE: `assert` statements are skipped under `python -O`.
_TOTAL = round(sum(WEIGHTS.values()), 3)
assert _TOTAL == 10.000, f"WEIGHTS do not sum to 10.000 — got {_TOTAL}"

# Metrics where higher is better → Top 10
# "max_drawdown" is listed here, but compute_scores() tests for it first and
# routes it to _top_n_drawdown() rather than the generic _top_n().
TOP_10_METRICS = [
    "sharpe", "sortino", "alpha",
    "info_ratio", "max_drawdown",
    "cagr_3y", "cagr_5y", "cagr_10y",
]

# Metrics where lower is better → Bottom 10
BOTTOM_10_METRICS = [
    "ter", "turnover", "std_dev",
    "down_capture", "pe_ratio",
]

# Dual-condition metrics: qualifies for green AND may trigger light-red override
# Each value is a (rule_type, threshold) pair consumed by _light_red():
#   "below_value"        → red when the fund's value is < threshold
#   "below_category_avg" → red when the value is < the category average that
#                          the caller supplies at scoring time (threshold unused,
#                          hence None)
DUAL_CONDITION_RULES: Dict[str, tuple] = {
    "alpha":      ("below_value",        1),    # Light Red if alpha < 1%
    "info_ratio": ("below_value",        0),    # Light Red if IR < 0
    "cagr_3y":    ("below_category_avg", None), # Light Red if < category avg
    "cagr_5y":    ("below_category_avg", None),
    "cagr_10y":   ("below_category_avg", None),
}


# ─── Value helpers ────────────────────────────────────────────────────────────

def _is_valid(v) -> bool:
    """
    Report whether ``v`` may take part in ranking and averaging.

    A value is rejected when it is ``None``, a float NaN, or equal to zero.
    Zero is rejected deliberately: for the risk metrics scored here a 0.0
    is treated as "missing / not applicable" rather than a real reading.
    """
    if v is None:
        return False
    # NaN is the only float that compares unequal to itself.
    is_nan = isinstance(v, float) and v != v
    return not (is_nan or v == 0)


def _is_valid_drawdown(v) -> bool:
    """
    Report whether ``v`` is a usable Maximum Drawdown reading.

    ``None`` and float NaN are rejected.  An exact zero is rejected as well:
    for this metric a published 0 is treated as a data-quality gap (the real
    figure was never fetched), so only funds carrying a non-zero drawdown
    compete in the ranking.  drawdown_zero_fix() is the companion routine that
    tries to back-fill those zero cells.
    """
    if v is None or (isinstance(v, float) and v != v):   # missing or NaN
        return False
    # Exact zero is excluded; every other value is accepted as-is.
    return not (v == 0)


# ─── Ranking helpers ──────────────────────────────────────────────────────────

def _top_n(fund: Fund, peers: List[Fund], metric: str, n: int = 10) -> bool:
    """
    Decide whether ``fund`` sits among the ``n`` highest peer values of ``metric``.

    The cut-off is the n-th largest valid value (or the smallest valid value
    when fewer than ``n`` exist); the fund qualifies when its own value is at
    least that cut-off, so ties at the boundary all qualify.  This mirrors
    Excel's "Top N items" conditional-formatting rule.

    Validity is judged by _is_valid(), with one exception: for
    ``"info_ratio"`` a value of exactly 0 is a legitimate reading (only
    values below 0 are "red"), so only None/NaN are discarded there.

    Returns False when the fund's own value is invalid or when fewer than two
    valid peer values exist.
    """
    zero_is_real = metric == "info_ratio"

    def _rankable(value) -> bool:
        if not zero_is_real:
            return _is_valid(value)
        # Information Ratio: keep zeros, drop only None and NaN.
        return value is not None and not (isinstance(value, float) and value != value)

    own = getattr(fund, metric, None)
    if not _rankable(own):
        return False

    peer_values = (getattr(peer, metric, None) for peer in peers)
    ranked = sorted((v for v in peer_values if _rankable(v)), reverse=True)
    if len(ranked) < 2:
        return False

    # N is capped at the number of valid values, as Excel does.
    cutoff = ranked[min(n, len(ranked)) - 1]
    return own >= cutoff


def _top_n_drawdown(fund: Fund, peers: List[Fund], n: int = 10) -> bool:
    """
    Top-N test specialised for Maximum Drawdown.

    "Closest to 0" is the best drawdown, which for negative numbers is simply
    the highest value (-5% beats -20%), so values are ranked in descending
    order.  Only readings accepted by _is_valid_drawdown() (non-None,
    non-NaN, non-zero) take part.  N is fixed at ``n`` and merely capped at
    the number of valid readings; it never scales with category size.

    Returns False when the fund's own drawdown is invalid or when no peer has
    a valid drawdown.

    NOTE(review): no minimum peer count is enforced here, so when exactly one
    valid reading exists that fund qualifies.  _top_n() and _bottom_n()
    require at least two valid values — confirm the difference is intended.
    """
    own = getattr(fund, "max_drawdown", None)
    if not _is_valid_drawdown(own):
        return False

    readings = (getattr(peer, "max_drawdown", None) for peer in peers)
    # Descending: -5 ranks ahead of -20.
    ranked = sorted((v for v in readings if _is_valid_drawdown(v)), reverse=True)
    if not ranked:
        return False

    cutoff = ranked[min(n, len(ranked)) - 1]
    return own >= cutoff


def _bottom_n(fund: Fund, peers: List[Fund], metric: str, n: int = 10) -> bool:
    """
    Decide whether ``fund`` sits among the ``n`` lowest peer values of ``metric``.

    Mirrors Excel's "Bottom N items" conditional-formatting rule: the cut-off
    is the n-th smallest valid value (N capped at the number of valid values)
    and the fund qualifies when its own value does not exceed it, so ties at
    the boundary all qualify.

    Returns False when the fund's own value fails _is_valid() or when fewer
    than two valid peer values exist.
    """
    own = getattr(fund, metric, None)
    if not _is_valid(own):
        return False

    peer_values = (getattr(peer, metric, None) for peer in peers)
    ascending = sorted(v for v in peer_values if _is_valid(v))
    if len(ascending) < 2:
        return False

    cutoff = ascending[min(n, len(ascending)) - 1]
    return own <= cutoff


def _category_avg(peers: List[Fund], metric: str) -> Optional[float]:
    """
    Return the arithmetic mean of ``metric`` over the peers whose value
    passes _is_valid() (so None, NaN and zero readings are left out), or
    None when no peer has a valid value.
    """
    readings = []
    for peer in peers:
        value = getattr(peer, metric, None)
        if _is_valid(value):
            readings.append(value)

    if not readings:
        return None
    return sum(readings) / len(readings)


def _light_red(fund: Fund, metric: str, cat_avg: Optional[float]) -> bool:
    """
    Tell whether ``metric`` triggers the Light Red override for ``fund``.

    Only metrics listed in DUAL_CONDITION_RULES can be red.  The fund's value
    must pass _is_valid(); it is then compared according to the rule type:

      * ``"below_value"``        – red when the value is below the rule's
                                   fixed threshold;
      * ``"below_category_avg"`` – red when ``cat_avg`` is known and the value
                                   is below it.

    Any other case (unknown metric, invalid value, unknown rule type, missing
    category average) yields False.
    """
    rule = DUAL_CONDITION_RULES.get(metric)
    if rule is None:
        return False

    kind, limit = rule
    reading = getattr(fund, metric, None)
    if not _is_valid(reading):
        return False

    if kind == "below_value":
        return reading < limit
    if kind == "below_category_avg":
        return cat_avg is not None and reading < cat_avg
    return False


# ─── Drawdown zero-cell fix ───────────────────────────────────────────────────

def drawdown_zero_fix(
    funds: List[Fund],
    *,
    verbose: bool = True,
) -> int:
    """
    Back-fill missing Maximum Drawdown values via the NAV metrics engine.

    Candidate selection
    -------------------
    A fund is a candidate when ALL of the following hold:

      * ``fund.max_drawdown`` is 0 or None;
      * ``fund.category`` does not start (case-insensitively) with one of the
        debt-style prefixes in ``DEBT_PREFIXES``; a missing category is
        treated as non-debt;
      * ``fund._launch_date``, when it is a ``datetime``, is at least 3 years
        in the past (funds without a usable launch date are kept);
      * ``fund.name`` is not in ``src.csv_enrichment._NAV_ATTEMPTED_FUNDS``
        (if that module cannot be imported, nothing is excluded).

    Candidates whose ``_scheme_code`` attribute is missing or blank are
    reported as skipped.  For the rest, ``compute_nav_metrics_for_scheme()``
    is called on a thread pool requesting only ``"Maximum Drawdown"``; a
    result that is not None, not NaN and not zero is written back to
    ``fund.max_drawdown``.

    Args:
        funds:   Funds to inspect; matching funds are mutated in place.
        verbose: When True, progress, skips, misses and worker errors are
                 printed to stdout.

    Returns:
        The number of funds whose ``max_drawdown`` was replaced.  Returns 0
        without doing anything when ``src.nav_metrics_engine`` cannot be
        imported.

    NOTE: The NAV engine is expected to need network access (mfapi.in +
          yfinance).  This function is intentionally separate from
          compute_scores() so callers opt in only when enrichment is desired.
    """
    # Imported here to avoid a circular dependency at module level.
    try:
        from src.nav_metrics_engine import NavEngineCache, compute_nav_metrics_for_scheme
    except ImportError:
        if verbose:
            print("[drawdown_fix] nav_metrics_engine not available — skipping.")
        return 0

    from concurrent.futures import ThreadPoolExecutor, as_completed
    from datetime import datetime

    DEBT_PREFIXES = ("debt", "liquid", "overnight", "money market", "gilt",
                     "fixed maturity", "interval", "fmp")

    now = datetime.now()

    # Optional[...] rather than ``float | None``: the annotation is evaluated
    # when this nested function is defined, and the PEP 604 form raises
    # TypeError on Python < 3.10.
    def _fund_age_years(f) -> Optional[float]:
        """Age of the fund in years, or None when no datetime launch date is set."""
        launched = getattr(f, "_launch_date", None)
        if not isinstance(launched, datetime):
            return None
        return (now - launched).days / 365.25

    def _scheme_code_of(f) -> str:
        """The fund's scheme code, or '' when the attribute is absent/empty."""
        return getattr(f, "_scheme_code", None) or ""

    # Funds already attempted by the csv_enrichment NAV phase: if enrichment
    # could not fill MDD, a second pass will not either.
    try:
        from src.csv_enrichment import _NAV_ATTEMPTED_FUNDS as nav_attempted
    except Exception:
        nav_attempted = set()

    def _is_candidate(f) -> bool:
        """Apply the selection rules listed in the enclosing docstring."""
        # Only target funds where drawdown is truly missing (0 or None).
        if not (f.max_drawdown == 0 or f.max_drawdown is None):
            return False
        # Equity/hybrid only — debt funds have tiny/no drawdown.
        if (f.category or "").lower().startswith(DEBT_PREFIXES):
            return False
        # Funds younger than 3 years cannot have 3Y NAV history.
        age = _fund_age_years(f)
        if age is not None and age < 3.0:
            return False
        return f.name not in nav_attempted

    zero_funds = [f for f in funds if _is_candidate(f)]

    if not zero_funds:
        if verbose:
            print("[drawdown_fix] No zero/missing drawdown cells found.")
        return 0

    if verbose:
        print(f"[drawdown_fix] Attempting to fix {len(zero_funds)} drawdown cells …")

    # Bulk-preload the cache before the workers start (2 SQL queries instead
    # of N).  Best-effort: on any failure the workers fall back to per-query
    # lookups, so the error is reported but never raised.
    try:
        from src.nav_metrics_engine import _bulk_preload_cache, resolve_benchmark_ticker
        _bulk_preload_cache(
            [_scheme_code_of(f) for f in zero_funds],
            [resolve_benchmark_ticker(getattr(f, "benchmark", "") or "") for f in zero_funds],
        )
    except Exception as exc:
        if verbose:
            print(f"[drawdown_fix] Cache preload skipped — {exc}")

    cache = NavEngineCache()

    with_code = []
    for f in zero_funds:
        code = _scheme_code_of(f)
        if code.strip():
            with_code.append((f, code, getattr(f, "benchmark", "") or ""))
        elif verbose:
            print(f"  SKIP  {f.name[:55]} — no scheme code available")

    def _fix_one(args):
        """Worker: fetch Maximum Drawdown for one (fund, scheme_code, benchmark)."""
        fund, scheme_code, benchmark = args
        metrics, skip = compute_nav_metrics_for_scheme(
            scheme_code=scheme_code,
            benchmark_type=benchmark,
            needed_metrics=["Maximum Drawdown"],
            cache=cache,
        )
        return fund, metrics.get("Maximum Drawdown"), skip.get("Maximum Drawdown", "unknown")

    # Results are consumed and funds mutated in this thread only, so no lock
    # is needed around the write-back or the counter.
    fixed = 0
    with ThreadPoolExecutor(max_workers=12) as executor:
        futures = {executor.submit(_fix_one, item): item for item in with_code}
        for fut in as_completed(futures):
            try:
                fund, mdd, reason = fut.result()
            except Exception as exc:
                # One failing scheme must not abort the batch, but it should
                # not disappear silently either.
                if verbose:
                    failed_fund = futures[fut][0]
                    print(f"  ERROR {failed_fund.name[:55]} — {exc}")
                continue

            # ``mdd == mdd`` rejects NaN, which is never a real drawdown.
            if mdd is not None and mdd == mdd and mdd != 0:
                fund.max_drawdown = mdd
                fixed += 1
                if verbose:
                    print(f"  FIXED {fund.name[:55]}  →  MDD = {mdd:.3f}%")
            elif verbose:
                print(f"  MISS  {fund.name[:55]} — {reason}")

    if verbose:
        print(f"[drawdown_fix] Done. Fixed {fixed}/{len(zero_funds)} cells.")

    return fixed


# ─── Main scoring engine ──────────────────────────────────────────────────────

def compute_scores(funds: List[Fund]) -> List[Fund]:
    """
    Score every fund against its category peers and rank it.

    Funds are grouped by ``fund.category``.  Within a group, each weighted
    metric is evaluated for each fund:

      1. "Light Green" test – ``max_drawdown`` uses _top_n_drawdown(); metrics
         in TOP_10_METRICS use _top_n(); metrics in BOTTOM_10_METRICS use
         _bottom_n().
      2. "Light Red" override – for metrics in DUAL_CONDITION_RULES, a green
         cell is cancelled when _light_red() fires (CAGR rules receive the
         category average of that CAGR metric).
      3. A cell that is green and not red contributes its weight.

    Attributes written on each fund:

      ``score``             – sum of contributed weights, capped at 10.0 and
                              rounded to 3 decimals.
      ``rank_in_category``  – 1 = best; ordered by score (descending), then
                              lower-cased name, then the ``order`` attribute
                              (0 when absent).
      ``is_top_quartile``   – True for the first ⌈N/4⌉ ranks (at least one).

    Returns the same list object; funds are mutated in place.
    """
    by_category: Dict[str, List[Fund]] = {}
    for item in funds:
        by_category.setdefault(item.category, []).append(item)

    def _rank_key(f):
        # Best score first; name and original order break ties.
        return (-(f.score or 0), (f.name or "").lower(), getattr(f, "order", 0))

    for peers in by_category.values():

        # Category averages feed the CAGR "below category average" rules.
        cagr_means = {}
        for cagr_metric in ("cagr_3y", "cagr_5y", "cagr_10y"):
            cagr_means[cagr_metric] = _category_avg(peers, cagr_metric)

        for member in peers:
            total = 0.0

            for metric, weight in WEIGHTS.items():
                # ── Green check ──────────────────────────────────────────
                if metric == "max_drawdown":
                    green = _top_n_drawdown(member, peers)
                elif metric in TOP_10_METRICS:
                    green = _top_n(member, peers, metric)
                elif metric in BOTTOM_10_METRICS:
                    green = _bottom_n(member, peers, metric)
                else:
                    green = False

                if not green:
                    continue

                # ── Light Red override ───────────────────────────────────
                if metric in DUAL_CONDITION_RULES and _light_red(
                    member, metric, cagr_means.get(metric)
                ):
                    continue   # zeroed by override

                total += weight

            member.score = round(min(total, 10.0), 3)

        # ── Rank within category ─────────────────────────────────────────
        ordered = sorted(peers, key=_rank_key)
        quartile_size = max(1, math.ceil(len(ordered) / 4))

        for position, member in enumerate(ordered, start=1):
            member.rank_in_category = position
            member.is_top_quartile = position <= quartile_size

    return funds