"""
Weightage scoring algorithm for mutual fund schemes.
Scoring method: Sum of column weights where cell qualifies for Light Green (Top/Bottom 10)
AND is NOT overridden by Light Red fill (threshold violations).
Weight Distribution (Advisor-revised, March 2026):
1. Sortino Ratio: 1.300 (Top 10, higher is better)
2. Sharpe Ratio: 1.200 (Top 10, higher is better)
3. Information Ratio: 1.000 (Top 10, higher is better, Light Red if < 0)
4. Alpha: 1.000 (Top 10, higher is better, Light Red if < 1)
5. Maximum Drawdown: 1.350 (Top 10, closest to 0 is better)
6. Down Market Capture: 1.000 (Bottom 10, lower is better)
7. Standard Deviation: 1.000 (Bottom 10, lower is better)
8. 10 Years CAGR: 0.750 (Top 10, higher is better, Light Red if < Category Avg)
9. 5 Years CAGR: 0.600 (Top 10, higher is better, Light Red if < Category Avg)
10. 3 Years CAGR: 0.400 (Top 10, higher is better, Light Red if < Category Avg)
11. P/E Ratio: 0.150 (Bottom 10, lower is better)
12. TER: 0.150 (Bottom 10, lower is better)
13. Turnover (%): 0.100 (Bottom 10, lower is better)
Total: 10.000
"""
import math
from typing import List, Optional, Dict
from src.models import Fund
# ─── Weight map (Advisor-revised March 2026) ─────────────────────────────────
# Points a metric contributes to a fund's score when its cell is Light Green
# and is not overridden by a Light Red rule.  Keys are Fund attribute names.
WEIGHTS: Dict[str, float] = {
    "sortino": 1.30,
    "sharpe": 1.20,
    "info_ratio": 1.00,
    "alpha": 1.00,
    "max_drawdown": 1.35,
    "down_capture": 1.00,
    "std_dev": 1.00,
    "cagr_10y": 0.75,
    "cagr_5y": 0.60,
    "cagr_3y": 0.40,
    "pe_ratio": 0.15,
    "ter": 0.15,
    "turnover": 0.10,
}

# Sanity-check: the weights must add up to the 10-point model scale.
# Rounding to 3 decimals absorbs binary floating-point noise in the sum.
_TOTAL = round(sum(WEIGHTS.values()), 3)
# Raised explicitly instead of using an ``assert`` statement so the check still
# runs under ``python -O`` (which strips assert statements).  The exception
# type is unchanged: AssertionError.
if _TOTAL != 10.000:
    raise AssertionError(f"WEIGHTS do not sum to 10.000 — got {_TOTAL}")
# Metrics where higher is better → the Top-10 values in a category are green.
# max_drawdown belongs here because "closest to 0" is the highest value among
# negative drawdowns.
TOP_10_METRICS = [
    "sharpe",
    "sortino",
    "alpha",
    "info_ratio",
    "max_drawdown",
    "cagr_3y",
    "cagr_5y",
    "cagr_10y",
]

# Metrics where lower is better → the Bottom-10 values in a category are green.
BOTTOM_10_METRICS = [
    "ter",
    "turnover",
    "std_dev",
    "down_capture",
    "pe_ratio",
]
# Dual-condition metrics: a cell can rank green and still be overridden to
# Light Red.  Each entry maps metric -> (rule_type, threshold):
#   "below_value"        – red when the value is below ``threshold``
#   "below_category_avg" – red when the value is below the category average
#                          (no fixed threshold, hence None)
DUAL_CONDITION_RULES: Dict[str, tuple] = {
    # Light Red if alpha < 1%
    "alpha": ("below_value", 1),
    # Light Red if Information Ratio < 0
    "info_ratio": ("below_value", 0),
    # Light Red if the CAGR is below the category average
    "cagr_3y": ("below_category_avg", None),
    "cagr_5y": ("below_category_avg", None),
    "cagr_10y": ("below_category_avg", None),
}
# ─── Value helpers ────────────────────────────────────────────────────────────
def _is_valid(v) -> bool:
    """
    Return True if ``v`` is a usable metric value.

    A value is usable when it is a real number that is neither NaN nor zero.
    None and non-numeric objects (for example a stray ``"N/A"`` string) are
    rejected so they can never reach the sorting/averaging code.
    Exactly 0 is rejected because 0.0 is treated as missing/not-applicable
    for risk metrics.
    """
    if v is None:
        return False
    try:
        # math.isnan() accepts anything convertible to float, so NaN is caught
        # for every numeric type rather than only for the built-in float.
        if math.isnan(v):
            return False
    except OverflowError:
        # An int too large to convert to float is still a real, non-NaN number.
        pass
    except (TypeError, ValueError):
        # Not a real number at all (str, list, complex, ...).
        return False
    return bool(v != 0)
def _is_valid_drawdown(v) -> bool:
    """
    Return True if ``v`` is a usable Maximum Drawdown value.

    For Maximum Drawdown specifically, 0.0 is a genuine data-quality gap
    (overnight/liquid funds sometimes publish 0 when the real figure was never
    fetched).  Zero is therefore treated as invalid so that only funds with a
    real, non-zero drawdown compete in the ranking; drawdown_zero_fix() in
    this module can be used to backfill such cells.

    None, NaN (of any numeric type) and non-numeric objects are rejected too.
    """
    if v is None:
        return False
    try:
        # math.isnan() works for any value convertible to float, not just the
        # built-in float type.
        if math.isnan(v):
            return False
    except OverflowError:
        # An int too large to convert to float is still a real, non-NaN number.
        pass
    except (TypeError, ValueError):
        # Not a real number at all (str, list, complex, ...).
        return False
    # Exact zero is excluded on purpose (see docstring).
    return bool(v != 0)
# ─── Ranking helpers ──────────────────────────────────────────────────────────
def _top_n(fund: Fund, peers: List[Fund], metric: str, n: int = 10) -> bool:
    """
    Tell whether ``fund`` ranks among the N highest values of ``metric``.

    Mirrors Excel's "Top N items" conditional formatting: N is capped at the
    number of peers holding a rankable value, and a value tied with the N-th
    one still qualifies.  The answer is False when the fund's own value is not
    rankable or when fewer than two peers have a rankable value.

    Special case: for ``info_ratio`` an exact 0.0 is a real value and takes
    part in the ranking (only None/NaN are excluded); every other metric
    uses _is_valid(), which also excludes zero.
    """

    def _rankable(value) -> bool:
        if metric != "info_ratio":
            return _is_valid(value)
        # Information Ratio: zero is legitimate, only None and NaN are not.
        is_nan = isinstance(value, float) and value != value
        return value is not None and not is_nan

    own = getattr(fund, metric, None)
    if not _rankable(own):
        return False

    ranked = sorted(
        (v for v in (getattr(p, metric, None) for p in peers) if _rankable(v)),
        reverse=True,
    )
    if len(ranked) < 2:
        return False

    # Value sitting at the N-th place (N capped at the population size).
    cutoff = ranked[min(n, len(ranked)) - 1]
    return own >= cutoff
def _top_n_drawdown(fund: Fund, peers: List[Fund], n: int = 10) -> bool:
    """
    Tell whether ``fund`` is in the top-N for Maximum Drawdown.

    "Closest to 0" is best, i.e. the highest value among negatives: -5% beats
    -20%, so values are ranked in descending order.  Only values accepted by
    _is_valid_drawdown() (non-zero, non-None, non-NaN) take part.

    N is capped at the number of valid values (strict-N, no 50% fallback).
    At least two valid values are required, so a single fund that happens to
    be the only one in its category with a real drawdown does not qualify by
    default.  This matches the guard used by _top_n() and _bottom_n().
    """
    own = getattr(fund, "max_drawdown", None)
    if not _is_valid_drawdown(own):
        return False

    ranked = sorted(
        (
            v
            for v in (getattr(p, "max_drawdown", None) for p in peers)
            if _is_valid_drawdown(v)
        ),
        reverse=True,  # -5 > -20, so -5 is rank 1
    )
    # A ranking needs competitors: with fewer than two valid values nobody is
    # considered "top".
    if len(ranked) < 2:
        return False

    cutoff = ranked[min(n, len(ranked)) - 1]
    return own >= cutoff
def _bottom_n(fund: Fund, peers: List[Fund], metric: str, n: int = 10) -> bool:
    """
    Tell whether ``fund`` ranks among the N lowest values of ``metric``.

    Mirrors Excel's "Bottom N items" conditional formatting: N is capped at
    the number of peers with a valid value, and a value tied with the N-th
    lowest still qualifies.  Returns False when the fund's own value is not
    valid (see _is_valid) or when fewer than two peers have a valid value.
    """
    own = getattr(fund, metric, None)
    if not _is_valid(own):
        return False

    ascending = sorted(
        v for v in (getattr(p, metric, None) for p in peers) if _is_valid(v)
    )
    if len(ascending) < 2:
        return False

    # Value sitting at the N-th lowest place (N capped at the population size).
    cutoff = ascending[min(n, len(ascending)) - 1]
    return own <= cutoff
def _category_avg(peers: List[Fund], metric: str) -> Optional[float]:
    """
    Return the arithmetic mean of ``metric`` over the peers with a valid value.

    Values rejected by _is_valid() (None, NaN, zero) are left out of the mean.
    Returns None when no peer has a valid value.
    """
    usable = [
        value
        for value in (getattr(peer, metric, None) for peer in peers)
        if _is_valid(value)
    ]
    if not usable:
        return None
    return sum(usable) / len(usable)
def _light_red(fund: Fund, metric: str, cat_avg: Optional[float]) -> bool:
    """
    Tell whether ``metric`` triggers a Light Red override for ``fund``.

    Only metrics listed in DUAL_CONDITION_RULES can turn red.  A value that
    fails _is_valid() never triggers the override.  Depending on the rule:
      * "below_value"        – red when the value is below the fixed threshold
      * "below_category_avg" – red when ``cat_avg`` is known and the value is
                               below it
    Any other rule type yields False.
    """
    rule = DUAL_CONDITION_RULES.get(metric)
    if rule is None:
        return False

    kind, limit = rule
    value = getattr(fund, metric, None)
    if not _is_valid(value):
        return False

    if kind == "below_value":
        return value < limit
    if kind == "below_category_avg":
        return cat_avg is not None and value < cat_avg
    return False
# ─── Drawdown zero-cell fix ───────────────────────────────────────────────────
def drawdown_zero_fix(
    funds: List[Fund],
    *,
    verbose: bool = True,
    max_workers: int = 12,
) -> int:
    """
    Backfill missing Maximum Drawdown values from NAV history.

    A fund is a candidate when ALL of the following hold:
      * ``max_drawdown`` is exactly 0 or None (treated as a data-quality gap);
      * its category does not start with a debt-style prefix (debt, liquid,
        overnight, ...), since those funds have tiny or no drawdown;
      * it is at least 3 years old according to its ``_launch_date``
        attribute; funds without a datetime ``_launch_date`` are kept;
      * its name is not in ``src.csv_enrichment._NAV_ATTEMPTED_FUNDS``, i.e.
        the enrichment NAV phase has not already tried it.

    Candidates without a non-blank ``_scheme_code`` attribute are skipped.
    For the others, compute_nav_metrics_for_scheme() is called from a thread
    pool requesting only ["Maximum Drawdown"], with the fund's ``benchmark``
    attribute as benchmark type.  A result that is not None, not NaN and not
    zero is written to ``fund.max_drawdown`` (funds are mutated in place).

    Args:
        funds: Funds to inspect.
        verbose: Print progress and per-fund outcomes when True.
        max_workers: Size of the thread pool used for the per-fund calls.
            Must be positive (ThreadPoolExecutor rejects other values).

    Returns:
        The number of funds whose ``max_drawdown`` was updated.  0 is returned
        when ``src.nav_metrics_engine`` cannot be imported or when there is
        nothing to fix.

    NOTE: This function requires network access (mfapi.in + yfinance).
          It is intentionally separated from compute_scores() so callers
          can opt in only when enrichment is desired.
    """
    # Imported here to avoid a circular dependency at module level.
    try:
        from src.nav_metrics_engine import NavEngineCache, compute_nav_metrics_for_scheme
    except ImportError:
        if verbose:
            print("[drawdown_fix] nav_metrics_engine not available — skipping.")
        return 0

    from concurrent.futures import ThreadPoolExecutor, as_completed
    from datetime import datetime

    debt_prefixes = (
        "debt", "liquid", "overnight", "money market", "gilt",
        "fixed maturity", "interval", "fmp",
    )

    # Names already attempted by the csv_enrichment NAV phase; best-effort
    # lookup, an unavailable module simply means "nothing attempted yet".
    try:
        from src.csv_enrichment import _NAV_ATTEMPTED_FUNDS as nav_attempted
    except Exception:
        nav_attempted = set()

    def _is_candidate(f) -> bool:
        """Apply the four candidate conditions listed in the docstring."""
        if not (f.max_drawdown == 0 or f.max_drawdown is None):
            return False
        # A missing category is treated as "not debt" instead of crashing.
        category = (f.category or "").lower()
        if category.startswith(debt_prefixes):
            return False
        launch = getattr(f, "_launch_date", None)
        if isinstance(launch, datetime):
            # Use the launch date's own tzinfo so naive and aware datetimes
            # are never mixed in the subtraction.
            age_years = (datetime.now(tz=launch.tzinfo) - launch).days / 365.25
            if age_years < 3.0:
                # Younger funds cannot have 3 years of NAV history.
                return False
        # If enrichment could not fill MDD, a second pass will not either.
        return f.name not in nav_attempted

    zero_funds = [f for f in funds if _is_candidate(f)]
    if not zero_funds:
        if verbose:
            print("[drawdown_fix] No zero/missing drawdown cells found.")
        return 0
    if verbose:
        print(f"[drawdown_fix] Attempting to fix {len(zero_funds)} drawdown cells …")

    # Bulk-preload the cache before the parallel workers start.  This is an
    # optimisation only: on failure the workers fall back to per-query lookups.
    try:
        from src.nav_metrics_engine import _bulk_preload_cache, resolve_benchmark_ticker
        _bulk_preload_cache(
            [getattr(f, "_scheme_code", None) or "" for f in zero_funds],
            [resolve_benchmark_ticker(getattr(f, "benchmark", "") or "") for f in zero_funds],
        )
    except Exception as exc:
        if verbose:
            print(f"[drawdown_fix] Cache preload failed ({exc!r}) — continuing without it.")

    cache = NavEngineCache()

    # Split candidates into those that can be looked up and those that cannot.
    with_code = []
    for f in zero_funds:
        scheme_code = getattr(f, "_scheme_code", None) or ""
        if scheme_code.strip():
            with_code.append((f, scheme_code, getattr(f, "benchmark", "") or ""))
        elif verbose:
            print(f" SKIP {f.name[:55]} — no scheme code available")

    def _fix_one(item):
        """Fetch (mdd, skip_reason) for one (fund, scheme_code, benchmark)."""
        _fund, scheme_code, benchmark = item
        metrics, skip = compute_nav_metrics_for_scheme(
            scheme_code=scheme_code,
            benchmark_type=benchmark,
            needed_metrics=["Maximum Drawdown"],
            cache=cache,
        )
        return metrics.get("Maximum Drawdown"), skip.get("Maximum Drawdown", "unknown")

    fixed = 0
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {executor.submit(_fix_one, item): item[0] for item in with_code}
        # Results are consumed in the calling thread, so updating the funds
        # and the counter below needs no lock.
        for fut in as_completed(futures):
            fund = futures[fut]
            try:
                mdd, reason = fut.result()
            except Exception as exc:
                # One failing fund must not abort the batch, but the failure
                # is reported instead of being dropped silently.
                if verbose:
                    print(f" MISS {fund.name[:55]} — worker failed: {exc!r}")
                continue
            # ``mdd == mdd`` is False only for NaN, which is not a real fix.
            if mdd is not None and mdd == mdd and mdd != 0:
                fund.max_drawdown = mdd
                fixed += 1
                if verbose:
                    print(f" FIXED {fund.name[:55]} — MDD = {mdd:.3f}%")
            elif verbose:
                print(f" MISS {fund.name[:55]} — {reason}")

    if verbose:
        print(f"[drawdown_fix] Done. Fixed {fixed}/{len(zero_funds)} cells.")
    return fixed
# ─── Main scoring engine ──────────────────────────────────────────────────────
def compute_scores(funds: List[Fund]) -> List[Fund]:
    """
    Score every fund and rank it against the peers of its category.

    Algorithm
    ---------
    Funds are grouped by ``fund.category``.  For each weighted metric:
      1. Decide whether the fund is Light Green, i.e. in the Top-N or
         Bottom-N (as appropriate for the metric) of its category.
      2. If it is green but a dual-condition rule fires, the cell is
         Light Red and contributes nothing.
      3. Otherwise a green cell adds the metric's weight to the score.

    Attributes written on each fund:
      fund.score            → sum of earned weights, capped at 10.0 and
                              rounded to 3 decimals
      fund.rank_in_category → 1 = best within the category; ties on score are
                              broken by lower-cased name, then by the fund's
                              ``order`` attribute (0 when absent)
      fund.is_top_quartile  → True for the top ⌈N/4⌉ funds (at least one)

    Returns the same list (mutated in place) for convenience.
    """
    by_category: Dict[str, List[Fund]] = {}
    for item in funds:
        by_category.setdefault(item.category, []).append(item)

    for members in by_category.values():
        # Category averages are only needed by the CAGR dual-condition rules.
        averages = {
            cagr: _category_avg(members, cagr)
            for cagr in ("cagr_3y", "cagr_5y", "cagr_10y")
        }

        for member in members:
            total = 0.0
            for metric, weight in WEIGHTS.items():
                # Green check: drawdown has its own ranking helper.
                if metric == "max_drawdown":
                    green = _top_n_drawdown(member, members)
                elif metric in TOP_10_METRICS:
                    green = _top_n(member, members, metric)
                elif metric in BOTTOM_10_METRICS:
                    green = _bottom_n(member, members, metric)
                else:
                    green = False

                if not green:
                    continue
                # Light Red override: a green cell zeroed by a threshold rule.
                if metric in DUAL_CONDITION_RULES and _light_red(
                    member, metric, averages.get(metric)
                ):
                    continue
                total += weight

            member.score = round(min(total, 10.0), 3)

        # Rank within the category: best score first, deterministic ties.
        ordering = sorted(
            members,
            key=lambda f: (-(f.score or 0), (f.name or "").lower(), getattr(f, "order", 0)),
        )
        quartile_size = max(1, math.ceil(len(ordering) / 4))
        for position, member in enumerate(ordering, start=1):
            member.rank_in_category = position
            member.is_top_quartile = position <= quartile_size

    return funds