OpenMHC / app.py
NSchuetz's picture
Add /api/data JSON endpoint (3 tracks incl. downstream) + CORS
ea8b332 verified
Raw
History Blame Contribute Delete
34.7 kB
"""OpenMHC leaderboard — self-rendering website (Tracks 1, 2a and 2b).
Computes the leaderboard live from the per-user substrate in the
``MyHeartCounts/OpenMHC-leaderboard-data`` HF dataset (see
``leaderboard_compute.py``) and serves it as an HTML page. The same data is
also exposed as JSON at ``/api/data`` (CORS-enabled) for the public site.
The dataset is public, so no token is required. Styling mirrors the public
MyHeartCounts / OpenMHC site (light theme, red accent). Each track has its own
table; the imputation table is grouped by sub-track (single-day / long-context),
and every table can be filtered by method type.
"""
from __future__ import annotations
import html
import math
from pathlib import Path
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse, HTMLResponse, JSONResponse
from leaderboard_compute import (
compute_downstream_rows,
compute_forecasting_rows,
compute_imputation_rows,
)
# The ASGI application. `docs_url=None` / `redoc_url=None` switch off FastAPI's
# auto-generated documentation pages, so only the routes declared in this file
# are served.
app = FastAPI(title="OpenMHC Leaderboard", docs_url=None, redoc_url=None)
# The public site (myheartcounts.stanford.edu) reads /api/data cross-origin.
# Every origin is allowed, but only for GET requests.
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_methods=["GET"],
allow_headers=["*"],
)
# Compute is mildly expensive (download + reduce); cache per track in-process.
# One entry per track key (the same keys as in TRACKS). `rows` holds the result
# of the track's compute function, `error` the formatted exception text of a
# failed compute; both are None until the track is first requested.
_CACHE: dict = {
"downstream": {"rows": None, "error": None},
"imputation": {"rows": None, "error": None},
"forecasting": {"rows": None, "error": None},
}
# (row key, header label). Order + arrows mirror the paper tables.
# "_pos" is the rank-standing badge column (gold/silver/bronze for the top 3).
# The row key is what the renderer looks up in each row dict; "_pos" is the one
# exception — it is filled from the row's position in the sorted table.
# "method", "mtype" and "submitter" render as text, every other key as a number.
_IMPUTATION_COLUMNS = [
("_pos", "#"),
("method", "Method"),
("mtype", "Type"),
("rank", "R ↓"),
("skill", "S ↑"),
("fair_skill", "S_fair ↑"),
("activity", "Activity ↑"),
("physiology", "Physio. ↑"),
("sleep", "Sleep ↑"),
("workout", "Workout ↑"),
("semantic", "Semantic ↑"),
("fallback", "Fallback ↓"),
("submitter", "Submitter"),
]
# Forecasting has the same shape minus the imputation-only "semantic" scope.
_FORECASTING_COLUMNS = [c for c in _IMPUTATION_COLUMNS if c[0] != "semantic"]
# Track 1 (predictive tasks) — headline metrics + the five outcome-category skills
# (the task domains; same category-balanced bootstrap mean as the headline S).
# Shares the leading and trailing columns with the other tracks; only the five
# per-category columns in the middle differ.
_DOWNSTREAM_COLUMNS = [
("_pos", "#"),
("method", "Method"),
("mtype", "Type"),
("rank", "R ↓"),
("skill", "S ↑"),
("fair_skill", "S_fair ↑"),
("demographics", "Demo. ↑"),
("conditions", "Medical ↑"),
("vitals", "Vitals ↑"),
("mental", "Mental ↑"),
("lifestyle", "Lifestyle ↑"),
("fallback", "Fallback ↓"),
("submitter", "Submitter"),
]
_IMPUTATION_NOTE = (
"<b>Metric legend</b> — scores are computed live vs the <b>LOCF</b> "
"(last-observation-carried-forward) baseline.<br>"
"<b>R</b> (Average Rank) — mean cross-method rank across all masking-scenario × channel "
"tasks; <b>1 = best</b> (lower is better).<br>"
"<b>S</b> (Skill Score) — overall % reduction in reconstruction error vs LOCF "
"(paired per-user geometric mean across tasks); higher is better.<br>"
"<b>S_fair</b> (Fairness skill) — % reduction in the cross-subgroup error <i>disparity</i> "
"(age group + sex, MAPD ratio vs LOCF); higher = more equitable.<br>"
"<b>Activity / Physio. / Sleep / Workout</b> — per-category skill on that sensor group's "
"channels (activity = steps, distance, flights; physiology = heart rate, active energy; "
"sleep = asleep / in-bed; workout = the 10 workout-type channels).<br>"
"<b>Semantic</b> — skill on the three structured-gap masking scenarios "
"(sleep gap, workout gap, intensity failure).<br>"
"<b>Fallback</b> — % of imputed values substituted by the LOCF baseline when the "
"method produced no valid output (lower is better).<br>"
"Source: <code>MyHeartCounts/OpenMHC-leaderboard-data</code>."
)
_FORECASTING_NOTE = (
"<b>Metric legend</b> — scores are computed live vs the <b>Seasonal Naive</b> baseline "
"(24-hour-ahead forecasting; MAE on continuous channels, AUROC on binary).<br>"
"<b>R</b> (Average Rank) — mean cross-method rank across channel tasks; "
"<b>1 = best</b> (lower is better).<br>"
"<b>S</b> (Skill Score) — overall category-balanced % reduction in forecast error vs "
"Seasonal Naive (paired per-user geometric mean); higher is better.<br>"
"<b>S_fair</b> (Fairness skill) — % reduction in the cross-subgroup error <i>disparity</i> "
"(age group + sex, MAPD ratio vs Seasonal Naive); higher = more equitable.<br>"
"<b>Activity / Physio. / Sleep / Workout</b> — per-category skill on that sensor group's "
"channels (activity = steps, distance, flights; physiology = heart rate, active energy; "
"sleep = asleep / in-bed; workout = the 10 workout-type channels).<br>"
"<b>Fallback</b> — % of forecasts substituted by the Seasonal Naive baseline when "
"the model produced no valid output (lower is better).<br>"
"Source: <code>MyHeartCounts/OpenMHC-leaderboard-data</code>."
)
_DOWNSTREAM_NOTE = (
"<b>Metric legend</b> — Track 1 predicts weekly health outcomes from 168-hour "
"sensor embeddings; scores are computed vs the <b>Linear</b> baseline.<br>"
"<b>R</b> (Average Rank) — mean cross-method rank across the outcome tasks; "
"<b>1 = best</b> (lower is better).<br>"
"<b>S</b> (Skill Score) — category-balanced % improvement over Linear across tasks "
"(per-task AUPRC / Spearman / Pearson, paired-bootstrap mean); higher is better.<br>"
"<b>S_fair</b> (Fairness skill) — % reduction in the cross-subgroup error <i>disparity</i> "
"(age group + sex, MAPD ratio vs Linear); higher = more equitable.<br>"
"<b>Demo. / Medical / Vitals / Mental / Lifestyle</b> — per-category skill on that outcome "
"group's tasks (Demographics; Medical Conditions &amp; Risk; Vitals &amp; Blood Biomarkers; "
"Mental Well-Being; Sleep &amp; Lifestyle), category-balanced like S.<br>"
"<b>Fallback</b> — % of test predictions substituted by the Linear baseline when the "
"method produced no valid output (lower is better).<br>"
"Source: <code>MyHeartCounts/OpenMHC-leaderboard-data</code>."
)
def _column_tips(baseline: str) -> dict[str, str]:
    """Build the hover-tooltip text for every column key.

    ``baseline`` is the name of the track's reference method; it is spliced
    into the tips that describe a score relative to that baseline. The result
    covers the column keys of all tracks, so one mapping serves any table.
    """

    def versus(subject: str) -> str:
        # Shared wording of the per-category skill tips.
        return f"Skill on {subject} vs {baseline}."

    tips: dict[str, str] = {}
    tips["_pos"] = "Leaderboard standing by average rank — gold / silver / bronze mark the top 3."
    tips["method"] = "Model name; links to its published checkpoint on Hugging Face where one exists."
    tips["mtype"] = "Method family — click to filter the table."
    tips["rank"] = "Average Rank (R): mean cross-method rank across tasks; 1 = best (lower is better)."
    tips["skill"] = (
        "Skill Score (S): overall % reduction in error vs the "
        + baseline
        + " baseline (paired per-user geometric mean); higher is better."
    )
    tips["fair_skill"] = (
        "Fairness skill (S_fair): % reduction in cross-subgroup (age + sex) error disparity vs "
        + baseline
        + " (MAPD ratio); higher = more equitable."
    )
    # Sensor-group categories (imputation / forecasting tables).
    tips["activity"] = versus("activity channels — steps, distance, flights —")
    tips["physiology"] = versus("physiology channels — heart rate, active energy —")
    tips["sleep"] = versus("sleep channels — asleep / in-bed —")
    tips["workout"] = versus("workout channels — the 10 workout-type channels —")
    tips["semantic"] = (
        "Skill on the three structured-gap masking scenarios "
        "(sleep gap, workout gap, intensity failure)."
    )
    # Outcome categories (predictive-tasks table).
    tips["demographics"] = versus("the Demographics tasks (age, sex, BMI)")
    tips["conditions"] = versus("the Medical Conditions & Risk tasks")
    tips["vitals"] = versus("the Vitals & Blood Biomarker tasks")
    tips["mental"] = versus("the Mental Well-Being tasks")
    tips["lifestyle"] = versus("the Sleep & Lifestyle tasks")
    tips["fallback"] = (
        "Fallback rate: % of predictions substituted by the "
        + baseline
        + " fallback when the model produced no valid output; lower is better."
    )
    tips["submitter"] = "Submitting team."
    return tips
# Ordered leaderboard sections rendered on the page (one table each).
# The first entry is the track shown when the page loads. Per-entry fields:
#   key       — track id; indexes _CACHE and is the top-level key in /api/data
#   compute   — zero-argument callable that returns the track's rows
#   columns   — (row key, header label) pairs for the table
#   subtracks — (key, label) pairs used to group rows into titled sections;
#               an empty list renders one ungrouped table
#   tab       — label of the track-selector button
#   title     — heading above the table
#   note      — legend HTML, inserted into the page as-is (not escaped)
#   tips      — per-column tooltip text
TRACKS = [
{
"key": "downstream",
"compute": compute_downstream_rows,
"columns": _DOWNSTREAM_COLUMNS,
"subtracks": [],
"tab": "Predictive Tasks",
"title": "Track 1 · Predictive Tasks",
"note": _DOWNSTREAM_NOTE,
"tips": _column_tips("Linear"),
},
{
"key": "imputation",
"compute": compute_imputation_rows,
"columns": _IMPUTATION_COLUMNS,
"subtracks": [
("single-day", "Single-day imputation"),
("long-context", "Long-context imputation (≥ 7×1440 time steps)"),
],
"tab": "Imputation",
"title": "Track 2a · Imputation",
"note": _IMPUTATION_NOTE,
"tips": _column_tips("LOCF"),
},
{
"key": "forecasting",
"compute": compute_forecasting_rows,
"columns": _FORECASTING_COLUMNS,
"subtracks": [],
"tab": "Forecasting",
"title": "Track 2b · Forecasting",
"note": _FORECASTING_NOTE,
"tips": _column_tips("Seasonal Naive"),
},
]
# External links. Each one is substituted for a %%NAME%% placeholder in the
# page template when the index page is rendered.
CODE_URL = "https://github.com/AshleyLab/myheartcounts-dataset"
MAIN_URL = "https://myheartcounts.stanford.edu"
MODELS_URL = "https://huggingface.co/MyHeartCounts/models"
# Where submissions go (PRs) + the step-by-step guide.
DATA_URL = "https://huggingface.co/datasets/MyHeartCounts/OpenMHC-leaderboard-data"
SUBMIT_URL = "https://github.com/AshleyLab/myheartcounts-dataset#submit-to-the-leaderboard"
PAGE = """<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>OpenMHC Leaderboard</title>
<link rel="icon" type="image/png" href="/logo.png">
<style>
:root{
--bg:#ffffff; --fg:#141b25; --muted:#6b7280; --line:#e8eaed; --soft:#f6f7f8;
--accent:#e4002b; --pos:#15803d; --neg:#e4002b;
}
*{box-sizing:border-box;}
html{-webkit-text-size-adjust:100%;}
body{margin:0;background:var(--bg);color:var(--fg);
font:16px/1.6 -apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,Helvetica,Arial,sans-serif;
-webkit-font-smoothing:antialiased;}
a{color:inherit;text-decoration:none;}
.wrap{max-width:1100px;margin:0 auto;padding:0 24px;}
.topbar{display:flex;align-items:center;justify-content:space-between;
padding:20px 0;border-bottom:1px solid var(--line);}
.brand{font-weight:800;letter-spacing:.02em;font-size:15px;display:inline-flex;align-items:center;gap:9px;}
.brand .logo{height:26px;width:auto;display:block;}
.nav-active{color:var(--accent);font-weight:700;font-size:14px;border-bottom:2px solid var(--accent);padding-bottom:4px;}
.hero{padding:56px 0 28px;}
.eyebrow{font-size:12px;font-weight:700;letter-spacing:.14em;text-transform:uppercase;color:var(--muted);margin:0 0 16px;}
h1{font-size:clamp(34px,6vw,52px);font-weight:800;letter-spacing:-.025em;line-height:1.02;margin:0 0 14px;}
.lede{font-size:18px;color:#3f4754;margin:0 0 26px;}
.pills{display:flex;flex-wrap:wrap;gap:12px;}
.pill{display:inline-flex;align-items:center;gap:8px;padding:10px 18px;border:1px solid var(--line);
border-radius:999px;font-weight:600;font-size:14px;background:#fff;transition:border-color .15s,background .15s;}
a.pill:hover{border-color:#c9ccd1;background:var(--soft);}
.pill.disabled{color:var(--muted);background:var(--soft);cursor:default;}
a.pill.primary{border-color:var(--accent);color:var(--accent);font-weight:700;}
a.pill.primary:hover{background:#fdecef;border-color:var(--accent);}
.section{padding:8px 0 64px;}
.section-title{font-size:13px;font-weight:700;letter-spacing:.06em;text-transform:uppercase;color:var(--muted);margin:0 0 14px;}
.submit{padding:26px 0 60px;border-top:1px solid var(--line);}
.submit p:not(.section-title){font-size:14px;color:#3f4754;line-height:1.65;max-width:780px;margin:0;}
.submit a{color:var(--accent);border-bottom:1px solid transparent;transition:border-color .15s;}
.submit a:hover{border-bottom-color:currentColor;}
thead th.filter-th{cursor:pointer;user-select:none;}
thead th.filter-th:hover{color:var(--fg);}
thead th.filter-th .caret{font-size:9px;}
thead th.filter-th.active{color:var(--accent);}
thead th.filter-th .dropdown{display:none;position:absolute;left:8px;top:100%;z-index:30;
background:#fff;border:1px solid var(--line);border-radius:10px;box-shadow:0 8px 28px rgba(16,24,40,.14);
padding:6px;min-width:150px;text-align:left;font-weight:400;text-transform:none;letter-spacing:0;}
thead th.filter-th.open .dropdown{display:block;}
thead th.filter-th .dropdown label{display:flex;align-items:center;gap:8px;padding:6px 8px;border-radius:6px;font-size:13px;color:var(--fg);}
thead th.filter-th .dropdown label:hover{background:var(--soft);}
thead th.filter-th .dropdown input{accent-color:var(--accent);width:15px;height:15px;margin:0;}
.card{border:1px solid var(--line);border-radius:14px;overflow:auto;box-shadow:0 1px 2px rgba(16,24,40,.04);}
table{width:100%;border-collapse:collapse;font-size:14px;}
th,td{padding:13px 16px;text-align:right;white-space:nowrap;}
th:first-child,td:first-child{text-align:center;padding-left:18px;padding-right:8px;}
th:nth-child(2),td:nth-child(2),th:nth-child(3),td:nth-child(3),th:last-child,td:last-child{text-align:left;}
thead th{font-size:11px;font-weight:600;letter-spacing:.06em;text-transform:uppercase;color:var(--muted);
border-bottom:1px solid var(--line);background:var(--soft);position:sticky;top:0;}
thead th.sortable{cursor:pointer;user-select:none;}
thead th.sortable:hover{color:var(--fg);}
thead th.s-asc,thead th.s-desc{color:var(--accent);}
thead th.s-asc::after{content:" ▲";font-size:9px;}
thead th.s-desc::after{content:" ▼";font-size:9px;}
tbody td{border-bottom:1px solid var(--soft);}
tbody tr:last-child td{border-bottom:none;}
tbody tr.mrow:hover td.method{color:var(--accent);}
td.method{font-weight:700;}
td.method a.mlink{color:inherit;border-bottom:1px solid transparent;transition:border-color .15s;}
td.method a.mlink::after{content:"↗";font-size:10px;color:var(--muted);font-weight:600;margin-left:3px;vertical-align:1px;}
tr.mrow:hover td.method a.mlink{border-bottom-color:currentColor;}
tr.mrow:hover td.method a.mlink::after{color:var(--accent);}
td.type{color:var(--muted);}
.num{font-variant-numeric:tabular-nums;}
.num.zero{color:var(--muted);} .num.best{font-weight:800;}
tr.sec td{font-weight:800;font-size:15px;padding-top:24px;padding-bottom:10px;border-bottom:1px solid var(--line);background:#fff;}
.note{color:var(--muted);font-size:13px;margin-top:18px;line-height:1.6;}
.err{color:var(--accent);background:#fdecef;border:1px solid #f6c9d2;padding:16px 18px;border-radius:12px;white-space:pre-wrap;}
code{font-family:ui-monospace,SFMono-Regular,Menlo,monospace;font-size:.9em;color:#3f4754;}
/* track selector — segmented control (mirrors the main site) */
.tabs{display:inline-flex;gap:4px;padding:5px;border:1px solid var(--line);border-radius:13px;
background:var(--soft);margin:4px 0 22px;}
.tab{appearance:none;border:0;background:transparent;color:var(--muted);font:inherit;font-weight:700;
font-size:14px;padding:9px 20px;border-radius:9px;cursor:pointer;transition:background .15s,color .15s;}
.tab:hover{color:var(--fg);}
.tab.active{background:#141b25;color:#fff;}
/* rank standing badge (gold / silver / bronze for the top 3 by average rank) */
.rankcell{width:1%;}
.rankbadge{display:inline-flex;align-items:center;justify-content:center;min-width:24px;height:24px;
padding:0 6px;border-radius:7px;font-weight:800;font-size:12px;font-variant-numeric:tabular-nums;
background:var(--soft);color:var(--muted);border:1px solid var(--line);}
.rankbadge.gold{background:#f7c948;color:#5a4300;border-color:#e6b733;}
.rankbadge.silver{background:#dfe3e8;color:#363b42;border-color:#cdd2d8;}
.rankbadge.bronze{background:#e7a76b;color:#5a3210;border-color:#d9945a;}
/* custom header tooltip — a single fixed bubble (never clipped by the card) */
thead th[data-tip]{position:relative;}
#tip{position:fixed;z-index:1000;max-width:300px;
font:500 12px/1.5 -apple-system,BlinkMacSystemFont,"Segoe UI",Roboto,Helvetica,Arial,sans-serif;
color:#fff;background:#141b25;padding:8px 11px;border-radius:8px;
box-shadow:0 8px 28px rgba(16,24,40,.25);pointer-events:none;
white-space:normal;text-transform:none;letter-spacing:normal;text-align:left;
opacity:0;transition:opacity .12s;}
#tip.show{opacity:1;}
@media(max-width:640px){.hero{padding:40px 0 20px;}}
</style>
</head>
<body>
<div class="wrap">
<div class="topbar">
<a class="brand" href="%%MAIN%%" target="_blank" rel="noopener"><img class="logo" src="/logo.png" alt="MyHeartCounts logo"> OpenMHC</a>
<span class="nav-active">Benchmark</span>
</div>
<div class="hero">
<p class="eyebrow">OpenMHC Benchmark</p>
<h1>OpenMHC Leaderboard</h1>
<p class="lede">Wearable &amp; mobile health benchmark on MyHeartCounts. Track 1 (predictive tasks) predicts weekly health outcomes &mdash; demographics, medical risk, vitals, mental well-being, and lifestyle &mdash; from 168-hour sensor embeddings. The generative tasks operate on the raw signals: Track 2a (imputation) reconstructs masked daily, minute-level signals, and Track 2b (forecasting) predicts future hourly signals. Each method is ranked by skill score vs a track baseline, computed live from the per-user evaluation substrate.</p>
<div class="pills">
<a class="pill primary" href="%%SUBMIT%%" target="_blank" rel="noopener">&#128228; Submit a model</a>
<a class="pill" href="%%CODE%%" target="_blank" rel="noopener">&#9881;&#65039; Code</a>
<span class="pill disabled">&#128202; Dataset &middot; coming soon</span>
<span class="pill disabled">&#128196; Paper &middot; coming soon</span>
<a class="pill" href="%%MAIN%%" target="_blank" rel="noopener">&#127968; MyHeartCounts</a>
<a class="pill" href="%%MODELS%%" target="_blank" rel="noopener">&#129303; Models</a>
</div>
</div>
%%CONTENT%%
<div class="submit">
<p class="section-title">Submit your model</p>
<p>Add a method by opening a pull request on the
<a href="%%DATA%%" target="_blank" rel="noopener">OpenMHC leaderboard dataset</a>
that adds your per-user evaluation substrate
(<code>&lt;track&gt;/&lt;method&gt;.parquet</code>) plus a small
<code>&lt;method&gt;.meta.json</code> sidecar. Produce the substrate by running the OpenMHC
eval with <code>output_dir=&hellip;</code>; the maintainers recompute the skill, fairness,
and rank scores from it. See the
<a href="%%SUBMIT%%" target="_blank" rel="noopener">step-by-step submission guide</a>
for the exact file schema.</p>
</div>
</div>
<script>
(function(){
function cell(td){
if('v' in td.dataset){var n=parseFloat(td.dataset.v);return {num:true,v:isNaN(n)?null:n};}
return {num:false,v:td.textContent.trim().toLowerCase()};
}
// Each table (one per track) gets its own independent sort + Type filter.
document.querySelectorAll('table').forEach(function(table){
var ths=table.querySelectorAll('thead th');
// Rows arrive pre-sorted by the server-marked default column; sync the arrow.
var marked=table.querySelector('thead th.s-asc, thead th.s-desc');
var cur=marked?{i:[].indexOf.call(ths,marked),d:marked.classList.contains('s-asc')?1:-1}:{i:-1,d:1};
function sortBy(i,d){
table.querySelectorAll('tbody').forEach(function(tb){
var rows=[].slice.call(tb.querySelectorAll('tr.mrow'));
rows.sort(function(a,b){
var A=cell(a.children[i]),B=cell(b.children[i]);
if(A.num){
if(A.v===null&&B.v===null)return 0;
if(A.v===null)return 1; if(B.v===null)return -1;
return (A.v-B.v)*d;
}
return String(A.v).localeCompare(String(B.v))*d;
});
rows.forEach(function(r){tb.appendChild(r);});
});
}
// Type column: checkbox filter (does NOT sort). Others: click to sort.
var boxes=table.querySelectorAll('input.tfilter');
function applyFilter(){
var checked={}, any=false;
boxes.forEach(function(cb){ if(cb.checked){checked[cb.value]=1; any=true;} });
var partial=any && (Object.keys(checked).length < boxes.length);
table.querySelectorAll('tr.mrow').forEach(function(r){
r.style.display=(!any||checked[r.dataset.type])?'':'none';
});
table.querySelectorAll('tbody').forEach(function(tb){
var vis=[].some.call(tb.querySelectorAll('tr.mrow'),function(r){return r.style.display!=='none';});
var sec=tb.querySelector('tr.sec'); if(sec) sec.style.display=vis?'':'none';
});
table.querySelectorAll('.filter-th').forEach(function(t){t.classList.toggle('active', partial);});
}
boxes.forEach(function(cb){ cb.addEventListener('change', applyFilter); });
ths.forEach(function(th,i){
if(th.dataset.nosort) return; // # column = rank standing, not sortable
if(th.dataset.filter){ // Type = filter dropdown, not sortable
th.addEventListener('click',function(e){
if(e.target.closest('.dropdown')) return; // clicks inside menu don't toggle
th.classList.toggle('open');
});
return;
}
th.classList.add('sortable');
th.addEventListener('click',function(){
var d=(cur.i===i)?-cur.d:(th.dataset.def==='asc'?1:-1);
cur={i:i,d:d};
ths.forEach(function(h){h.classList.remove('s-asc','s-desc');});
th.classList.add(d===1?'s-asc':'s-desc');
sortBy(i,d);
});
});
});
// Track tabs: show one section at a time (segmented-control style).
document.querySelectorAll('.tab').forEach(function(tab){
tab.addEventListener('click',function(){
var key=tab.dataset.track;
document.querySelectorAll('.tab').forEach(function(t){t.classList.toggle('active',t===tab);});
document.querySelectorAll('.section[data-track]').forEach(function(s){
s.hidden=(s.dataset.track!==key);
});
});
});
// Header tooltips: one fixed bubble, positioned under the hovered <th>.
var tipEl=document.createElement('div');
tipEl.id='tip';
document.body.appendChild(tipEl);
function placeTip(th){
var r=th.getBoundingClientRect(), b=tipEl.getBoundingClientRect();
var left=Math.min(Math.max(8,r.left), window.innerWidth-b.width-8);
var top=r.bottom+8;
if(top+b.height>window.innerHeight-8) top=r.top-b.height-8; // flip above if no room
tipEl.style.left=left+'px';
tipEl.style.top=top+'px';
}
document.querySelectorAll('thead th[data-tip]').forEach(function(th){
th.addEventListener('mouseenter',function(){
tipEl.textContent=th.getAttribute('data-tip');
tipEl.classList.add('show');
placeTip(th);
});
th.addEventListener('mouseleave',function(){ tipEl.classList.remove('show'); });
th.addEventListener('click',function(){ tipEl.classList.remove('show'); }); // sort/filter dismiss
});
// One global handler closes any open Type dropdown across all tables.
document.addEventListener('click',function(e){
if(!e.target.closest('.filter-th')){
document.querySelectorAll('.filter-th.open').forEach(function(t){t.classList.remove('open');});
}
});
})();
</script>
</body>
</html>
"""
# How long (seconds) a cached failure keeps being served before compute() is tried again.
_ERROR_RETRY_SECONDS = 60.0


def _rows(track_key: str, compute) -> tuple[list[dict] | None, str | None]:
    """Return ``(rows, error)`` for a track, computing and caching on demand.

    ``track_key`` selects the entry in ``_CACHE``; ``compute`` is a
    zero-argument callable producing the track's rows. Successful results are
    cached for the life of the process. A failure is cached too, so it can be
    surfaced on the page, but only for ``_ERROR_RETRY_SECONDS``: after that the
    next request retries, so a transient failure does not stick until the
    process is restarted.

    Exactly one of the two returned values is normally set: ``rows`` on
    success, ``error`` (``"<ExceptionType>: <message>"``) on failure.
    """
    import time  # local import keeps this block self-contained

    cache = _CACHE[track_key]
    if cache["rows"] is not None:
        return cache["rows"], cache["error"]

    failed_at = cache.get("error_at")
    in_cooldown = (
        cache["error"] is not None
        and failed_at is not None
        and time.monotonic() - failed_at < _ERROR_RETRY_SECONDS
    )
    if not in_cooldown:
        try:
            rows = compute()
        except Exception as e:  # surface the failure on the page; don't hide it
            cache["error"] = f"{type(e).__name__}: {e}"
            cache["error_at"] = time.monotonic()
        else:
            cache["rows"] = rows
            cache["error"] = None  # clear a failure left by an earlier attempt
    return cache["rows"], cache["error"]
# Per-column heatmap (paper-style blue gradient): better -> more saturated blue.
# `fallback` is a diagnostic, not a score, so it is left out and renders plain on every
# track. (Heatmapping it shades the column only when there's a range of values — a track
# with a non-zero fallback would get a coloured column while all-zero tracks would not.)
HIGHER_BETTER = {"skill", "fair_skill", "activity", "physiology", "sleep", "workout", "semantic",
                 "demographics", "conditions", "vitals", "mental", "lifestyle"}
LOWER_BETTER = {"rank"}
_HEAT_RGB = (59, 130, 246)
_HEAT_MAX_ALPHA = 0.55


def _is_finite_number(v) -> bool:
    """True when ``v`` is an int/float that is neither NaN nor infinite."""
    return isinstance(v, (int, float)) and math.isfinite(v)


def _col_stats(rows: list[dict]) -> dict:
    """Return ``{column key: (min, max)}`` over the scored columns of ``rows``.

    Only finite numbers count. NaN must be excluded because ``min`` / ``max``
    return NaN whenever it happens to come first, which would disable shading
    and best-marking for the whole column. Columns without any usable value
    get ``(0.0, 0.0)``.
    """
    stats = {}
    for k in HIGHER_BETTER | LOWER_BETTER:
        vals = [r[k] for r in rows if _is_finite_number(r.get(k))]
        stats[k] = (min(vals), max(vals)) if vals else (0.0, 0.0)
    return stats


def _heat_alpha(key: str, v, stats: dict) -> float:
    """Return the heatmap opacity for value ``v`` in column ``key``.

    The result lies in ``[0, _HEAT_MAX_ALPHA]``, rounded to three decimals.
    Missing and non-finite values get 0.0 (no shading).
    """
    if not _is_finite_number(v):
        return 0.0
    lo, hi = stats.get(key, (0.0, 0.0))
    if key in LOWER_BETTER:
        t = (hi - v) / (hi - lo) if hi > lo else 0.0  # lower is better
    else:
        t = v / hi if (v > 0 and hi > 0) else 0.0  # non-positive unshaded (paper)
    return round(max(0.0, min(1.0, t)) * _HEAT_MAX_ALPHA, 3)


def _bg_style(key: str, v, stats: dict) -> str:
    """Return a ``style`` attribute (with leading space) for the cell, or ``""`` when unshaded."""
    a = _heat_alpha(key, v, stats)
    if a <= 0:
        return ""
    r, g, b = _HEAT_RGB
    return f' style="background:rgba({r},{g},{b},{a})"'


def _is_best(key: str, v, stats: dict) -> bool:
    """True when ``v`` equals the best value of column ``key``.

    Best is the column minimum for ``LOWER_BETTER`` keys and the maximum
    otherwise. Missing and non-finite values are never best.
    """
    if not _is_finite_number(v):
        return False
    lo, hi = stats.get(key, (0.0, 0.0))
    target = lo if key in LOWER_BETTER else hi
    return abs(v - target) < 1e-9
def _num_span(key: str, v, stats: dict) -> str:
    """Format one numeric cell value as a ``<span class="num …">`` element.

    * missing (``None``) or non-finite (NaN / ±inf) values render as an em dash;
      the JSON API reports non-finite values as null, so the page treats them
      as missing too instead of printing "nan" / "+inf";
    * an exact zero in any column other than ``rank`` renders as a muted ``0.0``;
    * ``rank`` is shown with one decimal, ``fallback`` as an unsigned percentage,
      and every other key as a signed percentage;
    * the best value of the column (per ``stats``) gets the ``best`` class.
    """
    if v is None or (isinstance(v, float) and not math.isfinite(v)):
        return '<span class="num zero">&mdash;</span>'
    if key != "rank" and abs(v) < 1e-9:
        return '<span class="num zero">0.0</span>'
    if key == "rank":
        txt = f"{v:.1f}"
    elif key == "fallback":
        txt = f"{v * 100:.1f}"  # a rate (% of predictions), unsigned
    else:
        txt = f"{v * 100:+.1f}"
    best = " best" if _is_best(key, v, stats) else ""
    return f'<span class="num{best}">{txt}</span>'
def _rank_key(r: dict):
    """Sort key that orders rows by ascending ``rank``, unranked rows last.

    A row counts as unranked when ``rank`` is absent, ``None`` or a non-finite
    float. NaN in particular must not reach the comparison: every comparison
    with NaN is false, so the resulting order would depend on the input order.
    """
    v = r.get("rank")
    unranked = v is None or (isinstance(v, float) and not math.isfinite(v))
    return (unranked, 0.0 if unranked else v)
def _rank_badge(pos: int) -> str:
    """Return the standing badge for position ``pos`` (1-based).

    Positions 1, 2 and 3 carry an extra ``gold`` / ``silver`` / ``bronze``
    class; every other position gets the plain badge.
    """
    medals = {1: "gold", 2: "silver", 3: "bronze"}
    css = "rankbadge"
    if pos in medals:
        css = css + " " + medals[pos]
    return '<span class="' + css + '">' + str(pos) + "</span>"
def _method_row(r: dict, section_key: str, stats: dict, columns: list, pos: int) -> str:
    """Render one method as a ``<tr class="mrow">`` element.

    ``r`` is the method's row dict, ``section_key`` the sub-track section the
    row is listed under, ``stats`` the per-column (min, max) used for shading
    and best-marking, ``columns`` the (key, label) pairs of the table and
    ``pos`` the 1-based standing shown in the ``_pos`` badge column.

    Row content originates from submitted files, so all text is HTML-escaped.
    In addition:

    * ``model_url`` becomes a link only when it is an http(s) URL, so a
      ``javascript:`` (or similar) URL can never end up in an ``href``;
    * absent text fields render as an empty cell instead of the literal
      string "None".
    """

    def text(value) -> str:
        # Escaped cell text; a missing value yields an empty cell.
        return "" if value is None else html.escape(str(value))

    cells = []
    for k, _ in columns:
        if k == "_pos":
            cells.append(f'<td class="rankcell">{_rank_badge(pos)}</td>')
        elif k == "method":
            name = text(r.get(k))
            url = r.get("model_url")
            href = str(url).strip() if url else ""
            if href.lower().startswith(("http://", "https://")):
                inner = (
                    f'<a class="mlink" href="{html.escape(href)}" '
                    f'target="_blank" rel="noopener">{name}</a>'
                )
            else:
                inner = name
            cells.append(f'<td class="method">{inner}</td>')
        elif k in ("mtype", "submitter"):
            cells.append(f'<td class="type">{text(r.get(k))}</td>')
        else:  # numeric — heatmap background + value + sortable raw value
            v = r.get(k)
            dv = repr(float(v)) if isinstance(v, (int, float)) else ""
            cells.append(f'<td data-v="{dv}"{_bg_style(k, v, stats)}>{_num_span(k, v, stats)}</td>')
    typ = text(r.get("mtype"))
    return f'<tr class="mrow" data-subtrack="{html.escape(section_key)}" data-type="{typ}">{"".join(cells)}</tr>'
def _th(
    key: str, label: str, type_values: list[str], sort_default_key: str = "", tip: str = ""
) -> str:
    """Render the ``<th>`` header cell for column ``key``.

    Three flavours exist:

    * ``_pos`` — the standing column: marked ``data-nosort`` and always
      labelled ``#``;
    * ``mtype`` — the Type column: a dropdown with one pre-checked checkbox
      per entry of ``type_values`` (it filters, it does not sort);
    * anything else — a sortable column whose ``data-def`` attribute tells the
      page script which direction the first click sorts in (best first). The
      column named by ``sort_default_key`` is pre-marked as sorted ascending.

    ``tip``, when non-empty, is attached as the ``data-tip`` tooltip text.
    """
    tip_attr = ""
    if tip:
        tip_attr = f' data-tip="{html.escape(tip)}"'  # custom JS hover tooltip

    if key == "_pos":
        return f'<th data-nosort="1"{tip_attr}>#</th>'

    safe_label = html.escape(label)

    if key == "mtype":
        options = []
        for type_name in type_values:
            safe_type = html.escape(type_name)
            options.append(
                f'<label><input type="checkbox" class="tfilter" value="{safe_type}" checked> {safe_type}</label>'
            )
        dropdown = "".join(options)
        return (
            f'<th class="filter-th" data-filter="type"{tip_attr}>{safe_label} '
            f'<span class="caret">&#9662;</span>'
            f'<div class="dropdown">{dropdown}</div></th>'
        )

    # Best-first on the first click: descending only for higher-is-better scores.
    if key in HIGHER_BETTER and key not in LOWER_BETTER:
        first_click = "desc"
    else:
        first_click = "asc"
    # Rows arrive pre-sorted by the default column, so show its arrow on load.
    sorted_marker = ""
    if key == sort_default_key:
        sorted_marker = ' class="s-asc"'
    return f'<th data-def="{first_click}"{sorted_marker}{tip_attr}>{safe_label}</th>'
def _table(rows: list[dict], columns: list, subtracks: list, tips: dict) -> str:
    """Render the leaderboard table for one track.

    ``rows`` are the method rows, ``columns`` the (key, label) pairs to show,
    ``subtracks`` the (key, label) pairs used to split rows into titled
    sections and ``tips`` the per-column tooltip text.

    With no sub-tracks all rows go into a single ``<tbody>``. Otherwise each
    sub-track that has rows gets its own ``<tbody>`` with a heading row, and
    rows whose ``subtrack`` matches none of the configured keys are collected
    under a trailing "Other" section. Rows are ordered by average rank within
    each section; shading and best-marking use statistics over all rows.
    """
    type_values = sorted({str(r["mtype"]) for r in rows if r.get("mtype")})
    head = "".join(
        _th(col_key, col_label, type_values, "rank", tips.get(col_key, ""))
        for col_key, col_label in columns
    )
    stats = _col_stats(rows)

    def group_body(section_key: str, sub_rows: list[dict]) -> str:
        # Best average rank first; the 1-based position drives the
        # gold/silver/bronze badge. The page script re-sorts on header click.
        rendered = []
        for position, row in enumerate(sorted(sub_rows, key=_rank_key), start=1):
            rendered.append(_method_row(row, section_key, stats, columns, position))
        return "".join(rendered)

    def card(bodies: str) -> str:
        return f'<div class="card"><table><thead><tr>{head}</tr></thead>{bodies}</table></div>'

    if not subtracks:
        return card(f'<tbody data-subtrack="all">{group_body("all", rows)}</tbody>')

    known = {sub_key for sub_key, _ in subtracks}
    sections = list(subtracks)
    if any(r.get("subtrack") not in known for r in rows):
        sections.append(("other", "Other"))

    span = len(columns)
    bodies = []
    for sec_key, sec_label in sections:
        if sec_key in known:
            members = [r for r in rows if r.get("subtrack") == sec_key]
        else:
            members = [r for r in rows if r.get("subtrack") not in known]
        if not members:
            continue  # no heading for an empty section
        heading = f'<tr class="sec"><td colspan="{span}">{html.escape(sec_label)}</td></tr>'
        bodies.append(
            f'<tbody data-subtrack="{html.escape(sec_key)}">{heading}{group_body(sec_key, members)}</tbody>'
        )
    return card("".join(bodies))
def _render_section(cfg: dict, active: bool) -> str:
    """Render one track as a ``<div class="section">`` block.

    The block holds the track title, then either the leaderboard table, the
    compute error, or a "no methods" notice, and finally the track's legend.
    Sections other than the active one carry the ``hidden`` attribute; the
    page script toggles it when a tab is clicked.
    """
    rows, error = _rows(cfg["key"], cfg["compute"])
    if error is not None:
        body = f'<div class="err">Failed to compute leaderboard:\n{html.escape(error)}</div>'
    elif rows:
        body = _table(rows, cfg["columns"], cfg["subtracks"], cfg["tips"])
    else:
        body = '<div class="err">No methods found in the substrate dataset.</div>'

    hidden_attr = " hidden" if not active else ""
    pieces = [
        f'<div class="section" data-track="{html.escape(cfg["key"])}"{hidden_attr}>',
        f'<p class="section-title">{html.escape(cfg["title"])}</p>',
        body,
        # The legend is trusted HTML defined in this module, so it is not escaped.
        f'<p class="note">{cfg["note"]}</p>',
        "</div>",
    ]
    return "".join(pieces)
@app.get("/", response_class=HTMLResponse)
def index() -> str:
    """Serve the leaderboard page.

    Builds the track-selector buttons and one section per track (the first
    track is the active one), then fills the placeholders of the page
    template with that content and the external link URLs.
    """
    buttons = []
    for i, cfg in enumerate(TRACKS):
        css = "tab active" if i == 0 else "tab"
        buttons.append(
            f'<button class="{css}" '
            f'data-track="{html.escape(cfg["key"])}">{html.escape(cfg["tab"])}</button>'
        )
    tabs = "".join(buttons)
    sections = "".join(_render_section(cfg, active=(i == 0)) for i, cfg in enumerate(TRACKS))
    content = f'<div class="tabs">{tabs}</div>\n{sections}'

    # Substitution order is kept fixed: content first, then the link targets.
    substitutions = (
        ("%%CONTENT%%", content),
        ("%%CODE%%", CODE_URL),
        ("%%MAIN%%", MAIN_URL),
        ("%%MODELS%%", MODELS_URL),
        ("%%SUBMIT%%", SUBMIT_URL),
        ("%%DATA%%", DATA_URL),
    )
    page = PAGE
    for placeholder, value in substitutions:
        page = page.replace(placeholder, value)
    return page
@app.get("/health")
def health() -> dict:
    """Report per-track status as JSON.

    For every track the response lists the number of methods and the compute
    error, if any. ``status`` is ``"ok"`` only when no track reported an error.
    Calling this endpoint computes any track that is not cached yet.
    """
    report: dict = {}
    failed = False
    for cfg in TRACKS:
        rows, error = _rows(cfg["key"], cfg["compute"])
        method_count = len(rows) if rows else 0
        report[cfg["key"]] = {"methods": method_count, "error": error}
        failed = failed or error is not None
    status = "error" if failed else "ok"
    return {"status": status, "tracks": report}
# ---------------------------------------------------------------------------
# JSON API — consumed cross-origin by the public MyHeartCounts site.
# ---------------------------------------------------------------------------
def _sanitize(row: dict) -> dict:
    """Return a copy of ``row`` in which NaN / ±inf floats are replaced by None.

    Starlette's JSONResponse serialises with ``allow_nan=False``, so a
    non-finite float left in a row would make the endpoint fail with a 500 and
    break the client's ``.json()``. Only top-level values are examined; every
    other value is passed through unchanged.
    """
    cleaned = {}
    for name, value in row.items():
        if isinstance(value, float) and not math.isfinite(value):
            cleaned[name] = None
        else:
            cleaned[name] = value
    return cleaned
def _api_subtracks(cfg: dict, rows: list[dict]) -> list[tuple[str, str]]:
    """Return the (key, label) sub-tracks to publish for a track.

    Starts from the configured sub-tracks. If the track has sub-tracks and at
    least one row's ``subtrack`` is not among them, ("other", "Other") is
    appended — the same bucket the HTML table adds — because the frontend
    silently drops rows whose ``subtrack`` matches no listed key. A track
    without sub-tracks always yields an empty list.
    """
    configured = list(cfg["subtracks"])
    if not configured:
        return configured
    known_keys = {sub_key for sub_key, _ in configured}
    for row in rows:
        if row.get("subtrack") not in known_keys:
            return configured + [("other", "Other")]
    return configured
def _track_payload(cfg: dict, rows: list[dict] | None, error: str | None) -> dict:
    """Build the JSON payload for one track.

    The payload carries the track's title, tab label, column list, sub-tracks,
    legend HTML, the sanitised rows and the compute error (if any). ``rows``
    may be None, in which case the payload lists no rows.
    """
    # `fallback` is a downstream-only column in the public API contract.
    is_downstream = cfg["key"] == "downstream"
    columns = []
    for col_key, col_label in cfg["columns"]:
        if col_key == "fallback" and not is_downstream:
            continue
        columns.append({"key": col_key, "label": col_label})

    present_rows = rows or []
    subtracks = []
    for sub_key, sub_label in _api_subtracks(cfg, present_rows):
        subtracks.append({"key": sub_key, "label": sub_label})

    return {
        "title": cfg["title"],
        "tab": cfg["tab"],
        "columns": columns,
        "subtracks": subtracks,
        "legend_html": cfg["note"],
        "rows": [_sanitize(row) for row in present_rows],
        "error": error,
    }
@app.get("/api/data")
def api_data() -> JSONResponse:
    """Serve every track's leaderboard as JSON, keyed by track key."""
    payload = {}
    for cfg in TRACKS:
        rows, error = _rows(cfg["key"], cfg["compute"])
        payload[cfg["key"]] = _track_payload(cfg, rows, error)
    return JSONResponse(payload)
@app.get("/logo.png")
def logo() -> FileResponse:
    """Serve the ``logo.png`` that sits next to this module."""
    logo_path = Path(__file__).with_name("logo.png")
    return FileResponse(logo_path, media_type="image/png")