Spaces:

theodabos
/

varientlens

Sleeping

File size: 2,506 Bytes

21977cc

"""Pre-warm the OMIM cache for the lab's most-used genes.

The OMIM API is slow (~3-8s per gene cold). For the live demo we want
the disease-association banner to populate instantly. This script
pre-fetches every gene in the kidney + HCT panels and writes the
results to /tmp/omim_cache.db, which the OMIMClient consults before
hitting the network.

Idempotent. Already-cached genes are skipped.

Usage:
    python scripts/warm_omim_cache.py
"""

from __future__ import annotations

import asyncio
import sys
from pathlib import Path

# Put the repository root (the directory above this script's own directory)
# at the front of sys.path so the project's `backend` package can be
# imported when this file is run directly as a script.
_SCRIPT_PATH = Path(__file__).resolve()
REPO_ROOT = _SCRIPT_PATH.parents[1]
sys.path.insert(0, str(REPO_ROOT))

# Genes the lab cares about across both panels + a few common headliner
# genes we use in the demo. 30 genes total; at ~3-8s per cold lookup that
# is roughly 1.5-4 min to pre-warm.
# Gene symbols to pre-warm, in the order they are processed.
GENES = [
    # Kidney (PKD1/PKD2/PKHD1 + relatives)
    "PKD1",
    "PKD2",
    "PKHD1",
    "TSC1",
    "TSC2",
    # HCT panel — hereditary cancer
    "BRCA1",
    "BRCA2",
    "ATM",
    "CHEK2",
    "PALB2",
    "TP53",
    "PTEN",
    "CDH1",
    "STK11",
    "RAD51C",
    "RAD51D",
    "BRIP1",
    "NBN",
    "BARD1",
    # Lynch syndrome MMR
    "MLH1",
    "MSH2",
    "MSH6",
    "PMS2",
    "EPCAM",
    # Common single-gene demo highlights
    "APC",
    "NF1",
    "VHL",
    "RET",
    "CDKN2A",
    "MUTYH",
]


async def main() -> int:
    """Pre-fetch OMIM entries for every gene in ``GENES``.

    Each gene is first probed in the client's cache; cached genes are
    skipped, all others are looked up one at a time via
    ``OMIMClient.lookup_gene``. A progress line is printed per gene and a
    summary line at the end.

    Returns:
        Always ``0``. The run is best-effort: a disabled client, a failed
        lookup, or a gene with no OMIM hit is reported on stdout but does
        not change the exit status.
    """
    # Imported inside the function; resolving `backend` relies on the
    # module-level sys.path tweak that puts the repository root first.
    from backend.app.services.omim import OMIMClient

    client = OMIMClient()
    if not client.enabled:
        print("OMIM_API_KEY not set — skipping cache warm")
        return 0

    total = len(GENES)
    print(f"Warming OMIM cache for {total} genes...")
    new_count = 0
    skip_count = 0
    fail_count = 0
    for i, gene in enumerate(GENES, 1):
        prefix = f"  [{i:2d}/{total}] {gene}:"

        # Probe the cache first so already-warm genes are skipped. The probe
        # uses the client's private helper and stays best-effort, but a
        # failure is reported instead of silently swallowed: otherwise a
        # broken cache would just look like "everything needs fetching".
        try:
            hit, _ = client._cache_get(gene)
        except Exception as exc:
            hit = False
            print(f"{prefix} cache check failed ({exc!r}) — fetching anyway")
        if hit:
            skip_count += 1
            print(f"{prefix} cached, skipping")
            continue

        try:
            entry = await client.lookup_gene(gene)
            # Read mim_number before touching any counter, so an entry that
            # raises here is tallied once (as a failure), not as both
            # "warmed" and "unresolved".
            mim_number = entry.mim_number if entry else None
        except Exception as exc:
            fail_count += 1
            print(f"{prefix} FAILED — {exc}")
            continue

        if entry:
            new_count += 1
            print(f"{prefix} warmed — MIM {mim_number}")
        else:
            fail_count += 1
            print(f"{prefix} no OMIM hit")

    print(f"\nDone — {new_count} warmed, {skip_count} already cached, {fail_count} unresolved.")
    return 0


# Script entry point: run the async warm-up to completion and use its
# return value as the process exit status.
if __name__ == "__main__":
    exit_status = asyncio.run(main())
    sys.exit(exit_status)