gemeo-twin-stack / src /gemeo /reverse_pheno.py
timmers's picture
GEMEO world-model — initial release (module + NeuralSurv ckpt + RareBench v49 + KG embeddings)
089d665 verified
"""Reverse phenotyping — given a (suspected) diagnosis, what to look for.
If the differential is dominated by disease *d*, what HPO terms or
findings does *d* typically present that we haven't checked yet?
This is the dual of `ask.py`: ask is forward (which test best
discriminates the differential), reverse is backward (given dx, what
signs/symptoms should we systematically rule in/out before declaring
the case closed).
Useful for clinicians as a checklist before a multidisciplinary
review or before triggering a definitive (often expensive) test.
"""
from __future__ import annotations
import logging
from typing import Optional
from .types import ReversePhenoSpec, ReversePhenoItem
logger = logging.getLogger("gemeo.reverse_pheno")
async def _safe_query(cypher: str, params: Optional[dict] = None) -> list:
    """Run a Cypher query via ``space_graph``; return ``[]`` on any failure.

    The ``space_graph`` import is done at call time and inside the ``try``,
    so an import error is handled the same way as a failing query: it is
    logged at DEBUG level and an empty list is returned.

    Args:
        cypher: Cypher query text.
        params: Query parameters; ``None`` (or an empty dict) is forwarded
            as ``{}``.

    Returns:
        Whatever ``space_graph._safe_query`` returns, or an empty list if
        the import or the query raised an ``Exception``.
    """
    try:
        from space_graph import _safe_query as q
        return await q(cypher, params or {}, timeout=10.0)
    except Exception as e:
        # Best-effort by design: callers treat "no rows" and "query failed"
        # the same way, so the error is only recorded for debugging.
        logger.debug("cypher failed: %s", e)
        return []
def _coerce_frequency(raw, default: float = 0.5) -> float:
    """Convert a raw frequency value to a float clamped to ``[0.0, 1.0]``.

    Values that cannot be converted by ``float()`` and NaN fall back to
    ``default``.
    """
    try:
        value = float(raw)
    except (TypeError, ValueError, OverflowError):
        return default
    # NaN is the only float that is not equal to itself. Without this check
    # max(0.0, min(1.0, nan)) evaluates to 1.0, which would report an unknown
    # frequency as "always present".
    if value != value:
        return default
    return max(0.0, min(1.0, value))


async def look_for(
    *,
    orpha: str,
    already_present: Optional[list[str]] = None,
    top_n: int = 10,
) -> ReversePhenoSpec:
    """Suggest HPO terms typical for `orpha` not yet observed in patient.

    Queries the graph for phenotypes linked to the disease through
    ``HAS_PHENOTYPE``, ordered by descending frequency, and keeps the first
    ``top_n`` whose HPO id is not in ``already_present``.

    Args:
        orpha: Value matched against ``Disease.orphaCode``. A falsy value
            short-circuits to an empty spec with ``disease_orpha=None``.
        already_present: HPO ids to exclude from the suggestions.
        top_n: Maximum number of items to return; values ``<= 0`` yield an
            empty item list.

    Returns:
        A ``ReversePhenoSpec`` with ``disease_orpha`` and ``items``. ``items``
        is empty when the query returns no rows (``_safe_query`` also returns
        no rows on failure).
    """
    if not orpha:
        return ReversePhenoSpec(disease_orpha=None, items=[])
    if top_n <= 0:
        # Nothing can be returned, so skip the graph round-trip. Previously
        # the limit was only checked after appending, which leaked one item.
        return ReversePhenoSpec(disease_orpha=orpha, items=[])

    excluded = set(already_present or [])
    cypher = """
    MATCH (d:Disease {orphaCode: $orpha})-[r:HAS_PHENOTYPE]->(p:Phenotype)
    RETURN p.hpoId AS hpo,
           p.name AS name,
           coalesce(r.frequency, r.prevalence, 0.5) AS freq,
           p.definition AS definition
    ORDER BY freq DESC
    """
    rows = await _safe_query(cypher, {"orpha": orpha})
    if not rows:
        return ReversePhenoSpec(disease_orpha=orpha, items=[])

    items: list[ReversePhenoItem] = []
    for row in rows:
        hpo = row.get("hpo")
        if not hpo or hpo in excluded:
            continue
        items.append(ReversePhenoItem(
            hpo_id=hpo,
            # Fall back to the HPO id when the phenotype has no name.
            name=row.get("name") or hpo,
            expected_frequency=_coerce_frequency(row.get("freq")),
            # The key is always present in the projection, so a missing
            # property arrives as None rather than triggering a .get default.
            definition=row.get("definition") or "",
        ))
        if len(items) >= top_n:
            break
    return ReversePhenoSpec(disease_orpha=orpha, items=items)