"""Reverse phenotyping — given a (suspected) diagnosis, what to look for. If the differential is dominated by disease *d*, what HPO terms or findings does *d* typically present that we haven't checked yet? This is the dual of `ask.py`: ask is forward (which test best discriminates the differential), reverse is backward (given dx, what signs/symptoms should we systematically rule in/out before declaring the case closed). Useful for clinicians as a checklist before a multidisciplinary review or before triggering a definitive (often expensive) test. """ from __future__ import annotations import logging from typing import Optional from .types import ReversePhenoSpec, ReversePhenoItem logger = logging.getLogger("gemeo.reverse_pheno") async def _safe_query(cypher: str, params: dict = None) -> list: try: from space_graph import _safe_query as q return await q(cypher, params or {}, timeout=10.0) except Exception as e: logger.debug(f"cypher failed: {e}") return [] async def look_for( *, orpha: str, already_present: list[str] = None, top_n: int = 10, ) -> ReversePhenoSpec: """Suggest HPO terms typical for `orpha` not yet observed in patient.""" if not orpha: return ReversePhenoSpec(disease_orpha=None, items=[]) already_present = set(already_present or []) cypher = """ MATCH (d:Disease {orphaCode: $orpha})-[r:HAS_PHENOTYPE]->(p:Phenotype) RETURN p.hpoId AS hpo, p.name AS name, coalesce(r.frequency, r.prevalence, 0.5) AS freq, p.definition AS definition ORDER BY freq DESC """ rows = await _safe_query(cypher, {"orpha": orpha}) if not rows: return ReversePhenoSpec(disease_orpha=orpha, items=[]) items = [] for r in rows: hpo = r.get("hpo") if not hpo or hpo in already_present: continue try: f = float(r.get("freq", 0.5)) except Exception: f = 0.5 items.append(ReversePhenoItem( hpo_id=hpo, name=r.get("name") or hpo, expected_frequency=max(0.0, min(1.0, f)), definition=r.get("definition", ""), )) if len(items) >= top_n: break return ReversePhenoSpec(disease_orpha=orpha, items=items)