File size: 2,323 Bytes
089d665
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
"""Reverse phenotyping — given a (suspected) diagnosis, what to look for.

If the differential is dominated by disease *d*, which HPO terms or
findings does *d* typically present with that we haven't checked yet?
This is the dual of `ask.py`: ask is forward (which test best
discriminates the differential), reverse is backward (given a diagnosis,
which signs/symptoms should we systematically rule in/out before
declaring the case closed).

Useful for clinicians as a checklist before a multidisciplinary
review or before triggering a definitive (often expensive) test.
"""
from __future__ import annotations

import logging
import math
from typing import Optional

from .types import ReversePhenoSpec, ReversePhenoItem

logger = logging.getLogger("gemeo.reverse_pheno")


async def _safe_query(cypher: str, params: Optional[dict] = None) -> list:
    """Run a Cypher query via `space_graph`, returning `[]` on any failure.

    `space_graph` is imported lazily inside the call, so a missing or
    broken graph backend degrades to an empty result instead of breaking
    import of this module.

    Args:
        cypher: Cypher query text.
        params: Query parameters; ``None`` (or any falsy value) is passed
            on as an empty dict.

    Returns:
        The awaited result of `space_graph._safe_query`, called with
        ``timeout=10.0`` (presumably seconds — confirm against
        `space_graph`), or an empty list if the import or the query raises.
    """
    try:
        from space_graph import _safe_query as q
        return await q(cypher, params or {}, timeout=10.0)
    except Exception as e:
        # Deliberate best-effort: callers treat "no rows" and "query failed"
        # the same way, so the error is only logged. Lazy %-args avoid
        # formatting the message when DEBUG logging is disabled.
        logger.debug("cypher failed: %s", e)
        return []


def _coerce_frequency(raw, default: float = 0.5) -> float:
    """Convert a raw frequency value from the graph to a float in [0.0, 1.0].

    Values that cannot be converted to a float (``None``, non-numeric
    strings, oversized ints) and NaN fall back to `default`; everything
    else is clamped into the closed unit interval.
    """
    try:
        value = float(raw)
    except (TypeError, ValueError, OverflowError):
        return default
    # NaN would otherwise slip through max()/min() and come out as 1.0,
    # presenting an unknown frequency as "always present".
    if math.isnan(value):
        return default
    return max(0.0, min(1.0, value))


async def look_for(
    *,
    orpha: str,
    already_present: Optional[list[str]] = None,
    top_n: int = 10,
) -> ReversePhenoSpec:
    """Suggest HPO terms typical for `orpha` not yet observed in patient.

    Queries the graph for phenotypes linked to the disease through
    `HAS_PHENOTYPE`, ordered by descending frequency, and keeps the first
    `top_n` whose HPO id is not in `already_present`.

    Args:
        orpha: Orphanet code matched against `Disease.orphaCode`. A falsy
            value short-circuits to an empty spec with `disease_orpha=None`.
        already_present: HPO ids already observed in the patient; these are
            excluded from the suggestions. ``None`` means nothing observed.
        top_n: Maximum number of suggestions. Zero or a negative value
            yields no suggestions.

    Returns:
        A `ReversePhenoSpec` for `orpha`. `items` is empty when the disease
        has no matching phenotypes, and also when the graph query fails,
        because `_safe_query` swallows errors and returns no rows.
    """
    if not orpha:
        return ReversePhenoSpec(disease_orpha=None, items=[])
    if top_n <= 0:
        # Nothing was requested; skip the graph round-trip. The loop below
        # only checks the limit after appending, so without this guard a
        # non-positive top_n would still return one item.
        return ReversePhenoSpec(disease_orpha=orpha, items=[])

    cypher = """
    MATCH (d:Disease {orphaCode: $orpha})-[r:HAS_PHENOTYPE]->(p:Phenotype)
    RETURN p.hpoId AS hpo,
           p.name AS name,
           coalesce(r.frequency, r.prevalence, 0.5) AS freq,
           p.definition AS definition
    ORDER BY freq DESC
    """
    rows = await _safe_query(cypher, {"orpha": orpha})
    if not rows:
        return ReversePhenoSpec(disease_orpha=orpha, items=[])

    # Seeded with the patient's known terms and extended as we emit, so a
    # phenotype reached through several HAS_PHENOTYPE edges is listed once
    # (the first, i.e. highest-ranked, row wins).
    seen = set(already_present or ())

    items: list[ReversePhenoItem] = []
    for row in rows:
        hpo = row.get("hpo")
        if not hpo or hpo in seen:
            continue
        seen.add(hpo)
        items.append(ReversePhenoItem(
            hpo_id=hpo,
            name=row.get("name") or hpo,
            expected_frequency=_coerce_frequency(row.get("freq", 0.5)),
            # The query always returns the `definition` column, so a missing
            # property arrives as None rather than an absent key; `or ""`
            # normalises it the same way `name` is normalised above.
            definition=row.get("definition") or "",
        ))
        if len(items) >= top_n:
            break

    return ReversePhenoSpec(disease_orpha=orpha, items=items)