gemeo-twin-stack / src /gemeo /types.py
timmers's picture
GEMEO world-model — initial release (module + NeuralSurv ckpt + RareBench v49 + KG embeddings)
089d665 verified
"""Gemeo dataclasses β€” public types.
JSON-serializable, stable across versions. Internal modules can extend
via `extra: dict` payloads.
"""
from __future__ import annotations
from dataclasses import dataclass, field, asdict
from datetime import datetime, timezone
from typing import Optional, Any
import uuid
def _now() -> str:
    """Return the current UTC time as an ISO-8601 string.

    The datetime is timezone-aware, so the string carries an explicit
    ``+00:00`` offset.
    """
    return datetime.now(timezone.utc).isoformat()
def _new_id() -> str:
    """Return a fresh identifier: ``gemeo_`` followed by 10 hex characters.

    The suffix is the first 10 hex digits of a random UUID4.
    """
    return f"gemeo_{uuid.uuid4().hex[:10]}"
# ─── retrieval / cohort ────────────────────────────────────────────────────
@dataclass
class CohortMember:
    """One cohort entry together with its similarity value.

    ``space_id`` and ``similarity`` are required; all other fields are
    optional and default to an empty list, ``None`` or ``"registry"``.

    NOTE(review): the range/scale of ``similarity`` and the element types of
    the two ``shared_*`` lists are not fixed here -- confirm with producers.
    """

    space_id: str
    similarity: float
    shared_phenotypes: list = field(default_factory=list)
    shared_diseases: list = field(default_factory=list)
    confirmed_diagnosis: Optional[str] = None
    confirmed_orpha: Optional[str] = None
    sus_region: Optional[str] = None
    source: str = "registry"  # registry | space | literature
@dataclass
class Cohort:
    """Result of a cohort retrieval; every field has a default.

    NOTE(review): ``members`` presumably holds ``CohortMember`` objects and
    ``centroid_disease`` a disease descriptor dict -- neither is enforced
    here, confirm against the code that fills them.
    """

    members: list = field(default_factory=list)
    method: str = "knn_fused"
    radius: float = 0.0
    n_total_population: int = 0
    centroid_disease: Optional[dict] = None
# ─── reasoning / subgraph ──────────────────────────────────────────────────
@dataclass
class SubgraphNode:
    """A node of a reasoning subgraph.

    ``id``, ``label`` and ``name`` are required. ``weight`` defaults to
    ``1.0`` and ``extra`` is a free-form dict for additional payload.
    """

    id: str
    label: str
    name: str
    code: Optional[str] = None
    weight: float = 1.0
    extra: dict = field(default_factory=dict)
@dataclass
class SubgraphEdge:
    """A directed ``source`` -> ``target`` edge labelled with relation ``rel``.

    ``weight`` defaults to ``1.0``; ``evidence`` starts as an empty list.

    NOTE(review): ``source``/``target`` presumably reference
    ``SubgraphNode.id`` values -- confirm against the subgraph builder.
    """

    source: str
    target: str
    rel: str
    weight: float = 1.0
    evidence: list = field(default_factory=list)
@dataclass
class Subgraph:
    """Container for a reasoning subgraph: nodes, edges and paths.

    All fields have defaults; the three collections start empty.

    NOTE(review): ``nodes``/``edges`` presumably hold ``SubgraphNode`` and
    ``SubgraphEdge`` objects; the element shape of ``paths`` is not visible
    here -- confirm.
    """

    nodes: list = field(default_factory=list)
    edges: list = field(default_factory=list)
    paths: list = field(default_factory=list)
    method: str = "cypher_sparsify"
    target_disease: Optional[str] = None
# ─── prediction / trajectory ───────────────────────────────────────────────
@dataclass
class TrajectoryHorizon:
    """Predicted state at one time horizon, given in months.

    ``months`` and ``state`` are required. The three numeric scores default
    to ``0.0`` and the two ``expected_*`` lists start empty.

    NOTE(review): ``confidence_low``/``confidence_high`` look like interval
    bounds around ``risk_score`` -- confirm scale and meaning.
    """

    months: int
    state: str
    risk_score: float = 0.0
    confidence_low: float = 0.0
    confidence_high: float = 0.0
    expected_phenotypes: list = field(default_factory=list)
    expected_complications: list = field(default_factory=list)
@dataclass
class TrajectorySpec:
    """Trajectory prediction result; every field has a default.

    NOTE(review): ``horizons`` presumably holds ``TrajectoryHorizon``
    objects -- not enforced here, confirm.
    """

    horizons: list = field(default_factory=list)
    model: str = "tgnn_bootstrap"
    natural_history_basis: list = field(default_factory=list)
# ─── risk / survival ───────────────────────────────────────────────────────
@dataclass
class RiskSpec:
    """Risk / survival summary; every field has a default.

    The three scores default to ``0.0``, both lists start empty, and
    ``model`` defaults to ``"rule_based"``.

    NOTE(review): score ranges and the point format of ``survival_curve``
    are not defined here -- confirm with the risk module.
    """

    overall_severity: float = 0.0
    progression_risk: float = 0.0
    treatment_urgency: float = 0.0
    survival_curve: list = field(default_factory=list)
    top_complications: list = field(default_factory=list)
    model: str = "rule_based"
# ─── drugs / repurposing ───────────────────────────────────────────────────
@dataclass
class DrugSpec:
    """Drug / repurposing result: candidate list plus bookkeeping.

    All fields have defaults. The element shape of ``candidates`` is not
    fixed by this type.
    """

    candidates: list = field(default_factory=list)
    model: str = "kg_walks"
    n_evaluated: int = 0
# ─── trials ────────────────────────────────────────────────────────────────
@dataclass
class TrialSpec:
    """Clinical-trial matching result: match list plus bookkeeping.

    All fields have defaults. The element shape of ``matches`` is not fixed
    by this type.
    """

    matches: list = field(default_factory=list)
    model: str = "trialgpt_bootstrap"
    n_searched: int = 0
# ─── active learning ───────────────────────────────────────────────────────
@dataclass
class NextQuestion:
    """A suggested follow-up question (active learning).

    ``hpo_id``, ``name`` and ``rationale`` are required.
    ``information_gain`` defaults to ``0.0``, ``discriminates_between``
    starts empty and ``asks_in_pcdt`` defaults to ``False``.

    NOTE(review): ``hpo_id`` is presumably a Human Phenotype Ontology term
    id and "pcdt" a clinical-protocol reference -- confirm with callers.
    """

    hpo_id: str
    name: str
    rationale: str
    information_gain: float = 0.0
    discriminates_between: list = field(default_factory=list)
    asks_in_pcdt: bool = False
# ─── SUS grounding ─────────────────────────────────────────────────────────
@dataclass
class SusCheck:
    """SUS grounding information for a disease; every field has a default.

    Booleans default to ``False``, optional values to ``None`` and
    containers to empty.

    NOTE(review): the abbreviations (SUS, PCDT, UF, "centro", "triagem
    neonatal") are presumably Brazilian public-health terms; the key and
    value schema of the dict fields is not visible here -- confirm with the
    module that populates this record.
    """

    disease_orpha: Optional[str] = None
    has_pcdt: bool = False
    pcdt_url: Optional[str] = None
    therapy_pcdt_recommended: list = field(default_factory=list)
    therapy_dispensed_in_uf: dict = field(default_factory=dict)
    nearest_centro: Optional[dict] = None
    triagem_neonatal_includes: bool = False
    associations: list = field(default_factory=list)
# ─── viz ───────────────────────────────────────────────────────────────────
@dataclass
class VizData:
    """Graph payload for visualisation: nodes, links, centre id and legend.

    All fields have defaults. The shape of the node/link entries is not
    fixed by this type.
    """

    nodes: list = field(default_factory=list)
    links: list = field(default_factory=list)
    center_id: Optional[str] = None
    legend: dict = field(default_factory=dict)
# ─── what-if ───────────────────────────────────────────────────────────────
@dataclass
class WhatIfResult:
    """Outcome of a what-if evaluation of an intervention.

    All fields have defaults. ``new_risk`` and ``new_trajectory`` are
    optional ``RiskSpec`` / ``TrajectorySpec`` values, and ``confidence``
    defaults to ``0.5``.

    NOTE(review): the key schema of ``intervention`` and the sign convention
    of ``delta_risk`` are not defined here -- confirm with the what-if
    module.
    """

    intervention: dict = field(default_factory=dict)
    delta_risk: float = 0.0
    delta_trajectory: list = field(default_factory=list)
    new_risk: Optional[RiskSpec] = None
    new_trajectory: Optional[TrajectorySpec] = None
    rationale: str = ""
    confidence: float = 0.5
# ─── DDI ───────────────────────────────────────────────────────────────────
@dataclass
class DdiPair:
    """One drug-drug interaction between ``drug_a`` and ``drug_b``.

    Only the two drug names are required. ``severity`` defaults to
    ``"unknown"``; the free-text fields default to ``""`` and
    ``references`` starts empty.

    NOTE(review): ``rxcui_*`` are presumably RxNorm concept ids -- confirm.
    """

    drug_a: str
    drug_b: str
    rxcui_a: Optional[str] = None
    rxcui_b: Optional[str] = None
    severity: str = "unknown"  # contraindicated | major | moderate | minor | unknown
    mechanism: str = ""
    evidence_level: str = ""
    management: str = ""
    references: list = field(default_factory=list)
@dataclass
class DdiSpec:
    """Drug-drug interaction summary for a regimen; all fields have defaults.

    NOTE(review): ``pairs`` presumably holds ``DdiPair`` objects -- not
    enforced here, confirm.
    """

    pairs: list = field(default_factory=list)
    n_pairs_evaluated: int = 0
    regimen_risk: str = "none"  # none | minor | moderate | major | contraindicated
    model: str = "kg_walks"
# ─── multi-specialist consult ─────────────────────────────────────────────
@dataclass
class SpecialistOpinion:
    """Opinion from one specialty in a multi-specialist consult.

    ``specialty``, ``opinion`` and ``confidence`` are required; the three
    list fields start empty.
    """

    specialty: str
    opinion: str
    confidence: float
    key_concerns: list = field(default_factory=list)
    recommended_next_steps: list = field(default_factory=list)
    red_flags: list = field(default_factory=list)
@dataclass
class ConsultSpec:
    """Multi-specialist consult result; every field has a default.

    NOTE(review): ``opinions`` presumably holds ``SpecialistOpinion``
    objects; the element shape of ``panel`` is not visible here -- confirm.
    """

    opinions: list = field(default_factory=list)
    synthesis: str = ""
    panel: list = field(default_factory=list)
# ─── pharmacogenomics ─────────────────────────────────────────────────────
@dataclass
class PharmacogenAssessment:
    """Pharmacogenomic assessment for one gene/variant and drug.

    ``gene``, ``variant`` and ``drug`` are required positional fields.
    ``variant`` may be ``None`` but has no default, so it must always be
    passed. Text fields default to ``""``, ``confidence`` to ``0.5`` and
    ``source`` to ``"cpic"``.

    NOTE(review): ``rxcui`` is presumably an RxNorm concept id -- confirm.
    """

    gene: str
    variant: Optional[str]
    drug: str
    rxcui: Optional[str] = None
    expected_phenotype: str = ""
    recommendation: str = ""
    dose_modification: str = ""
    cpic_level: str = ""  # A | B | C | D | ""
    evidence: str = ""
    confidence: float = 0.5
    source: str = "cpic"  # cpic | pathway
@dataclass
class PharmacogenSpec:
    """Pharmacogenomics summary; every field has a default.

    NOTE(review): ``assessments`` presumably holds
    ``PharmacogenAssessment`` objects -- not enforced here, confirm.
    """

    assessments: list = field(default_factory=list)
    n_pairs: int = 0
    n_actionable: int = 0
    model: str = "cpic_kg"
# ─── family / pedigree ─────────────────────────────────────────────────────
@dataclass
class RelativeRisk:
    """Recurrence risk for one kind of relative.

    ``relation`` and ``recurrence_risk`` are required;
    ``carrier_screening_recommended`` defaults to ``False`` and
    ``rationale`` to ``""``.

    NOTE(review): ``recurrence_risk`` is presumably a probability in
    [0, 1] -- not validated here, confirm.
    """

    relation: str  # sibling | parent | offspring | mother | son | daughter
    recurrence_risk: float
    carrier_screening_recommended: bool = False
    rationale: str = ""
@dataclass
class FamilySpec:
    """Family / pedigree summary keyed by inheritance mode.

    ``inheritance_mode`` is required; ``relatives`` starts empty and
    ``notes`` defaults to ``""``.

    NOTE(review): ``relatives`` presumably holds ``RelativeRisk`` objects --
    not enforced here, confirm.
    """

    inheritance_mode: str  # AR | AD | XLR | XLD | MITOCHONDRIAL | DE_NOVO | UNKNOWN
    relatives: list = field(default_factory=list)
    notes: str = ""
# ─── reverse phenotyping ──────────────────────────────────────────────────
@dataclass
class ReversePhenoItem:
    """One phenotype entry in a reverse-phenotyping list.

    ``hpo_id``, ``name`` and ``expected_frequency`` are required;
    ``definition`` defaults to ``""``.

    NOTE(review): ``expected_frequency`` is presumably a fraction in
    [0, 1] -- not validated here, confirm.
    """

    hpo_id: str
    name: str
    expected_frequency: float
    definition: str = ""
@dataclass
class ReversePhenoSpec:
    """Reverse-phenotyping result for a disease; all fields have defaults.

    NOTE(review): ``items`` presumably holds ``ReversePhenoItem`` objects --
    not enforced here, confirm.
    """

    disease_orpha: Optional[str] = None
    items: list = field(default_factory=list)
# ─── protocol compliance ──────────────────────────────────────────────────
@dataclass
class ComplianceGap:
    """One gap between what a protocol expects and what was observed.

    ``category`` and ``expected`` are required. ``got`` defaults to
    ``None``, ``priority`` to ``"medium"`` and ``rationale`` to ``""``.
    """

    category: str  # therapy | exam | referral | off_protocol
    expected: str
    got: Optional[str] = None
    priority: str = "medium"  # low | medium | high
    rationale: str = ""
@dataclass
class ProtocolComplianceSpec:
    """Protocol-compliance summary for a disease; all fields have defaults.

    NOTE(review): ``gaps`` presumably holds ``ComplianceGap`` objects and
    the scale of ``score`` is not defined here -- confirm.
    """

    disease_orpha: Optional[str] = None
    score: float = 0.0
    gaps: list = field(default_factory=list)
    notes: str = ""
# ─── simulation ───────────────────────────────────────────────────────────
@dataclass
class SimulationOutcome:
    """Summary statistics of one metric at one horizon of a simulation.

    Every field is required (no defaults).

    NOTE(review): ``p05``/``p50``/``p95`` are presumably the 5th, 50th and
    95th percentiles and ``n`` the sample count -- confirm with the
    simulator.
    """

    metric: str
    horizon_months: int
    n: int
    mean: float
    p05: float
    p50: float
    p95: float
@dataclass
class SimulationSpec:
    """Aggregate result of a simulation; every field has a default.

    NOTE(review): ``horizon_outcomes`` presumably holds
    ``SimulationOutcome`` objects; the shape of ``survival_summary`` and
    ``intervention`` is not visible here -- confirm.
    """

    n_runs: int = 0
    intervention: Optional[dict] = None
    horizon_outcomes: list = field(default_factory=list)
    survival_summary: list = field(default_factory=list)
    median_severity: float = 0.0
# ─── the central twin ──────────────────────────────────────────────────────
@dataclass
class GemeoTwin:
    """Central twin record that aggregates the results of all modules.

    Every field has a default, so ``GemeoTwin()`` is valid. ``id`` comes
    from ``_new_id()``; ``created_at`` and ``updated_at`` each come from
    their own ``_now()`` call, so on a fresh instance the two timestamps can
    differ by a few microseconds. Module results (``cohort``, ``risk``, ...)
    stay ``None`` until set.
    """

    id: str = field(default_factory=_new_id)
    case_id: Optional[str] = None
    patient_id: Optional[str] = None
    created_at: str = field(default_factory=_now)
    updated_at: str = field(default_factory=_now)

    embedding: Optional[list] = None
    embedding_dim: int = 0
    diagnoses: list = field(default_factory=list)

    cohort: Optional[Cohort] = None
    subgraph: Optional[Subgraph] = None
    trajectory: Optional[TrajectorySpec] = None
    risk: Optional[RiskSpec] = None
    drugs: Optional[DrugSpec] = None
    trials: Optional[TrialSpec] = None
    next_questions: list = field(default_factory=list)
    sus_check: Optional[SusCheck] = None
    viz_data: Optional[VizData] = None

    # Phase-2 case-driven additions
    ddi: Optional[DdiSpec] = None
    consult: Optional[ConsultSpec] = None
    pharmacogen: Optional[PharmacogenSpec] = None
    family: Optional[FamilySpec] = None
    reverse_pheno: Optional[ReversePhenoSpec] = None
    protocol_compliance: Optional[ProtocolComplianceSpec] = None

    # Snapshot state
    snapshot_versions: list = field(default_factory=list)
    n_phenotypes: int = 0
    n_genes: int = 0
    n_labs: int = 0
    extra: dict = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Return the twin as a plain ``dict`` built by ``dataclasses.asdict``.

        ``asdict`` recurses into nested dataclasses, lists, tuples and dicts
        and deep-copies other values, so the result shares no mutable state
        with this instance. The result is JSON-serializable only if every
        stored value (e.g. the contents of ``extra``) is.
        """
        return asdict(self)