"""Gemeo dataclasses — public types.

JSON-serializable, stable across versions. Internal modules can extend
via `extra: dict` payloads.
"""
from __future__ import annotations

from dataclasses import dataclass, field, asdict
from datetime import datetime, timezone
from typing import Optional, Any
import uuid


def _now() -> str:
    """Return the current UTC time as an ISO-8601 string."""
    return datetime.now(timezone.utc).isoformat()


def _new_id() -> str:
    """Return a fresh twin id: ``gemeo_`` plus 10 hex chars of a UUID4."""
    return f"gemeo_{uuid.uuid4().hex[:10]}"


# ─── retrieval / cohort ────────────────────────────────────────────────────
@dataclass
class CohortMember:
    """One cohort entry: an identifier, a similarity score and overlap lists."""

    space_id: str
    similarity: float
    shared_phenotypes: list = field(default_factory=list)
    shared_diseases: list = field(default_factory=list)
    confirmed_diagnosis: Optional[str] = None
    confirmed_orpha: Optional[str] = None
    sus_region: Optional[str] = None
    source: str = "registry"  # registry | space | literature


@dataclass
class Cohort:
    """Result of a cohort retrieval.

    `members` is presumably a list of `CohortMember` (or their dict form) —
    the element type is not enforced here.
    """

    members: list = field(default_factory=list)
    method: str = "knn_fused"
    radius: float = 0.0
    n_total_population: int = 0
    centroid_disease: Optional[dict] = None


# ─── reasoning / subgraph ──────────────────────────────────────────────────
@dataclass
class SubgraphNode:
    """A node of the reasoning subgraph; `extra` holds free-form payload."""

    id: str
    label: str
    name: str
    code: Optional[str] = None
    weight: float = 1.0
    extra: dict = field(default_factory=dict)


@dataclass
class SubgraphEdge:
    """A weighted, typed edge between two node ids, with optional evidence."""

    source: str
    target: str
    rel: str
    weight: float = 1.0
    evidence: list = field(default_factory=list)


@dataclass
class Subgraph:
    """Container for subgraph nodes, edges and paths plus the method label."""

    nodes: list = field(default_factory=list)
    edges: list = field(default_factory=list)
    paths: list = field(default_factory=list)
    method: str = "cypher_sparsify"
    target_disease: Optional[str] = None


# ─── prediction / trajectory ───────────────────────────────────────────────
@dataclass
class TrajectoryHorizon:
    """Predicted state at one time horizon (`months`) with a risk interval."""

    months: int
    state: str
    risk_score: float = 0.0
    confidence_low: float = 0.0
    confidence_high: float = 0.0
    expected_phenotypes: list = field(default_factory=list)
    expected_complications: list = field(default_factory=list)


@dataclass
class TrajectorySpec:
    """Trajectory prediction: a list of horizons and the producing model."""

    horizons: list = field(default_factory=list)
    model: str = "tgnn_bootstrap"
    natural_history_basis: list = field(default_factory=list)


# ─── risk / survival ───────────────────────────────────────────────────────
@dataclass
class RiskSpec:
    """Risk summary scores, a survival curve and top complications."""

    overall_severity: float = 0.0
    progression_risk: float = 0.0
    treatment_urgency: float = 0.0
    survival_curve: list = field(default_factory=list)
    top_complications: list = field(default_factory=list)
    model: str = "rule_based"


# ─── drugs / repurposing ───────────────────────────────────────────────────
@dataclass
class DrugSpec:
    """Drug candidates together with the model label and evaluation count."""

    candidates: list = field(default_factory=list)
    model: str = "kg_walks"
    n_evaluated: int = 0


# ─── trials ────────────────────────────────────────────────────────────────
@dataclass
class TrialSpec:
    """Trial matches together with the model label and search count."""

    matches: list = field(default_factory=list)
    model: str = "trialgpt_bootstrap"
    n_searched: int = 0


# ─── active learning ───────────────────────────────────────────────────────
@dataclass
class NextQuestion:
    """A suggested follow-up phenotype question and why it is worth asking."""

    hpo_id: str
    name: str
    rationale: str
    information_gain: float = 0.0
    discriminates_between: list = field(default_factory=list)
    asks_in_pcdt: bool = False


# ─── SUS grounding ─────────────────────────────────────────────────────────
@dataclass
class SusCheck:
    """SUS grounding facts for a disease (PCDT, therapies, centres, …)."""

    disease_orpha: Optional[str] = None
    has_pcdt: bool = False
    pcdt_url: Optional[str] = None
    therapy_pcdt_recommended: list = field(default_factory=list)
    therapy_dispensed_in_uf: dict = field(default_factory=dict)
    nearest_centro: Optional[dict] = None
    triagem_neonatal_includes: bool = False
    associations: list = field(default_factory=list)


# ─── viz ───────────────────────────────────────────────────────────────────
@dataclass
class VizData:
    """Graph payload for visualisation: nodes, links, centre id and legend."""

    nodes: list = field(default_factory=list)
    links: list = field(default_factory=list)
    center_id: Optional[str] = None
    legend: dict = field(default_factory=dict)


# ─── what-if ───────────────────────────────────────────────────────────────
@dataclass
class WhatIfResult:
    """Outcome of a what-if intervention: deltas plus the recomputed specs."""

    intervention: dict = field(default_factory=dict)
    delta_risk: float = 0.0
    delta_trajectory: list = field(default_factory=list)
    new_risk: Optional[RiskSpec] = None
    new_trajectory: Optional[TrajectorySpec] = None
    rationale: str = ""
    confidence: float = 0.5


# ─── DDI ───────────────────────────────────────────────────────────────────
@dataclass
class DdiPair:
    """One drug–drug interaction pair with severity and supporting details."""

    drug_a: str
    drug_b: str
    rxcui_a: Optional[str] = None
    rxcui_b: Optional[str] = None
    severity: str = "unknown"  # contraindicated | major | moderate | minor | unknown
    mechanism: str = ""
    evidence_level: str = ""
    management: str = ""
    references: list = field(default_factory=list)


@dataclass
class DdiSpec:
    """Interaction pairs for a regimen plus the overall regimen risk label."""

    pairs: list = field(default_factory=list)
    n_pairs_evaluated: int = 0
    regimen_risk: str = "none"  # none | minor | moderate | major | contraindicated
    model: str = "kg_walks"


# ─── multi-specialist consult ─────────────────────────────────────────────
@dataclass
class SpecialistOpinion:
    """A single specialty's opinion with confidence and follow-up lists."""

    specialty: str
    opinion: str
    confidence: float
    key_concerns: list = field(default_factory=list)
    recommended_next_steps: list = field(default_factory=list)
    red_flags: list = field(default_factory=list)


@dataclass
class ConsultSpec:
    """Multi-specialist consult: individual opinions, synthesis and panel."""

    opinions: list = field(default_factory=list)
    synthesis: str = ""
    panel: list = field(default_factory=list)


# ─── pharmacogenomics ─────────────────────────────────────────────────────
@dataclass
class PharmacogenAssessment:
    """Gene/variant–drug assessment.

    `variant` is a required argument (it has no default) even though its
    value may be ``None``.
    """

    gene: str
    variant: Optional[str]
    drug: str
    rxcui: Optional[str] = None
    expected_phenotype: str = ""
    recommendation: str = ""
    dose_modification: str = ""
    cpic_level: str = ""  # A | B | C | D | ""
    evidence: str = ""
    confidence: float = 0.5
    source: str = "cpic"  # cpic | pathway


@dataclass
class PharmacogenSpec:
    """Collection of pharmacogenomic assessments with summary counts."""

    assessments: list = field(default_factory=list)
    n_pairs: int = 0
    n_actionable: int = 0
    model: str = "cpic_kg"


# ─── family / pedigree ─────────────────────────────────────────────────────
@dataclass
class RelativeRisk:
    """Recurrence risk for one kind of relative."""

    relation: str  # sibling | parent | offspring | mother | son | daughter
    recurrence_risk: float
    carrier_screening_recommended: bool = False
    rationale: str = ""


@dataclass
class FamilySpec:
    """Family assessment: inheritance mode plus per-relative entries."""

    inheritance_mode: str  # AR | AD | XLR | XLD | MITOCHONDRIAL | DE_NOVO | UNKNOWN
    relatives: list = field(default_factory=list)
    notes: str = ""


# ─── reverse phenotyping ──────────────────────────────────────────────────
@dataclass
class ReversePhenoItem:
    """A phenotype to look for, with its expected frequency."""

    hpo_id: str
    name: str
    expected_frequency: float
    definition: str = ""


@dataclass
class ReversePhenoSpec:
    """Reverse-phenotyping items for a disease."""

    disease_orpha: Optional[str] = None
    items: list = field(default_factory=list)


# ─── protocol compliance ──────────────────────────────────────────────────
@dataclass
class ComplianceGap:
    """One gap between what a protocol expects and what was recorded."""

    category: str  # therapy | exam | referral | off_protocol
    expected: str
    got: Optional[str] = None
    priority: str = "medium"  # low | medium | high
    rationale: str = ""


@dataclass
class ProtocolComplianceSpec:
    """Protocol compliance score and the list of gaps behind it."""

    disease_orpha: Optional[str] = None
    score: float = 0.0
    gaps: list = field(default_factory=list)
    notes: str = ""


# ─── simulation ───────────────────────────────────────────────────────────
@dataclass
class SimulationOutcome:
    """Summary statistics of one metric at one horizon (all required)."""

    metric: str
    horizon_months: int
    n: int
    mean: float
    p05: float
    p50: float
    p95: float


@dataclass
class SimulationSpec:
    """Simulation run summary: run count, intervention and outcome lists."""

    n_runs: int = 0
    intervention: Optional[dict] = None
    horizon_outcomes: list = field(default_factory=list)
    survival_summary: list = field(default_factory=list)
    median_severity: float = 0.0


# ─── the central twin ──────────────────────────────────────────────────────
@dataclass
class GemeoTwin:
    """The central twin record aggregating every optional sub-spec.

    All fields have defaults, so ``GemeoTwin()`` yields an empty twin with
    a generated `id` and matching `created_at` / `updated_at` timestamps.
    """

    id: str = field(default_factory=_new_id)
    case_id: Optional[str] = None
    patient_id: Optional[str] = None
    created_at: str = field(default_factory=_now)
    # Empty string means "not supplied"; __post_init__ then copies
    # created_at. Two independent _now() calls would give a brand-new twin
    # timestamps that differ by a few microseconds.
    updated_at: str = ""
    embedding: Optional[list] = None
    embedding_dim: int = 0
    diagnoses: list = field(default_factory=list)
    cohort: Optional[Cohort] = None
    subgraph: Optional[Subgraph] = None
    trajectory: Optional[TrajectorySpec] = None
    risk: Optional[RiskSpec] = None
    drugs: Optional[DrugSpec] = None
    trials: Optional[TrialSpec] = None
    next_questions: list = field(default_factory=list)
    sus_check: Optional[SusCheck] = None
    viz_data: Optional[VizData] = None
    # Phase-2 case-driven additions
    ddi: Optional[DdiSpec] = None
    consult: Optional[ConsultSpec] = None
    pharmacogen: Optional[PharmacogenSpec] = None
    family: Optional[FamilySpec] = None
    reverse_pheno: Optional[ReversePhenoSpec] = None
    protocol_compliance: Optional[ProtocolComplianceSpec] = None
    # Snapshot state
    snapshot_versions: list = field(default_factory=list)
    n_phenotypes: int = 0
    n_genes: int = 0
    n_labs: int = 0
    extra: dict = field(default_factory=dict)

    def __post_init__(self) -> None:
        """Default `updated_at` to `created_at` when it was not supplied."""
        if not self.updated_at:
            self.updated_at = self.created_at

    def to_dict(self) -> dict:
        """Return the twin as a plain dict.

        Uses `dataclasses.asdict`, which recurses into nested dataclasses,
        lists and dicts and returns copies rather than the live objects.
        """
        return asdict(self)