Spaces:
Sleeping
Sleeping
| # Copyright (c) Meta Platforms, Inc. and affiliates. | |
| # All rights reserved. | |
| # | |
| # This source code is licensed under the BSD-style license found in the | |
| # LICENSE file in the root directory of this source tree. | |
| """ | |
| Data models for the GenoTriage Environment. | |
| This environment presents genetic variant cases to an AI agent and asks it | |
| to classify them using the standard 5-tier ACMG/AMP classification system. | |
| Each episode is single-step: the agent receives a variant observation and | |
| submits exactly one classification action. | |
| """ | |
| from openenv.core.env_server.types import Action, Observation | |
| from pydantic import Field, field_validator | |
| import json | |
| from typing import Any, List, Literal, Optional | |
| class VepAction(Action): | |
| """ | |
| Action submitted by the agent to classify a genetic variant. | |
| The agent must provide a classification from the 5-tier ACMG system, | |
| along with a reasoning string and the specific criteria it used to | |
| reach its conclusion. Graders evaluate all three fields. | |
| """ | |
| classification: Literal[ | |
| "Pathogenic", | |
| "Likely_pathogenic", | |
| "Uncertain_significance", | |
| "Likely_benign", | |
| "Benign", | |
| ] = Field( | |
| ..., | |
| description=( | |
| "ACMG/AMP classification for the variant. Must be exactly one of: " | |
| "Pathogenic, Likely_pathogenic, Uncertain_significance, " | |
| "Likely_benign, Benign." | |
| ), | |
| ) | |
| reasoning: str = Field( | |
| ..., | |
| min_length=20, | |
| description=( | |
| "A clear explanation of why this classification was chosen. " | |
| "Should reference the evidence presented in the observation, " | |
| "including population frequency, molecular consequence, and " | |
| "gene-disease association. Longer, well-supported reasoning " | |
| "receives higher partial credit." | |
| ), | |
| ) | |
| criteria_used: List[str] = Field( | |
| default_factory=list, | |
| description=( | |
| "List of specific criteria that informed the classification. " | |
| "Examples: 'high population frequency', 'nonsense variant', " | |
| "'no functional studies available', 'missense in disease gene', " | |
| "'absent from gnomAD'. Each criterion should be a short phrase." | |
| ), | |
| ) | |
| def validate_criteria_list(cls, v: Any) -> Any: | |
| """ | |
| Coerces strings (JSON-formatted, bracketed, or comma-separated) into a list of strings. | |
| This fixes validation errors when users type lists manually into the web UI. | |
| """ | |
| if isinstance(v, str): | |
| v = v.strip() | |
| # Try to parse as JSON if it looks like a list | |
| if v.startswith("[") and v.endswith("]"): | |
| try: | |
| return json.loads(v) | |
| except (json.JSONDecodeError, ValueError): | |
| # If [af] is sent, json.loads fails. We strip the brackets and treat as comma-separated. | |
| v = v[1:-1].strip() | |
| # Fallback to comma-separated list | |
| if v: | |
| # Split by comma and strip extra quotes if the user typed "item1", "item2" | |
| return [item.strip().strip('"').strip("'") for item in v.split(",") if item.strip()] | |
| return [] | |
| return v | |
| class VepObservation(Observation): | |
| """ | |
| Observation presented to the agent describing a genetic variant case. | |
| On reset(), the agent receives a complete variant case including all | |
| available evidence. After step(), the feedback and reward fields are | |
| populated with grader results. done=True after the first step since | |
| this is a single-step environment. | |
| """ | |
| # --- Variant identity --- | |
| gene: str = Field( | |
| default="", | |
| description="HGNC gene symbol (e.g. BRCA1, TP53, CFTR).", | |
| ) | |
| chromosome: str = Field( | |
| default="", | |
| description="Chromosome on which the variant resides (e.g. '17', 'X').", | |
| ) | |
| position: int = Field( | |
| default=0, | |
| description="Genomic position of the variant on GRCh38.", | |
| ) | |
| ref: str = Field( | |
| default="", | |
| description="Reference allele (single nucleotide for SNPs).", | |
| ) | |
| alt: str = Field( | |
| default="", | |
| description="Alternate allele observed in the patient (single nucleotide for SNPs).", | |
| ) | |
| hgvs: str = Field( | |
| default="", | |
| description="HGVS genomic notation for the variant (e.g. NC_000017.11:g.43094692A>G).", | |
| ) | |
| # --- Functional annotation --- | |
| consequence: Optional[str] = Field( | |
| default=None, | |
| description=( | |
| "Predicted molecular consequence of the variant " | |
| "(e.g. missense_variant, nonsense, splice_donor_variant, synonymous_variant). " | |
| "None if not annotated." | |
| ), | |
| ) | |
| # --- Clinical context --- | |
| disease: str = Field( | |
| default="", | |
| description=( | |
| "Primary disease associated with this gene in ClinVar " | |
| "(e.g. 'Hereditary Breast and Ovarian Cancer syndrome')." | |
| ), | |
| ) | |
| population_frequency: Optional[float] = Field( | |
| default=None, | |
| description=( | |
| "Allele frequency in the gnomAD v4 population database (0.0–1.0). " | |
| "None if the variant was not observed in gnomAD." | |
| ), | |
| ) | |
| # --- Evidence --- | |
| evidence_snippets: List[str] = Field( | |
| default_factory=list, | |
| description=( | |
| "A list of 3–4 evidence snippets providing clinical and functional " | |
| "context for the variant. May include gene-disease association, " | |
| "consequence interpretation, population frequency context, and " | |
| "available functional/literature evidence." | |
| ), | |
| ) | |
| # --- Task instructions --- | |
| task_description: str = Field( | |
| default="", | |
| description=( | |
| "Instructions for the agent describing what it must do in this episode. " | |
| "Includes classification categories and grading criteria summary." | |
| ), | |
| ) | |
| # --- Post-step feedback (empty on reset) --- | |
| feedback: str = Field( | |
| default="", | |
| description=( | |
| "Grader feedback returned after step(). Empty string on reset(). " | |
| "After step(), contains the correct classification, score breakdown, " | |
| "and notes on which criteria were correctly identified." | |
| ), | |
| ) |