"""
services/study_parser.py
--------------------------
MRIStudy: ZIP extraction + BraTS sequence discovery + validation.

Responsible only for turning "a ZIP file landed on disk" into "four validated
NIfTI paths" (t1, t1ce, t2, flair). Knows nothing about models, meshes, or
payloads — pipeline.py wires those together.
"""

from __future__ import annotations

import os
import re
import zipfile
from dataclasses import dataclass
from typing import Optional


class StudyValidationError(Exception):
    """Raised when a study ZIP is missing a required sequence or is malformed."""


# Sequence -> regex patterns to match against (case-insensitive) filenames.
# Ordered so more-specific patterns (t1ce) are checked before substrings
# that could also match t1.
#
# NOTE(review): the "t2" and "t1" entries were reconstructed. Only the opening
# "(?" of the t2 pattern survived in the reviewed copy of this file, so confirm
# both against version control. The intent assumed here: require a non
# alphanumeric character (or start of name) before the tag, and keep "t1" from
# matching anything the "t1ce" patterns above already accept.
_SEQUENCE_PATTERNS: dict[str, list[str]] = {
    "t1ce": [r"t1ce", r"t1c\b", r"t1-ce", r"t1_ce"],
    "flair": [r"flair"],
    "t2": [r"(?<![a-z0-9])t2"],
    "t1": [r"(?<![a-z0-9])t1(?!ce|c\b|-ce|_ce)"],
}

# NOTE(review): reconstructed. The visible code passes this to str.endswith()
# and iterates it, so it must be a tuple of lowercase suffixes.
_NIFTI_SUFFIXES = (".nii.gz", ".nii")

# Archive-tool metadata that can carry a NIfTI suffix without being an image:
# macOS "Compress" adds a __MACOSX/ tree full of "._<name>" AppleDouble files.
_IGNORED_DIRS = frozenset({"__MACOSX"})
_IGNORED_FILE_PREFIX = "._"


@dataclass
class MRIStudy:
    """Locations of the four BraTS input sequences found under one directory.

    Build instances with :meth:`from_zip` or :meth:`from_directory`, then call
    :meth:`validate` before handing :meth:`as_paths` to downstream code.

    NOTE(review): the field list was reconstructed from how the methods use
    the instance; confirm the defaults (``study_id`` in particular).
    """

    session_dir: str  # directory that is searched recursively for NIfTI files
    t1_path: Optional[str] = None  # None until discovery finds a match
    t1ce_path: Optional[str] = None
    t2_path: Optional[str] = None
    flair_path: Optional[str] = None
    study_id: str = "unknown_study"

    # ------------------------------------------------------------------
    # Construction
    # ------------------------------------------------------------------

    @classmethod
    def from_zip(cls, zip_path: str, session_dir: str) -> "MRIStudy":
        """Extract `zip_path` into `session_dir` and discover sequences.

        Args:
            zip_path: Path of the ZIP archive on disk.
            session_dir: Destination directory; created if it does not exist.

        Returns:
            A study with sequence paths and ``study_id`` populated. Missing
            sequences are left as ``None``; call :meth:`validate` to enforce
            completeness.

        Raises:
            StudyValidationError: If the file is not a valid ZIP archive.
        """
        os.makedirs(session_dir, exist_ok=True)
        try:
            # NOTE(security): zipfile strips absolute paths and ".." components
            # from member names, so extraction stays inside session_dir, but
            # nothing here bounds the decompressed size of an untrusted upload.
            with zipfile.ZipFile(zip_path, "r") as zf:
                zf.extractall(session_dir)
        except zipfile.BadZipFile as e:
            # Chain the cause so the original zipfile traceback is preserved.
            raise StudyValidationError(
                f"'{zip_path}' is not a valid ZIP file: {e}"
            ) from e
        return cls.from_directory(session_dir)

    @classmethod
    def from_directory(cls, session_dir: str) -> "MRIStudy":
        """Discover sequences in an already-extracted directory (no ZIP step)."""
        study = cls(session_dir=session_dir)
        study._discover_sequences()
        study.study_id = study._infer_study_id()
        return study

    # ------------------------------------------------------------------
    # Discovery
    # ------------------------------------------------------------------

    def _all_nifti_files(self) -> list[str]:
        """Return every NIfTI file under ``session_dir`` in a stable order.

        Directories and files are visited in sorted order because os.walk()
        yields entries in arbitrary, filesystem-dependent order; without
        sorting, the file chosen for a sequence could differ between runs.
        Archive metadata (``__MACOSX`` trees, ``._*`` files) is skipped.
        """
        found: list[str] = []
        for root, dirs, files in os.walk(self.session_dir):
            # Pruning/sorting in place steers which subdirectories os.walk
            # descends into and in what order.
            dirs[:] = sorted(d for d in dirs if d not in _IGNORED_DIRS)
            for fname in sorted(files):
                if fname.startswith(_IGNORED_FILE_PREFIX):
                    continue
                if fname.lower().endswith(_NIFTI_SUFFIXES):
                    found.append(os.path.join(root, fname))
        return found

    def _discover_sequences(self) -> None:
        """Populate the ``*_path`` attributes from files under ``session_dir``.

        Sequences are resolved in ``_SEQUENCE_PATTERNS`` order. A file claimed
        by one sequence is removed from the pool so the same volume can never
        be reported as two different sequences.
        """
        remaining = self._all_nifti_files()
        for seq_name, patterns in _SEQUENCE_PATTERNS.items():
            match = self._match_sequence(remaining, patterns)
            setattr(self, f"{seq_name}_path", match)
            if match is not None:
                remaining.remove(match)

    @staticmethod
    def _match_sequence(candidates: list[str], patterns: list[str]) -> Optional[str]:
        """Return the first candidate whose lowercased basename matches a pattern.

        Only the file name is tested, never the directory part of the path.
        Returns ``None`` when no candidate matches.
        """
        for path in candidates:
            fname = os.path.basename(path).lower()
            if any(re.search(pat, fname) for pat in patterns):
                return path
        return None

    def _infer_study_id(self) -> str:
        """Best-effort patient/study identifier from any discovered filename.

        Strips the NIfTI suffix and a trailing sequence tag from the first
        discovered file that leaves a non-empty stem. Falls back to
        ``"unknown_study"`` when nothing was discovered or every file name is
        nothing but a sequence tag (e.g. ``t1.nii.gz``).
        """
        for path in (self.t1_path, self.t1ce_path, self.t2_path, self.flair_path):
            if not path:
                continue
            base = os.path.basename(path)
            for suffix in _NIFTI_SUFFIXES:
                if base.lower().endswith(suffix):
                    base = base[: -len(suffix)]
                    break
            # Strip a trailing sequence tag like "-t1", "_flair", etc.
            base = re.sub(
                r"[-_]?(t1ce|t1c|t1|t2|flair)$", "", base, flags=re.IGNORECASE
            )
            if base:
                return base
        return "unknown_study"

    # ------------------------------------------------------------------
    # Validation
    # ------------------------------------------------------------------

    def validate(self) -> None:
        """Raise StudyValidationError if any required sequence is missing."""
        missing = [
            name
            for name, path in (
                ("T1", self.t1_path),
                ("T1ce", self.t1ce_path),
                ("T2", self.t2_path),
                ("FLAIR", self.flair_path),
            )
            if path is None
        ]
        if missing:
            raise StudyValidationError(
                f"Study at '{self.session_dir}' is missing required sequence(s): "
                f"{', '.join(missing)}."
            )

    def as_paths(self) -> dict[str, Optional[str]]:
        """Return the four sequence paths keyed by ``<sequence>_path``."""
        return {
            "t1_path": self.t1_path,
            "t1ce_path": self.t1ce_path,
            "t2_path": self.t2_path,
            "flair_path": self.flair_path,
        }