"""
services/study_parser.py
--------------------------
MRIStudy: ZIP extraction + BraTS sequence discovery + validation.
Responsible only for turning "a ZIP file landed on disk" into
"four validated NIfTI paths" (t1, t1ce, t2, flair). Knows nothing
about models, meshes, or payloads — pipeline.py wires those together.
"""
from __future__ import annotations

import os
import re
import zipfile
from dataclasses import dataclass
from typing import Optional
class StudyValidationError(Exception):
    """Raised when a study ZIP is missing a required sequence or is malformed.

    The exception message is the human-readable description of the problem.
    """
# Sequence -> regex patterns, searched against the lower-cased file basename.
# Key order is significant to consumers that iterate the mapping: the
# specific "t1ce" entry comes first and the generic "t1" entry last.
#
# The "t1" pattern must never hit a contrast-enhanced file, so its lookahead
# rejects every spelling the "t1ce" patterns accept (t1ce, t1c followed by a
# word boundary, t1-ce, t1_ce). A lookbehind cannot do this job: it inspects
# the text *before* "t1", not the "c"/"ce" that follows it.
_SEQUENCE_PATTERNS: dict[str, list[str]] = {
    "t1ce": [r"t1ce", r"t1c\b", r"t1-ce", r"t1_ce"],
    "flair": [r"flair"],
    "t2": [r"(?<!t1)t2(?!ce)"],
    "t1": [r"t1(?!ce|c\b|[-_]ce)"],
}

# File suffixes treated as NIfTI; callers compare them case-insensitively.
_NIFTI_SUFFIXES = (".nii.gz", ".nii")
@dataclass
class MRIStudy:
    """
    Extracts a BraTS-style ZIP into a session directory and resolves
    the four required sequence files.

    Usage:
        study = MRIStudy.from_zip(zip_path, session_dir)
        study.validate()
        study.t1_path / study.t1ce_path / study.t2_path / study.flair_path

    Attributes:
        session_dir: Directory that is walked to discover NIfTI files.
        t1_path, t1ce_path, t2_path, flair_path: Path of the file matched
            for each sequence, or None when nothing matched.
        study_id: Identifier derived from a discovered filename; set by the
            ``from_*`` constructors.
    """

    session_dir: str
    t1_path: Optional[str] = None
    t1ce_path: Optional[str] = None
    t2_path: Optional[str] = None
    flair_path: Optional[str] = None
    study_id: Optional[str] = None

    # ------------------------------------------------------------------
    # Construction
    # ------------------------------------------------------------------
    @classmethod
    def from_zip(cls, zip_path: str, session_dir: str) -> "MRIStudy":
        """Extract `zip_path` into `session_dir` and discover sequences.

        `session_dir` is created if it does not exist.

        Raises:
            StudyValidationError: if `zip_path` is not a valid ZIP archive.
                The original ``zipfile.BadZipFile`` is kept as ``__cause__``.
        """
        os.makedirs(session_dir, exist_ok=True)
        try:
            with zipfile.ZipFile(zip_path, "r") as zf:
                zf.extractall(session_dir)
        except zipfile.BadZipFile as e:
            raise StudyValidationError(
                f"'{zip_path}' is not a valid ZIP file: {e}"
            ) from e
        # Discovery is identical for a fresh extraction and an existing directory.
        return cls.from_directory(session_dir)

    @classmethod
    def from_directory(cls, session_dir: str) -> "MRIStudy":
        """Discover sequences in an already-extracted directory (no ZIP step)."""
        study = cls(session_dir=session_dir)
        study._discover_sequences()
        study.study_id = study._infer_study_id()
        return study

    # ------------------------------------------------------------------
    # Discovery
    # ------------------------------------------------------------------
    def _all_nifti_files(self) -> list[str]:
        """Return every NIfTI file under `session_dir`, sorted by path.

        Sorting makes the choice deterministic when several files could match
        the same sequence; os.walk order otherwise depends on the filesystem.
        """
        found: list[str] = []
        for root, dirs, files in os.walk(self.session_dir):
            # Archives built on macOS carry a "__MACOSX" tree of metadata
            # stubs; prune it in place so os.walk never descends into it.
            dirs[:] = [d for d in dirs if d != "__MACOSX"]
            for fname in files:
                # "._<name>" stubs share the real file's suffix but hold no image data.
                if fname.startswith("._"):
                    continue
                if fname.lower().endswith(_NIFTI_SUFFIXES):
                    found.append(os.path.join(root, fname))
        found.sort()
        return found

    def _discover_sequences(self) -> None:
        """Fill the four ``*_path`` attributes from the files on disk.

        Sequences are resolved in `_SEQUENCE_PATTERNS` order. A file claimed
        by one sequence is removed from the pool, so a later, more generic
        pattern cannot pick the same file again.
        """
        remaining = self._all_nifti_files()
        for seq_name, patterns in _SEQUENCE_PATTERNS.items():
            match = self._match_sequence(remaining, patterns)
            setattr(self, f"{seq_name}_path", match)
            if match is not None:
                remaining.remove(match)

    @staticmethod
    def _match_sequence(candidates: list[str], patterns: list[str]) -> Optional[str]:
        """Return the first candidate whose lower-cased basename matches any pattern.

        Returns None when no candidate matches.
        """
        for path in candidates:
            fname = os.path.basename(path).lower()
            if any(re.search(pat, fname) for pat in patterns):
                return path
        return None

    def _infer_study_id(self) -> str:
        """Best-effort patient/study identifier from any discovered filename.

        Paths are tried in the order t1, t1ce, t2, flair. Returns
        "unknown_study" when no path yields a non-empty identifier.
        """
        for path in (self.t1_path, self.t1ce_path, self.t2_path, self.flair_path):
            if not path:
                continue
            base = os.path.basename(path)
            for suffix in _NIFTI_SUFFIXES:
                if base.lower().endswith(suffix):
                    base = base[: -len(suffix)]
                    break
            # Strip a trailing sequence tag like "-t1", "_flair", etc.
            study_id = re.sub(
                r"[-_]?(t1ce|t1c|t1|t2|flair)$", "", base, flags=re.IGNORECASE
            )
            # A file named just "t1.nii.gz" leaves nothing; try the next path.
            if study_id:
                return study_id
        return "unknown_study"

    # ------------------------------------------------------------------
    # Validation
    # ------------------------------------------------------------------
    def validate(self) -> None:
        """Raise StudyValidationError if any required sequence is missing."""
        missing = [
            name
            for name, path in (
                ("T1", self.t1_path),
                ("T1ce", self.t1ce_path),
                ("T2", self.t2_path),
                ("FLAIR", self.flair_path),
            )
            if path is None
        ]
        if missing:
            raise StudyValidationError(
                f"Study at '{self.session_dir}' is missing required sequence(s): "
                f"{', '.join(missing)}."
            )

    def as_paths(self) -> dict[str, Optional[str]]:
        """Return the four sequence paths keyed by attribute name."""
        return {
            "t1_path": self.t1_path,
            "t1ce_path": self.t1ce_path,
            "t2_path": self.t2_path,
            "flair_path": self.flair_path,
        }