"""Pre-computed constants for ISLES24 dataset.

The ISLES24 challenge dataset is static (case IDs will never change).
Pre-computing these values avoids:

1. PyArrow streaming bug (apache/arrow#45214) that hangs on parquet iteration
2. Memory issues from downloading the full 99GB dataset

See docs/specs/08-bug-hf-spaces-dataset-loop.md for full investigation.

Public constants:
    ISLES24_CASE_IDS:   tuple of case IDs, ordered by parquet file index.
    ISLES24_CASE_INDEX: dict mapping each case ID to its parquet file index.
    ISLES24_NUM_FILES:  total number of parquet files (one per case ID).
    ISLES24_DATASET_ID: dataset identifier on HuggingFace Hub.

Importing this module raises AssertionError if the constants disagree with
each other (wrong entry count or a duplicated case ID).
"""

# Pre-computed case IDs for ISLES24 dataset
# Extracted via HfFileSystem enumeration on 2025-12-08
# Order matches parquet file indices (train-00000-of-00149.parquet = index 0)
# NOTE: the numeric suffixes are NOT contiguous (e.g. there is no
# sub-stroke0018), so a file index cannot be derived from the case number;
# always look it up via ISLES24_CASE_INDEX.
ISLES24_CASE_IDS: tuple[str, ...] = (
    "sub-stroke0001",
    "sub-stroke0002",
    "sub-stroke0003",
    "sub-stroke0004",
    "sub-stroke0005",
    "sub-stroke0006",
    "sub-stroke0007",
    "sub-stroke0008",
    "sub-stroke0009",
    "sub-stroke0010",
    "sub-stroke0011",
    "sub-stroke0012",
    "sub-stroke0013",
    "sub-stroke0014",
    "sub-stroke0015",
    "sub-stroke0016",
    "sub-stroke0017",
    "sub-stroke0019",
    "sub-stroke0020",
    "sub-stroke0021",
    "sub-stroke0022",
    "sub-stroke0025",
    "sub-stroke0026",
    "sub-stroke0027",
    "sub-stroke0028",
    "sub-stroke0030",
    "sub-stroke0033",
    "sub-stroke0036",
    "sub-stroke0037",
    "sub-stroke0038",
    "sub-stroke0040",
    "sub-stroke0043",
    "sub-stroke0045",
    "sub-stroke0047",
    "sub-stroke0048",
    "sub-stroke0049",
    "sub-stroke0052",
    "sub-stroke0053",
    "sub-stroke0054",
    "sub-stroke0055",
    "sub-stroke0057",
    "sub-stroke0062",
    "sub-stroke0066",
    "sub-stroke0068",
    "sub-stroke0070",
    "sub-stroke0071",
    "sub-stroke0073",
    "sub-stroke0074",
    "sub-stroke0075",
    "sub-stroke0076",
    "sub-stroke0077",
    "sub-stroke0078",
    "sub-stroke0079",
    "sub-stroke0080",
    "sub-stroke0081",
    "sub-stroke0082",
    "sub-stroke0083",
    "sub-stroke0084",
    "sub-stroke0085",
    "sub-stroke0086",
    "sub-stroke0087",
    "sub-stroke0088",
    "sub-stroke0089",
    "sub-stroke0090",
    "sub-stroke0091",
    "sub-stroke0092",
    "sub-stroke0093",
    "sub-stroke0094",
    "sub-stroke0095",
    "sub-stroke0096",
    "sub-stroke0097",
    "sub-stroke0098",
    "sub-stroke0099",
    "sub-stroke0100",
    "sub-stroke0101",
    "sub-stroke0102",
    "sub-stroke0103",
    "sub-stroke0104",
    "sub-stroke0105",
    "sub-stroke0106",
    "sub-stroke0107",
    "sub-stroke0108",
    "sub-stroke0109",
    "sub-stroke0110",
    "sub-stroke0111",
    "sub-stroke0112",
    "sub-stroke0113",
    "sub-stroke0114",
    "sub-stroke0115",
    "sub-stroke0116",
    "sub-stroke0117",
    "sub-stroke0118",
    "sub-stroke0119",
    "sub-stroke0133",
    "sub-stroke0134",
    "sub-stroke0135",
    "sub-stroke0136",
    "sub-stroke0137",
    "sub-stroke0138",
    "sub-stroke0139",
    "sub-stroke0140",
    "sub-stroke0141",
    "sub-stroke0142",
    "sub-stroke0143",
    "sub-stroke0144",
    "sub-stroke0145",
    "sub-stroke0146",
    "sub-stroke0147",
    "sub-stroke0148",
    "sub-stroke0149",
    "sub-stroke0150",
    "sub-stroke0151",
    "sub-stroke0152",
    "sub-stroke0153",
    "sub-stroke0154",
    "sub-stroke0155",
    "sub-stroke0156",
    "sub-stroke0157",
    "sub-stroke0158",
    "sub-stroke0159",
    "sub-stroke0161",
    "sub-stroke0162",
    "sub-stroke0163",
    "sub-stroke0164",
    "sub-stroke0165",
    "sub-stroke0166",
    "sub-stroke0167",
    "sub-stroke0168",
    "sub-stroke0169",
    "sub-stroke0170",
    "sub-stroke0171",
    "sub-stroke0172",
    "sub-stroke0173",
    "sub-stroke0174",
    "sub-stroke0175",
    "sub-stroke0176",
    "sub-stroke0177",
    "sub-stroke0178",
    "sub-stroke0179",
    "sub-stroke0180",
    "sub-stroke0181",
    "sub-stroke0182",
    "sub-stroke0183",
    "sub-stroke0184",
    "sub-stroke0185",
    "sub-stroke0186",
    "sub-stroke0187",
    "sub-stroke0188",
    "sub-stroke0189",
)

# Mapping from case ID to parquet file index (0-indexed).
# The index is the ID's position in ISLES24_CASE_IDS, not its case number:
#   train-00000-of-00149.parquet contains sub-stroke0001
#   train-00001-of-00149.parquet contains sub-stroke0002
#   train-00017-of-00149.parquet contains sub-stroke0019 (0018 does not exist)
ISLES24_CASE_INDEX: dict[str, int] = {case_id: idx for idx, case_id in enumerate(ISLES24_CASE_IDS)}

# Total number of parquet files in the dataset
ISLES24_NUM_FILES: int = 149

# Sanity checks: ensure constants are consistent.
# Written as explicit raises rather than `assert` statements so they still run
# under `python -O` (which strips asserts). AssertionError is raised on purpose
# so the failure type is the same as with the previous `assert`.
if len(ISLES24_CASE_IDS) != ISLES24_NUM_FILES:
    raise AssertionError(
        f"ISLES24_CASE_IDS has {len(ISLES24_CASE_IDS)} entries but ISLES24_NUM_FILES is {ISLES24_NUM_FILES}"
    )

# A duplicated ID would silently collapse in the dict comprehension above and
# shift the ID -> file mapping, so reject it at import time.
if len(ISLES24_CASE_INDEX) != len(ISLES24_CASE_IDS):
    raise AssertionError(
        f"ISLES24_CASE_IDS contains duplicates: {len(ISLES24_CASE_IDS)} entries "
        f"but only {len(ISLES24_CASE_INDEX)} unique case IDs"
    )

# Dataset identifier on HuggingFace Hub
ISLES24_DATASET_ID: str = "hugging-science/isles24-stroke"