Spaces:
Sleeping
Sleeping
| """UI utility functions for the Mosaic Gradio interface. | |
| This module provides helper functions for: | |
| - OncoTree code lookup and caching | |
| - User session directory management | |
| - Settings CSV loading and validation | |
| - Data export functionality | |
| """ | |
| import tempfile | |
| from pathlib import Path | |
| import pandas as pd | |
| import gradio as gr | |
| import requests | |
| from mosaic.data_directory import get_data_directory | |
# Root folder for per-session user data. It lives under the system temp
# directory so that it stays outside the project directory when running locally.
TEMP_USER_DATA_DIR = Path(tempfile.gettempdir()).joinpath("mosaic_user_data")

# Accepted values for the "IHC Subtype" column; the empty string is the
# "not set" choice.
IHC_SUBTYPES = [
    "",
    "HR+/HER2+",
    "HR+/HER2-",
    "HR-/HER2+",
    "HR-/HER2-",
]

# Accepted values for the "Sex" column.
SEX_OPTIONS = [
    "Male",
    "Female",
]

# Column names (and column order) of the slide settings table.
SETTINGS_COLUMNS = [
    "Slide",
    "Site Type",
    "Sex",
    "Tissue Site",
    "Cancer Subtype",
    "IHC Subtype",
    "Segmentation Config",
]

# Cache of OncoTree code -> display name, filled by get_oncotree_code_name().
oncotree_code_map = {}

# Cached tissue site names; stays None until get_tissue_sites() first runs.
tissue_site_list = None
def get_tissue_sites():
    """Get the list of tissue sites from the tissue site map file.

    The list is built on the first call and cached in the module-level
    ``tissue_site_list``; later calls return the cached list. The fallback
    list is cached as well, so the warning is shown at most once.

    Returns:
        List of tissue site names: "Unknown" followed by the sorted unique
        values of the CSV's ``TISSUE_SITE`` column. Returns ["Unknown"] if
        the CSV file is not found.
    """
    global tissue_site_list
    if tissue_site_list is not None:
        return tissue_site_list

    # Bound before the try block so the warning below can always format it,
    # even if FileNotFoundError is raised before the path has been built.
    tissue_site_map_path = None
    try:
        data_dir = get_data_directory()
        tissue_site_map_path = data_dir / "tissue_site_original_to_idx.csv"
        df = pd.read_csv(tissue_site_map_path)
        # Get unique tissue sites and sort them
        tissue_site_list = ["Unknown"] + sorted(df["TISSUE_SITE"].unique().tolist())
    except FileNotFoundError:
        gr.Warning(
            f"Tissue site mapping file not found at {tissue_site_map_path}. "
            "Only 'Unknown' option will be available for tissue site selection. "
            "Please ensure the data files are downloaded from the model repository."
        )
        tissue_site_list = ["Unknown"]
    return tissue_site_list
def get_oncotree_code_name(code):
    """Retrieve the human-readable name for an OncoTree code.

    Queries the OncoTree API to get the cancer subtype name corresponding
    to the given code. Every answer received from the API is cached in the
    module-level ``oncotree_code_map`` to avoid repeated API calls; this
    includes "Unknown" for a non-200 status or an empty result.

    Args:
        code: OncoTree code (e.g., "LUAD", "BRCA")

    Returns:
        Human-readable cancer subtype name, or "Unknown" if not found or if
        the API could not be reached.
    """
    if code in oncotree_code_map:
        return oncotree_code_map[code]

    url = f"https://oncotree.mskcc.org/api/tumorTypes/search/code/{code}?exactMatch=true&version=oncotree_2025_04_08"
    try:
        # Without a timeout, requests waits indefinitely on a stalled
        # connection, which would block the caller.
        response = requests.get(url, timeout=10)
        data = response.json() if response.status_code == 200 else None
    except (requests.RequestException, ValueError):
        # Network failure or unparsable body: report "Unknown" but do not
        # cache it, so a later call can retry once the service is reachable.
        return "Unknown"

    code_name = "Unknown"
    if data:
        code_name = data[0]["name"]
    oncotree_code_map[code] = code_name
    return code_name
def create_user_directory(state, request: gr.Request):
    """Create (if needed) and return the directory for the current session.

    The directory is ``TEMP_USER_DATA_DIR`` joined with the request's
    session hash; missing parent directories are created and an already
    existing directory is reused.

    Args:
        state: Gradio state object (unused)
        request: Gradio request object containing session hash

    Returns:
        Path to user's session directory, or None if no session hash available
    """
    token = request.session_hash
    if token is not None:
        session_dir = TEMP_USER_DATA_DIR / token
        session_dir.mkdir(parents=True, exist_ok=True)
        return session_dir
    return None
def load_settings(slide_csv_path):
    """Load slide analysis settings from CSV file.

    Loads the CSV and ensures all required columns are present, adding defaults
    for optional columns if they are missing. The file is read with
    ``na_filter=False``, so empty cells stay empty strings instead of NaN.

    Args:
        slide_csv_path: Path to the CSV file containing slide settings

    Returns:
        DataFrame restricted to, and ordered as, ``SETTINGS_COLUMNS``: Slide,
        Site Type, Sex, Tissue Site, Cancer Subtype, IHC Subtype,
        Segmentation Config

    Raises:
        ValueError: If required columns are missing from the CSV; the message
            lists the missing column names.
    """
    settings_df = pd.read_csv(slide_csv_path, na_filter=False)

    # Optional columns and the value used for every row when the CSV omits them.
    optional_defaults = {
        "Segmentation Config": "Biopsy",
        "Cancer Subtype": "Unknown",
        "IHC Subtype": "",
        "Tissue Site": "Unknown",
    }
    for column, default in optional_defaults.items():
        if column not in settings_df.columns:
            settings_df[column] = default

    # Whatever is still absent has no default and must come from the CSV.
    missing = [col for col in SETTINGS_COLUMNS if col not in settings_df.columns]
    if missing:
        raise ValueError(
            f"Missing required column in CSV file: {', '.join(missing)}"
        )

    # Drop extra columns and enforce the canonical column order.
    return settings_df[SETTINGS_COLUMNS]
def validate_settings(
    settings_df,
    cancer_subtype_name_map,
    cancer_subtypes,
    reversed_cancer_subtype_name_map,
):
    """Check every slide row and reset invalid cells to their defaults.

    The frame's columns are first renamed to ``SETTINGS_COLUMNS``. Each row is
    then checked field by field; cancer subtype codes are converted to their
    display names, and any value that is not accepted is replaced by a default
    while a message describing the problem is collected.

    Args:
        settings_df: DataFrame with slide settings to validate (modified in place)
        cancer_subtype_name_map: Dict mapping subtype display names to codes
        cancer_subtypes: List of valid cancer subtype codes
        reversed_cancer_subtype_name_map: Dict mapping codes to display names

    Returns:
        The same DataFrame, with normalized values

    Note:
        All collected messages are shown to the user as a single Gradio
        warning, one message per line.
    """
    allowed_site_types = ("Metastatic", "Primary")
    allowed_seg_configs = ("Biopsy", "Resection", "TCGA")

    settings_df.columns = SETTINGS_COLUMNS
    messages = []
    tissue_sites = get_tissue_sites()

    for row_label, record in settings_df.iterrows():
        slide = record["Slide"]

        # Cancer subtype: accept a code and store its display name instead.
        raw_subtype = record["Cancer Subtype"]
        if raw_subtype in cancer_subtypes:
            display_name = reversed_cancer_subtype_name_map[raw_subtype]
            settings_df.at[row_label, "Cancer Subtype"] = display_name
        if settings_df.at[row_label, "Cancer Subtype"] not in cancer_subtype_name_map:
            messages.append(
                f"Slide {slide}: Unknown cancer subtype. Valid subtypes are: {', '.join(cancer_subtype_name_map)}. "
            )
            settings_df.at[row_label, "Cancer Subtype"] = "Unknown"

        # Site type falls back to "Primary".
        if record["Site Type"] not in allowed_site_types:
            messages.append(
                f"Slide {slide}: Unknown site type. Valid types are: Metastatic, Primary. "
            )
            settings_df.at[row_label, "Site Type"] = "Primary"

        # Sex: the legacy "Unknown" value is cleared without a message; an
        # empty value is left alone here (it is validated at analysis time);
        # anything else outside SEX_OPTIONS is cleared with a message.
        sex = record["Sex"]
        if sex == "Unknown":
            settings_df.at[row_label, "Sex"] = ""
        elif sex and sex not in SEX_OPTIONS:
            messages.append(
                f"Slide {slide}: Invalid sex value '{sex}'. Valid options are: {', '.join(SEX_OPTIONS)}. "
            )
            settings_df.at[row_label, "Sex"] = ""

        # Tissue site falls back to "Unknown".
        if record["Tissue Site"] not in tissue_sites:
            messages.append(
                f"Slide {slide}: Unknown tissue site. Valid tissue sites are: {', '.join(tissue_sites)}. "
            )
            settings_df.at[row_label, "Tissue Site"] = "Unknown"

        # IHC subtype is only kept when the (normalized) cancer subtype
        # mentions "Breast".
        is_breast = "Breast" in settings_df.at[row_label, "Cancer Subtype"]
        if not is_breast and record["IHC Subtype"] != "":
            messages.append(
                f"Slide {slide}: IHC subtype should be empty for non-breast cancer subtypes. "
            )
            settings_df.at[row_label, "IHC Subtype"] = ""

        # Deliberately read from the row again after the possible reset above,
        # exactly where the value has always been read.
        if record["IHC Subtype"] not in IHC_SUBTYPES:
            messages.append(
                f"Slide {slide}: Unknown IHC subtype. Valid subtypes are: {', '.join(IHC_SUBTYPES)}. "
            )
            settings_df.at[row_label, "IHC Subtype"] = ""

        # Segmentation config falls back to "Biopsy".
        if record["Segmentation Config"] not in allowed_seg_configs:
            messages.append(
                f"Slide {slide}: Unknown segmentation config. Valid configs are: Biopsy, Resection, TCGA. "
            )
            settings_df.at[row_label, "Segmentation Config"] = "Biopsy"

    if messages:
        gr.Warning("\n".join(messages))
    return settings_df
def export_to_csv(df):
    """Export a DataFrame to CSV file for download.

    Each call writes into its own freshly created temporary directory, so
    exports from concurrent sessions never overwrite one another and nothing
    is written into the process's working directory. The file itself is
    always named ``paladin_results.csv``.

    Args:
        df: DataFrame to export

    Returns:
        Path (as a string) to the exported CSV file

    Raises:
        gr.Error: If the DataFrame is None or empty
    """
    if df is None or df.empty:
        raise gr.Error("No data to export.")

    # NOTE(review): the directory is created under the system temp directory,
    # which Gradio is expected to allow downloads from by default - confirm
    # against the deployed Gradio version's file-access rules.
    export_dir = Path(tempfile.mkdtemp(prefix="mosaic_export_"))
    csv_path = export_dir / "paladin_results.csv"
    df.to_csv(csv_path, index=False)
    return str(csv_path)