"""Human-readable labels for compact codes used in cached tables.""" from __future__ import annotations import numpy as np import pandas as pd # Matches interpretation.predictions._get_modality_info letter codes (R/A/F order). # Short table-friendly labels (no long parentheticals). _MODALITY_LONG: dict[str, str] = { "RAF": "RNA + ATAC + Flux", "RA": "RNA + ATAC", "RF": "RNA + Flux", "AF": "ATAC + Flux", "R": "RNA only", "A": "ATAC only", "F": "Flux only", "None": "No modality data", "none": "No modality data", "nan": "No modality data", } # Rename row fields in inspector tables for display. _FIELD_DISPLAY: dict[str, str] = { "label": "CellTag-Multi label", } # Latent explorer: table headers and key–value inspector (exclude non-meaningful / internal cols). LATENT_TABLE_RENAME: dict[str, str] = { "label": "CellTag-Multi label", "predicted_class": "Predicted fate", "predicted_value": "Prediction score", "correct": "Prediction correct", "pct": "Dominant fate (%)", "modality_label": "Available modalities", "dataset_idx": "Dataset index", "batch_no": "Batch", "fold": "Cross Validation fold", "clone_id": "Clone ID", "clone_size": "Clone size", "cell_type": "Cell type", } LATENT_DROP_FROM_TABLES: frozenset[str] = frozenset({"umap_x", "umap_y", "modality", "pct_decile"}) _NAME_MAP = {**_FIELD_DISPLAY, **LATENT_TABLE_RENAME} def _format_scalar(v) -> str: if v is None: return "" if isinstance(v, bool): return "Yes" if v else "No" try: if pd.isna(v): return "" except (ValueError, TypeError): pass if isinstance(v, (float, np.floating)) and np.isnan(v): return "" return str(v) def _field_label(name: str, *, fallback_field_display: bool) -> str: k = str(name) if fallback_field_display: return _NAME_MAP.get(k, _FIELD_DISPLAY.get(k, k)) return _NAME_MAP.get(k, k) def expand_modality(code) -> str: """Map R/A/F codes (e.g. RAF, RA) to full names.""" if code is None: return _MODALITY_LONG["None"] try: if pd.isna(code): return _MODALITY_LONG["None"] except (ValueError, TypeError): pass if isinstance(code, (float, np.floating)) and np.isnan(code): return _MODALITY_LONG["None"] key = str(code).strip() if not key or key.lower() == "nan": return _MODALITY_LONG["None"] return _MODALITY_LONG.get(key, key) def annotate_modality_column(df, code_col: str = "modality", label_col: str = "modality_label"): """Add human-readable modality column; returns a copy.""" out = df.copy() out[label_col] = out[code_col].map(expand_modality) return out def prepare_latent_display_dataframe(df: pd.DataFrame) -> pd.DataFrame: """Drop UMAP / internal columns and rename headers for Selected-points style tables.""" drop = [c for c in df.columns if c in LATENT_DROP_FROM_TABLES or str(c).startswith("umap_")] out = df.drop(columns=drop, errors="ignore") return out.rename(columns=LATENT_TABLE_RENAME) def latent_inspector_key_value(series: pd.Series) -> pd.DataFrame: """Key–value inspector row: human names, no UMAP coordinates.""" s = series.drop( labels=[c for c in series.index if c in LATENT_DROP_FROM_TABLES or str(c).startswith("umap_")], errors="ignore", ) idx = [_field_label(i, fallback_field_display=False) for i in s.index] vals = [_format_scalar(v) for v in s.values] return pd.DataFrame({"Field": idx, "Value": vals}) def dataframe_to_arrow_safe_kv(series: pd.Series) -> pd.DataFrame: """Two string columns for Streamlit/PyArrow (avoids mixed-type single column).""" s = series.copy() idx = [_field_label(i, fallback_field_display=True) for i in s.index] vals = [_format_scalar(v) for v in s.values] return pd.DataFrame({"field": idx, "value": vals})