Spaces:

Angione-Lab
/

FateFormerExplorer

Running

App Files Files Community

FateFormerExplorer / streamlit_hf /lib /formatters.py

kaveh

changed CV to cross validation and increased font size in first page

34cacad 19 days ago

raw

history blame contribute delete

3.95 kB

	"""Human-readable labels for compact codes used in cached tables."""

	from __future__ import annotations

	import numpy as np
	import pandas as pd

	# Display labels for the modality letter codes (letters in R/A/F order), kept
	# in step with interpretation.predictions._get_modality_info.
	# Labels are deliberately short so they fit in table cells; the spellings
	# "None", "none" and "nan" all share one "no data" label.
	_MODALITY_LONG: dict[str, str] = {
	    "RAF": "RNA + ATAC + Flux",
	    "RA": "RNA + ATAC",
	    "RF": "RNA + Flux",
	    "AF": "ATAC + Flux",
	    "R": "RNA only",
	    "A": "ATAC only",
	    "F": "Flux only",
	    **dict.fromkeys(("None", "none", "nan"), "No modality data"),
	}

	# Display names for raw row fields shown in inspector tables.
	_FIELD_DISPLAY: dict[str, str] = {
	    "label": "CellTag-Multi label",
	}

	# Latent explorer: raw column name -> header shown in tables and in the
	# key-value inspector.
	LATENT_TABLE_RENAME: dict[str, str] = {
	    "label": "CellTag-Multi label",
	    "predicted_class": "Predicted fate",
	    "predicted_value": "Prediction score",
	    "correct": "Prediction correct",
	    "pct": "Dominant fate (%)",
	    "modality_label": "Available modalities",
	    "dataset_idx": "Dataset index",
	    "batch_no": "Batch",
	    "fold": "Cross Validation fold",
	    "clone_id": "Clone ID",
	    "clone_size": "Clone size",
	    "cell_type": "Cell type",
	}

	# Columns that are removed before a latent table is displayed.
	LATENT_DROP_FROM_TABLES: frozenset[str] = frozenset(
	    {"umap_x", "umap_y", "modality", "pct_decile"}
	)

	# Single lookup table used by _field_label. It has to be a dict: writing
	# {a, b} without ** builds a *set* of dicts, which raises
	# "TypeError: unhashable type: 'dict'" as soon as the module is imported.
	# LATENT_TABLE_RENAME is unpacked last so it wins if both maps share a key.
	_NAME_MAP: dict[str, str] = {**_FIELD_DISPLAY, **LATENT_TABLE_RENAME}


	def _format_scalar(v) -> str:
	    """Render one cell value as display text.

	    Args:
	        v: A value taken from a row; normally a scalar.

	    Returns:
	        ``""`` for missing values (``None``, NaN, ``pd.NA``, ``pd.NaT``),
	        ``"Yes"``/``"No"`` for Python and NumPy booleans, otherwise ``str(v)``.
	    """
	    if v is None:
	        return ""
	    # np.bool_ is not a subclass of bool, so it must be listed explicitly;
	    # otherwise NumPy booleans would be rendered as "True"/"False".
	    if isinstance(v, (bool, np.bool_)):
	        return "Yes" if v else "No"
	    try:
	        missing = pd.isna(v)
	    except (ValueError, TypeError):
	        missing = False
	    # For list-like input pd.isna answers element-wise with an array. Only a
	    # 0-d answer describes the value itself, so a container such as [nan] is
	    # shown as-is and no longer silently blanked.
	    if np.ndim(missing) == 0 and missing:
	        return ""
	    return str(v)


	def _field_label(name: str, *, fallback_field_display: bool) -> str:
	    """Return the display name for a raw field name.

	    The name is looked up in ``_NAME_MAP`` first. When it is absent, the
	    result is the ``_FIELD_DISPLAY`` entry (if ``fallback_field_display`` is
	    true and such an entry exists) or the stringified name itself.
	    """
	    key = str(name)
	    # What to return when _NAME_MAP has no entry for this key.
	    default = _FIELD_DISPLAY.get(key, key) if fallback_field_display else key
	    return _NAME_MAP.get(key, default)


	def expand_modality(code) -> str:
	    """Translate a modality letter code (e.g. ``RAF``, ``RA``) into its label.

	    Missing input (``None``, NaN-like values, blank strings, or any casing of
	    the text ``"nan"``) yields the ``"None"`` entry of ``_MODALITY_LONG``.
	    Codes without an entry are returned as their stripped string form.
	    """
	    missing = code is None
	    if not missing:
	        try:
	            missing = bool(pd.isna(code))
	        except (ValueError, TypeError):
	            # Ambiguous or unsupported input: fall through to the checks below.
	            missing = False
	    if not missing and isinstance(code, (float, np.floating)):
	        missing = bool(np.isnan(code))
	    if not missing:
	        text = str(code).strip()
	        if text and text.lower() != "nan":
	            return _MODALITY_LONG.get(text, text)
	    return _MODALITY_LONG["None"]


	def annotate_modality_column(df, code_col: str = "modality", label_col: str = "modality_label"):
	    """Return a copy of ``df`` with a human-readable modality column added.

	    Every value in ``code_col`` is passed through ``expand_modality`` and the
	    result is stored in ``label_col``; the input frame is left unmodified.
	    """
	    annotated = df.copy()
	    labels = annotated[code_col].map(expand_modality)
	    annotated[label_col] = labels
	    return annotated


	def prepare_latent_display_dataframe(df: pd.DataFrame) -> pd.DataFrame:
	    """Build the display version of a latent table.

	    Columns listed in ``LATENT_DROP_FROM_TABLES`` or whose name starts with
	    ``umap_`` are removed, then the remaining headers are renamed through
	    ``LATENT_TABLE_RENAME``.
	    """
	    hidden = []
	    for col in df.columns:
	        if col in LATENT_DROP_FROM_TABLES or str(col).startswith("umap_"):
	            hidden.append(col)
	    visible = df.drop(columns=hidden, errors="ignore")
	    return visible.rename(columns=LATENT_TABLE_RENAME)


	def latent_inspector_key_value(series: pd.Series) -> pd.DataFrame:
	    """Turn one row into a two-column ``Field`` / ``Value`` table.

	    Entries listed in ``LATENT_DROP_FROM_TABLES`` or whose label starts with
	    ``umap_`` are left out; labels go through ``_field_label`` (without the
	    ``_FIELD_DISPLAY`` fallback) and values through ``_format_scalar``.
	    """
	    hidden = [
	        lbl
	        for lbl in series.index
	        if lbl in LATENT_DROP_FROM_TABLES or str(lbl).startswith("umap_")
	    ]
	    visible = series.drop(labels=hidden, errors="ignore")
	    table = {
	        "Field": [_field_label(lbl, fallback_field_display=False) for lbl in visible.index],
	        "Value": [_format_scalar(raw) for raw in visible.values],
	    }
	    return pd.DataFrame(table)


	def dataframe_to_arrow_safe_kv(series: pd.Series) -> pd.DataFrame:
	    """Turn one row into a ``field`` / ``value`` table of strings.

	    Both columns are built as plain strings (labels via ``_field_label`` with
	    the ``_FIELD_DISPLAY`` fallback, values via ``_format_scalar``), so the
	    result has no mixed-type column for Streamlit/PyArrow to handle.
	    """
	    row = series.copy()
	    table = {
	        "field": [_field_label(lbl, fallback_field_display=True) for lbl in row.index],
	        "value": [_format_scalar(raw) for raw in row.values],
	    }
	    return pd.DataFrame(table)