"""Shared helpers for the Pigeon Pea Pangenome Atlas.""" import os import logging import time from pathlib import Path from functools import wraps logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s") logger = logging.getLogger("pangenome") PROJECT_ROOT = Path(__file__).resolve().parent.parent DATA_DIR = PROJECT_ROOT / "data" PRECOMPUTED_DIR = PROJECT_ROOT / "precomputed" def find_file(directory: Path, pattern: str) -> Path: """Find first file matching glob pattern in directory.""" matches = list(directory.glob(pattern)) if not matches: raise FileNotFoundError(f"No file matching '{pattern}' in {directory}") return matches[0] def timer(func): """Decorator that logs execution time.""" @wraps(func) def wrapper(*args, **kwargs): t0 = time.time() result = func(*args, **kwargs) dt = time.time() - t0 logger.info(f"{func.__name__} completed in {dt:.2f}s") return result return wrapper KNOWN_COUNTRIES = { "India", "Myanmar", "Unknown", "Zaire", "Uganda", "Indonesia", "Jamaica", "South_Africa", "Puerto_Rico", "Philippines", "Sierra_Leone", "Nigeria", "Malawi", "Italy", "Kenya", "Sri_Lanka", "Thailand", "Nepal", } # Approximate centroid coordinates (lat, lon) for each country. COUNTRY_COORDS = { "India": (20.59, 78.96), "Philippines": (12.88, 121.77), "Kenya": (-1.29, 36.82), "Nepal": (28.39, 84.12), "Myanmar": (21.92, 95.96), "Uganda": (1.37, 32.29), "Zaire": (-4.04, 21.76), "Indonesia": (-0.79, 113.92), "Jamaica": (18.11, -77.30), "South_Africa": (-30.56, 22.94), "Puerto_Rico": (18.22, -66.59), "Sierra_Leone": (8.46, -11.78), "Nigeria": (9.08, 7.49), "Malawi": (-13.25, 34.30), "Italy": (41.87, 12.57), "Sri_Lanka": (7.87, 80.77), "Thailand": (15.87, 100.99), } def parse_country(line_id: str) -> str: """Extract country from line ID (last token after underscore).""" parts = line_id.rsplit("_", 1) if len(parts) == 2 and parts[1] in KNOWN_COUNTRIES: return parts[1] # Try two-word countries parts2 = line_id.rsplit("_", 2) if len(parts2) >= 3: two_word = f"{parts2[-2]}_{parts2[-1]}" if two_word in KNOWN_COUNTRIES: return two_word return "Unknown" # ===================================================================== # HTML builder helpers # ===================================================================== def build_hero_header( total_genes: int, n_lines: int, n_countries: int, n_clusters: int, ) -> str: """Return an HTML string for the hero dashboard header. Renders a dark (#1a2332) banner with large stat numbers and small uppercase labels. Uses the ``.hero-header``, ``.hero-stat``, and ``.hero-subtitle`` CSS classes defined in ``ui/theme.py``. Parameters ---------- total_genes : int Total number of genes in the pangenome. n_lines : int Number of accession lines (e.g. 89 + reference). n_countries : int Number of countries of origin. n_clusters : int Number of genomic clusters. """ stats = [ (f"{total_genes:,}", "Total Genes"), (str(n_lines), "Lines"), (str(n_countries), "Countries"), (str(n_clusters), "Clusters"), ] stat_html = "\n".join( f'' f'{value}' f'{label}' f"" for value, label in stats ) return ( '
' "An interactive exploration of presence-absence variation across " "pigeon pea accessions worldwide." "
" '