Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| Centralized path configuration for Plant-mSyn (Multi-genome Synteny). | |
| Hugging Face Spaces Edition. | |
| This module provides configurable paths via environment variables with sensible | |
| fallbacks for Hugging Face Spaces deployment. Use this module for all path | |
| references to ensure cloud deployment compatibility. | |
| Environment Variables (all optional - defaults are computed relative to this file's location): | |
| PLANTMSYN_PROJECT_DIR - Root project directory | |
| PLANTMSYN_DATA_DIR - Data directory (for HF: where dataset files are) | |
| PLANTMSYN_GENOMES_DIR - Genomes directory | |
| PLANTMSYN_ANNOTATIONS_DIR - Gene annotation TSV files | |
| PLANTMSYN_MCSCAN_DIR - MCscan results | |
| PLANTMSYN_LOG_DIR - Log files directory | |
| PLANTMSYN_SQL_DIR - SQL catalogs directory | |
| Derived paths (from PLANTMSYN_MCSCAN_DIR): | |
| - bed_files_dir | |
| - i1_blocks_dir | |
| - last_filtered_dir | |
| - pep_files_dir | |
| - lifted_anchors_dir | |
| - custom_meta_dir | |
| - custom_synteny_meta_dir | |
| Usage: | |
| from path_config import ( | |
| PROJECT_DIR, GENOMES_DIR, ANNOTATIONS_DIR, | |
| MCSCAN_RESULTS_DIR, LOG_DIR, get_all_paths | |
| ) | |
| # Or use the function for compatibility: | |
| paths = get_all_paths() | |
| bed_dir = paths['bed_files_dir'] | |
| """ | |
| import os | |
| from pathlib import Path | |
| from typing import Dict | |
# Try to load a .env file if python-dotenv is available.
try:
    from dotenv import load_dotenv
except ImportError:
    pass  # python-dotenv not installed, rely on system env vars
else:
    _script_dir = Path(__file__).resolve().parent
    _project_root = _script_dir.parent
    # Candidate locations, in priority order:
    #   1. parent of the script directory (local layout: scripts live in Scripts/)
    #   2. the script directory itself (layout where scripts sit directly in
    #      the project root, e.g. /app/)
    # Only the first file found is loaded.
    for _env_file in (_project_root / ".env", _script_dir / ".env"):
        if _env_file.is_file():
            load_dotenv(_env_file)
            break
| # ============================================================================= | |
| # DEFAULT PATH COMPUTATION (Hugging Face Spaces Adapted) | |
| # ============================================================================= | |
def _get_default_project_dir() -> Path:
    """Work out the project root from the location of this file.

    Two layouts are recognised:

    * Scripts directly in the project root (Hugging Face Spaces, ``/app/``):
      detected by a ``data`` folder next to this file, or by the containing
      directory being named ``app``. The containing directory is returned.
    * Scripts inside a sub-folder such as ``Scripts/`` (local development):
      the parent of the containing directory is returned.
    """
    here = Path(__file__).resolve().parent
    has_data_folder = (here / "data").is_dir()
    if not has_data_folder and here.name != "app":
        # Local development layout: the project root is one level up.
        return here.parent
    return here
def _get_path_from_env(env_var: str, default: Path) -> Path:
    """Return the path configured in *env_var*, or *default* when it is unset.

    Args:
        env_var: Name of the environment variable to read.
        default: Path returned unchanged when the variable is unset or empty.

    Returns:
        The variable's value with a leading ``~`` expanded and then resolved
        to an absolute path, or ``default`` exactly as given.
    """
    env_value = os.environ.get(env_var)
    if not env_value:
        return default
    # Values coming from a .env file or container settings are not expanded by
    # a shell, so without expanduser() "~/data" would resolve to "<cwd>/~/data".
    return Path(env_value).expanduser().resolve()
# =============================================================================
# CORE PATH CONFIGURATION (Hugging Face Spaces)
# =============================================================================

# Fallback project root, worked out from where this file lives.
_DEFAULT_PROJECT_DIR = _get_default_project_dir()

# Each primary path below can be overridden with the named environment variable.
PROJECT_DIR: Path = _get_path_from_env("PLANTMSYN_PROJECT_DIR", _DEFAULT_PROJECT_DIR)

# On HF Spaces the data lives in a separate "data" folder under the project.
DATA_DIR: Path = _get_path_from_env("PLANTMSYN_DATA_DIR", PROJECT_DIR / "data")

# Genomes (may be unused on HF, where preprocessed data is shipped instead).
GENOMES_DIR: Path = _get_path_from_env("PLANTMSYN_GENOMES_DIR", DATA_DIR / "genomes")

# Annotations live under the data directory.
ANNOTATIONS_DIR: Path = _get_path_from_env("PLANTMSYN_ANNOTATIONS_DIR", DATA_DIR / "annotations")

# MCscan results sit directly under data (no protein_pairwise subfolder).
MCSCAN_RESULTS_DIR: Path = _get_path_from_env("PLANTMSYN_MCSCAN_DIR", DATA_DIR / "mcscan_results")

LOG_DIR: Path = _get_path_from_env("PLANTMSYN_LOG_DIR", PROJECT_DIR / "logs")

# =============================================================================
# DERIVED PATHS (from MCSCAN_RESULTS_DIR)
# =============================================================================

BED_FILES_DIR: Path = MCSCAN_RESULTS_DIR / "bed_files"
I1_BLOCKS_DIR: Path = MCSCAN_RESULTS_DIR / "i1_blocks"
LAST_FILTERED_DIR: Path = MCSCAN_RESULTS_DIR / "last_filtered"
PEP_FILES_DIR: Path = MCSCAN_RESULTS_DIR / "pep_files"
LIFTED_ANCHORS_DIR: Path = MCSCAN_RESULTS_DIR / "lifted_anchors"
CUSTOM_META_DIR: Path = MCSCAN_RESULTS_DIR / "custom_meta"
CUSTOM_SYNTENY_META_DIR: Path = MCSCAN_RESULTS_DIR / "custom_synteny_meta"

# SQL metadata also lives under the data directory; the catalog folder and the
# metadata database are located relative to it.
SQL_DIR: Path = _get_path_from_env("PLANTMSYN_SQL_DIR", DATA_DIR / "sql")
SEARCH_CATALOGS_DIR: Path = SQL_DIR / "search_catalogs"
METADATA_DB_PATH: Path = SQL_DIR / "plantmsyn_metadata.db"

# Directory containing this module (for reference).
SCRIPT_DIR: Path = Path(__file__).resolve().parent
| # ============================================================================= | |
| # HELPER FUNCTIONS | |
| # ============================================================================= | |
def get_all_paths() -> Dict[str, Path]:
    """Return every configured path in a single dictionary.

    Provides backward compatibility with ``get_project_paths()`` by exposing
    the module-level path constants under lower-case keys.

    Returns:
        Mapping from path name (e.g. ``'bed_files_dir'``) to the matching
        module-level ``Path`` constant. All values are directories except
        ``'metadata_db_path'``, which points at the metadata database file.
    """
    return {
        'script_dir': SCRIPT_DIR,
        'project_dir': PROJECT_DIR,
        # DATA_DIR is a primary, env-configurable path, so it belongs here too.
        'data_dir': DATA_DIR,
        'genomes_dir': GENOMES_DIR,
        'annotations_dir': ANNOTATIONS_DIR,
        'mcscan_results_dir': MCSCAN_RESULTS_DIR,
        'log_dir': LOG_DIR,
        'bed_files_dir': BED_FILES_DIR,
        'i1_blocks_dir': I1_BLOCKS_DIR,
        'last_filtered_dir': LAST_FILTERED_DIR,
        'pep_files_dir': PEP_FILES_DIR,
        'lifted_anchors_dir': LIFTED_ANCHORS_DIR,
        'custom_meta_dir': CUSTOM_META_DIR,
        'custom_synteny_meta_dir': CUSTOM_SYNTENY_META_DIR,
        'sql_dir': SQL_DIR,
        'search_catalogs_dir': SEARCH_CATALOGS_DIR,
        'metadata_db_path': METADATA_DB_PATH,
    }
def print_config() -> None:
    """Log the current path configuration (useful for debugging).

    Writes a header followed by one line per entry of ``get_all_paths()`` to
    the ``synteny.path_config`` logger. Each line is marked with ``✓`` when
    the path exists on disk and ``✗`` when it does not.
    """
    # Function-scope import: this module can be imported without `logger`
    # being importable; it is only needed when the configuration is printed.
    from logger import get_logger

    config_logger = get_logger("synteny.path_config")
    config_logger.info("Plant-mSyn Path Configuration")
    config_logger.info("=" * 50)
    for name, path in get_all_paths().items():
        exists = "✓" if path.exists() else "✗"
        config_logger.info(f" [{exists}] {name}: {path}")


if __name__ == "__main__":
    print_config()