Spaces:
Paused
Paused
File size: 12,043 Bytes
c1fc5b2 c811f2f c1fc5b2 c811f2f c1fc5b2 5cb5de0 c1fc5b2 5cb5de0 c1fc5b2 01f651c c1fc5b2 01f651c c1fc5b2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 |
"""
Centralized Configuration for Path Management
This module provides environment-aware path management to ensure
compatibility between local development and HuggingFace Space deployment.
Usage:
from code.cube3d.config import DATA_DIR, LABEL_MAPPINGS, get_mapping_paths
# Get paths for a specific mapping set
forward_path, inverse_path = get_mapping_paths("subset_1k")
"""
import os
import json
import re
import tempfile
from pathlib import Path
from typing import Dict, Tuple, Optional
# ============================================================================
# Environment Detection
# ============================================================================
def detect_environment() -> str:
    """
    Detect the current runtime environment.

    Returns:
        "huggingface" when running on a HuggingFace Space (identified by the
        SPACE_ID / SPACE_AUTHOR_NAME variables the platform injects),
        "local" otherwise.
    """
    space_markers = ("SPACE_ID", "SPACE_AUTHOR_NAME")
    on_space = any(os.getenv(marker) for marker in space_markers)
    return "huggingface" if on_space else "local"
# ============================================================================
# Path Configuration
# ============================================================================

# Evaluated once at import time; every path constant below derives from it.
ENVIRONMENT = detect_environment()

# Project root detection
if ENVIRONMENT == "huggingface":
    # HuggingFace Space: app runs from /home/user/app
    PROJECT_ROOT = Path("/home/user/app")
else:
    # Local: calculate from this file's location
    # config.py is at: code/cube3d/config.py
    # So PROJECT_ROOT = ../../.. from here
    PROJECT_ROOT = Path(__file__).parent.parent.parent.resolve()

# Data directory
DATA_DIR = PROJECT_ROOT / "data"

# Subdirectories
CAR_1K_DIR = DATA_DIR / "car_1k"
# Curated car structures with reference renders; the non-ASCII directory
# name is the on-disk name and must be kept verbatim.
CAR_DATA_DIR = DATA_DIR / "1313个筛选车结构和对照渲染图"

# HuggingFace model cache directory
# CRITICAL: Must match where preload_from_hub downloads models
if ENVIRONMENT == "huggingface":
    # HuggingFace Spaces: Use HF_HUB_CACHE (matches preload_from_hub behavior)
    # preload_from_hub ALWAYS downloads to ~/.cache/huggingface/hub regardless of HF_HOME
    # See: https://huggingface.co/docs/hub/spaces-config-reference
    HF_CACHE_DIR = os.getenv(
        "HF_HUB_CACHE",
        os.path.expanduser("~/.cache/huggingface/hub")
    )
    print(f"✅ [Config] HuggingFace cache directory: {HF_CACHE_DIR}")
else:
    # Local development: use standard user cache
    # NOTE(review): this is ~/.cache/huggingface (no trailing /hub) while the
    # Space branch uses .../hub — looks intentional, but worth confirming.
    HF_CACHE_DIR = os.path.expanduser("~/.cache/huggingface")
    try:
        os.makedirs(HF_CACHE_DIR, exist_ok=True)
        print(f"[Config] Local HuggingFace cache directory: {HF_CACHE_DIR}")
    except (PermissionError, OSError) as e:
        # Fallback to temp directory when the user cache is not writable.
        HF_CACHE_DIR = os.path.join(tempfile.gettempdir(), "huggingface")
        os.makedirs(HF_CACHE_DIR, exist_ok=True)
        print(f"⚠️ [Config] Using temp directory due to permission error: {HF_CACHE_DIR}")
# ============================================================================
# Label Mapping Paths
# ============================================================================

# Pre-built mapping files shipped with the data directory, keyed by mapping
# set name. Each entry has a "forward" (part name -> integer ID) and an
# "inverse" (ID -> part name) JSON file. get_mapping_paths() falls back to
# runtime generation when these files are absent.
LABEL_MAPPINGS: Dict[str, Dict[str, Path]] = {
    "subset_self": {
        "forward": CAR_1K_DIR / "subset_self" / "label_mapping.json",
        "inverse": CAR_1K_DIR / "subset_self" / "label_inverse_mapping.json",
    },
    "subset_1k": {
        "forward": CAR_1K_DIR / "subset_1k" / "label_mapping_merge.json",
        "inverse": CAR_1K_DIR / "subset_1k" / "label_inverse_mapping_merge.json",
    },
}
# Runtime-generated mapping cache (for HuggingFace Space with storage limits).
# Maps mapping_type -> (forward_path, inverse_path) so each set is built once.
_RUNTIME_MAPPING_CACHE: Dict[str, Tuple[str, str]] = {}


# ============================================================================
# Helper Functions
# ============================================================================
def generate_label_mappings_from_ldr(ldr_dir: Path, mapping_type: str = "subset_1k") -> Tuple[str, str]:
    """
    Generate label mappings by scanning LDR files at runtime.

    This is a fallback for HuggingFace Spaces where storage limits prevent
    pre-uploading large mapping files. Generated paths are cached in memory,
    so repeated calls for the same mapping_type skip the scan entirely.

    Args:
        ldr_dir: Directory containing LDR files (searched recursively)
        mapping_type: Mapping set name; used as the cache key and as the
            output subdirectory name under the system temp directory

    Returns:
        Tuple of (forward_mapping_path, inverse_mapping_path) as strings;
        the JSON files are written under <tempdir>/lego_mappings/<mapping_type>.
    """
    # FIX: check the cache BEFORE announcing a fresh scan (the original
    # printed "Generating..." even on cache hits).
    if mapping_type in _RUNTIME_MAPPING_CACHE:
        print(f"✅ Using cached mappings for {mapping_type}")
        return _RUNTIME_MAPPING_CACHE[mapping_type]

    print(f"🔧 Generating label mappings from LDR files in {ldr_dir}...")

    label_mapping: Dict[str, int] = {}        # part_name -> ID
    label_inverse_mapping: Dict[int, str] = {}  # ID -> part_name
    label_counter = 0

    ldr_files = list(ldr_dir.glob("**/*.ldr"))
    print(f"📂 Found {len(ldr_files)} LDR files to process")

    for ldr_file in ldr_files:
        try:
            with open(ldr_file, 'r', encoding='utf-8', errors='ignore') as f:
                for line in f:
                    parts = line.split()
                    # LDraw type-1 lines reference a sub-file:
                    #   "1 <colour> x y z a..i <file>"  => at least 15 tokens.
                    # FIX: compare the first token to '1' instead of
                    # line.startswith('1'), which also matched any line whose
                    # first token merely began with "1" (e.g. "16 ...").
                    if len(parts) < 15 or parts[0] != '1':
                        continue
                    # FIX: LDraw filenames may contain spaces, so rejoin the
                    # tail instead of taking only parts[14].
                    filename = " ".join(parts[14:]).lower()
                    # Normalize to the leading digit run when present
                    # (e.g. "3001.dat" -> "3001"); otherwise keep full name.
                    match = re.match(r'^\d+', filename)
                    part_identifier = match.group() if match else filename
                    if part_identifier not in label_mapping:
                        label_mapping[part_identifier] = label_counter
                        label_inverse_mapping[label_counter] = part_identifier
                        label_counter += 1
        except Exception as e:
            # Best-effort scan: skip unreadable/corrupt files, keep going.
            print(f"⚠️ Error processing {ldr_file}: {e}")
            continue

    print(f"✅ Generated {len(label_mapping)} unique part mappings")

    # Persist to the temp directory (writable even on read-only Spaces).
    tmp_dir = Path(tempfile.gettempdir()) / "lego_mappings" / mapping_type
    tmp_dir.mkdir(parents=True, exist_ok=True)
    forward_path = tmp_dir / "label_mapping_merge.json"
    inverse_path = tmp_dir / "label_inverse_mapping_merge.json"

    with open(forward_path, 'w', encoding='utf-8') as f:
        json.dump(label_mapping, f, ensure_ascii=False, indent=2)

    # JSON object keys must be strings, so stringify the integer IDs.
    inverse_str_keys = {str(k): v for k, v in label_inverse_mapping.items()}
    with open(inverse_path, 'w', encoding='utf-8') as f:
        json.dump(inverse_str_keys, f, ensure_ascii=False, indent=2)

    print(f"💾 Saved mappings to:")
    print(f"   {forward_path}")
    print(f"   {inverse_path}")

    # Cache the paths for subsequent calls.
    result = (str(forward_path), str(inverse_path))
    _RUNTIME_MAPPING_CACHE[mapping_type] = result
    return result
def get_mapping_paths(mapping_type: str = "subset_1k") -> Tuple[str, str]:
    """
    Get label mapping file paths for a given mapping type.

    Automatically generates mappings from LDR files if not found.

    Args:
        mapping_type: Either "subset_self" or "subset_1k"

    Returns:
        Tuple of (forward_mapping_path, inverse_mapping_path) as strings

    Raises:
        ValueError: If mapping_type is invalid
        FileNotFoundError: If the mapping files are missing and no LDR
            directory exists to regenerate them from
    """
    if mapping_type not in LABEL_MAPPINGS:
        raise ValueError(
            f"Invalid mapping_type: {mapping_type}. "
            f"Must be one of: {list(LABEL_MAPPINGS.keys())}"
        )

    entry = LABEL_MAPPINGS[mapping_type]
    forward_path, inverse_path = entry["forward"], entry["inverse"]

    # Diagnostic logging for HF Spaces debugging
    print(f"🔍 [DEBUG] get_mapping_paths() called for: {mapping_type}")
    print(f"   PROJECT_ROOT: {PROJECT_ROOT}")
    print(f"   Forward path: {forward_path}")
    print(f"   Inverse path: {inverse_path}")
    print(f"   Forward exists: {forward_path.exists()}")
    print(f"   Inverse exists: {inverse_path.exists()}")

    # Fast path: both pre-built files are present.
    if forward_path.exists() and inverse_path.exists():
        print(f"   ✅ Both files exist, returning paths")
        return str(forward_path), str(inverse_path)

    # Files don't exist — generate from LDR files as a fallback.
    print(f"⚠️ Label mapping files not found for {mapping_type}")
    print(f"   Missing: {forward_path}")
    print(f"   Missing: {inverse_path}")
    print(f"🔄 Generating label mappings from LDR files (this may take 1-2 minutes)...")

    # Pick the LDR directory to scan for this mapping set.
    if mapping_type == "subset_1k":
        ldr_dir = CAR_DATA_DIR / "ldr"
        if not ldr_dir.exists():
            ldr_dir = CAR_DATA_DIR  # Try parent directory
    else:
        ldr_dir = CAR_1K_DIR / mapping_type

    if not ldr_dir.exists():
        raise FileNotFoundError(
            f"Cannot generate mappings: LDR directory not found: {ldr_dir}\n"
            f"Please ensure LDR files are available."
        )

    return generate_label_mappings_from_ldr(ldr_dir, mapping_type)
def create_default_mappings(mapping_type: str = "subset_1k") -> Tuple[Dict, Dict]:
    """
    Create minimal default label mappings if files are missing.

    This is a fallback for development/testing. Production should have real files.

    Args:
        mapping_type: Mapping type identifier (only used for logging)

    Returns:
        Tuple of (label_mapping, label_inverse_mapping) dictionaries —
        both empty in this fallback.
    """
    print(f"⚠️ WARNING: Creating default empty mappings for {mapping_type}")
    print("   This is for fallback only. Production should have real mapping files.")
    return {}, {}
def load_mappings_safe(mapping_type: str = "subset_1k") -> Tuple[Dict, Dict]:
    """
    Safely load label mappings with fallback.

    Attempts to load from files, falls back to defaults if missing.

    Args:
        mapping_type: Either "subset_self" or "subset_1k"

    Returns:
        Tuple of (label_mapping, label_inverse_mapping) dictionaries
    """
    try:
        forward_path, inverse_path = get_mapping_paths(mapping_type)
        with open(forward_path, 'r', encoding='utf-8') as fwd_file:
            forward = json.load(fwd_file)
        with open(inverse_path, 'r', encoding='utf-8') as inv_file:
            inverse = json.load(inv_file)
    except FileNotFoundError as err:
        # Missing data is expected in dev/test setups — degrade gracefully.
        print(f"⚠️ {err}")
        return create_default_mappings(mapping_type)
    return forward, inverse
# ============================================================================
# Debug Information
# ============================================================================
def print_config_info():
    """Print current configuration for debugging."""
    divider = "=" * 60
    print(divider)
    print("Configuration Information")
    print(divider)
    print(f"Environment: {ENVIRONMENT}")
    print(f"Project Root: {PROJECT_ROOT}")
    print(f"Data Directory: {DATA_DIR}")
    print(f"Data Dir Exists: {DATA_DIR.exists()}")
    print("\nLabel Mapping Paths:")
    for mapping_type, paths in LABEL_MAPPINGS.items():
        print(f"\n  {mapping_type}:")
        for key, path in paths.items():
            marker = "✅" if path.exists() else "❌"
            print(f"    {key}: {marker} {path}")
    print(divider)
# ============================================================================
# Module Test
# ============================================================================
if __name__ == "__main__":
    print_config_info()

    # Smoke-test path resolution for each mapping set in turn.
    print("\n\nTesting mapping load:")
    for mapping_name in ("subset_1k", "subset_self"):
        try:
            fwd, inv = get_mapping_paths(mapping_name)
        except Exception as exc:
            print(f"❌ Error: {exc}")
        else:
            print(f"✅ {mapping_name} paths retrieved successfully")
            print(f"   Forward: {fwd}")
            print(f"   Inverse: {inv}")
|