import os
from pathlib import Path

from dotenv import load_dotenv

load_dotenv()


class Settings:
    """Application configuration exposed as class attributes and classmethods.

    Environment-backed values are read once, when the class body executes
    (after the module-level ``load_dotenv()`` call). Path helpers branch on
    ``DEPLOYMENT_MODE``: the value ``'huggingface'`` selects the ``/data``
    based lookup, anything else selects project-relative paths.
    """

    # Base Paths
    BASE_DIR = Path(__file__).parent.parent

    # Deployment Mode
    DEPLOYMENT_MODE = os.getenv('DEPLOYMENT_MODE', 'local')

    # API Keys (None when the environment variable is not set)
    OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
    HF_TOKEN = os.getenv("HF_TOKEN")

    # HuggingFace Settings
    HF_SPACE = "SongLift/LyrGen2"
    HF_DATASET = "SongLift/LyrGen2_DB"

    # Local Settings
    LYRICS_DIR = BASE_DIR / "data" / "raw" / "lyrics"
    EMBEDDINGS_DIR = BASE_DIR / "data" / "processed" / "embeddings"

    # Model Settings
    EMBEDDING_MODEL = "text-embedding-ada-002"
    LLM_MODEL = os.getenv("LLM_MODEL", "gpt-5.1")

    # ChromaDB Settings
    CHROMA_COLLECTION_NAME = "lyrics_v1"

    @classmethod
    def is_huggingface(cls) -> bool:
        """Return True when ``DEPLOYMENT_MODE`` equals ``'huggingface'``."""
        return cls.DEPLOYMENT_MODE == 'huggingface'

    @classmethod
    def get_embeddings_path(cls) -> Path:
        """Return the base embeddings path for the current deployment mode.

        Local mode: ``BASE_DIR / "data" / "processed" / "embeddings"``.

        HuggingFace mode: ``/data`` is searched recursively for directories
        matching ``**/datasets--*--*/snapshots/*/chroma`` and the one with
        the newest modification time is returned. If none is found, the
        fallback ``/data/processed/embeddings`` is returned.

        Progress is reported with ``print``. Neither branch creates the
        returned directory.
        """
        if not cls.is_huggingface():
            # Local: Use project-relative path
            embeddings_path = cls.BASE_DIR / "data" / "processed" / "embeddings"
            print(f"Local embeddings path: {embeddings_path}")
            return embeddings_path

        # In HuggingFace, first check the dataset cache
        data_dir = Path("/data")
        print(f"\nSearching for embeddings in: {data_dir}")

        # Look for the most recent snapshot directory containing chroma
        snapshot_pattern = "**/datasets--*--*/snapshots/*/chroma"
        print(f"Using search pattern: {snapshot_pattern}")

        # Stat each match exactly once so the logged mtime is the same value
        # used for selection, and keep only real directories: a plain file or
        # dangling symlink named "chroma" cannot serve as a persist directory.
        candidates = []
        for snap in data_dir.glob(snapshot_pattern):
            try:
                if not snap.is_dir():
                    print(f"Skipping {snap}: not a directory")
                    continue
                modified = snap.stat().st_mtime
            except OSError as exc:
                # e.g. the entry vanished or is unreadable; one bad entry
                # should not abort the whole lookup.
                print(f"Skipping {snap}: {exc}")
                continue
            candidates.append((modified, snap))

        print(f"Found {len(candidates)} potential snapshot directories:")
        for modified, snap in candidates:
            print(f"- {snap} (Modified: {modified})")

        if candidates:
            # Key on mtime only, so ties resolve to the first match found.
            _, chosen_path = max(candidates, key=lambda entry: entry[0])
            print(f"Selected most recent: {chosen_path}")
            return chosen_path

        print("No snapshots found, using fallback location")
        fallback_path = data_dir / "processed/embeddings"
        print(f"Fallback path: {fallback_path}")
        return fallback_path

    @classmethod
    def get_chroma_path(cls) -> Path:
        """Return the Chroma DB path.

        In HuggingFace mode the embeddings path is returned unchanged (the
        snapshot search already targets a ``chroma`` directory). Locally it
        is the ``chroma`` subdirectory of the embeddings path.
        """
        if cls.is_huggingface():
            # In HuggingFace, the chroma path is the embeddings path itself
            return cls.get_embeddings_path()

        # Local: Use subdirectory
        return cls.get_embeddings_path() / "chroma"

    @classmethod
    def ensure_embedding_paths(cls) -> None:
        """Create the embeddings and chroma directories when running locally.

        Does nothing in HuggingFace mode.
        """
        if not cls.is_huggingface():
            # Only create directories locally
            cls.get_embeddings_path().mkdir(parents=True, exist_ok=True)
            cls.get_chroma_path().mkdir(parents=True, exist_ok=True)

    @classmethod
    def get_chroma_settings(cls) -> dict:
        """Return ChromaDB settings as a plain dict.

        Keys: ``anonymized_telemetry`` (always False), ``persist_directory``
        (``get_chroma_path()`` as a string) and ``collection_name``.
        """
        chroma_path = cls.get_chroma_path()
        return {
            "anonymized_telemetry": False,
            "persist_directory": str(chroma_path),
            "collection_name": cls.CHROMA_COLLECTION_NAME,
        }

    @classmethod
    def debug_openai_key(cls) -> None:
        """Print whether the OpenAI API key is set.

        Only the key's length is printed, never the key itself.
        """
        if cls.OPENAI_API_KEY:
            print(
                f"OpenAI API Key is set. Length: {len(cls.OPENAI_API_KEY)} characters.")
        else:
            print("OpenAI API Key is NOT set.")