| import os |
| from pathlib import Path |
| from dotenv import load_dotenv |
|
|
# Load variables from a .env file (python-dotenv) before the Settings class
# body below runs, so its os.getenv(...) lookups can see them.
load_dotenv()
|
|
|
|
class Settings:
    """Central configuration: deployment mode, credentials, paths, model names.

    Every value is a class attribute evaluated once, when this class body
    runs, so environment-backed values reflect the environment at import
    time. All helpers are classmethods; no instance is required.
    """

    # Directory one level above the directory that contains this file.
    BASE_DIR = Path(__file__).parent.parent

    # Deployment selector read from the environment; defaults to 'local'.
    # The value 'huggingface' switches the path helpers to the HF layout.
    DEPLOYMENT_MODE = os.getenv('DEPLOYMENT_MODE', 'local')

    # Credentials read from the environment; each is None when unset.
    OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
    HF_TOKEN = os.getenv("HF_TOKEN")

    # Hugging Face repository identifiers (presumably the Space and the
    # dataset backing it -- confirm against the deployment).
    HF_SPACE = "SongLift/LyrGen2"
    HF_DATASET = "SongLift/LyrGen2_DB"

    # Root directory searched for embeddings in Hugging Face mode. Exposed as
    # an attribute (instead of a literal inside get_embeddings_path) so it can
    # be overridden, e.g. in tests.
    HF_DATA_DIR = Path("/data")

    # Local data locations, relative to BASE_DIR.
    LYRICS_DIR = BASE_DIR / "data" / "raw" / "lyrics"
    EMBEDDINGS_DIR = BASE_DIR / "data" / "processed" / "embeddings"

    # Model names; LLM_MODEL can be overridden through the environment.
    EMBEDDING_MODEL = "text-embedding-ada-002"
    LLM_MODEL = os.getenv("LLM_MODEL", "gpt-5.1")

    # Name of the Chroma collection reported by get_chroma_settings().
    CHROMA_COLLECTION_NAME = "lyrics_v1"

    @classmethod
    def is_huggingface(cls) -> bool:
        """Return True when DEPLOYMENT_MODE selects the Hugging Face layout.

        The comparison ignores case and surrounding whitespace, so values
        such as ``"HuggingFace"`` or ``" huggingface "`` are accepted. A
        missing or empty mode counts as not Hugging Face.
        """
        mode = str(cls.DEPLOYMENT_MODE or "").strip().lower()
        return mode == 'huggingface'

    @classmethod
    def get_embeddings_path(cls) -> Path:
        """Return the embeddings directory for the current deployment mode.

        Local mode returns ``BASE_DIR / "data" / "processed" / "embeddings"``.

        Hugging Face mode globs ``HF_DATA_DIR`` for ``chroma`` directories
        inside dataset snapshot folders and returns the one with the newest
        modification time. If nothing matches (or no match can be stat'ed),
        it returns ``HF_DATA_DIR / "processed/embeddings"``.

        The search runs on every call and reports its progress on stdout.
        """
        if not cls.is_huggingface():
            embeddings_path = cls.BASE_DIR / "data" / "processed" / "embeddings"
            print(f"Local embeddings path: {embeddings_path}")
            return embeddings_path

        data_dir = Path(cls.HF_DATA_DIR)
        print(f"\nSearching for embeddings in: {data_dir}")

        snapshot_pattern = "**/datasets--*--*/snapshots/*/chroma"
        print(f"Using search pattern: {snapshot_pattern}")

        snapshots = list(data_dir.glob(snapshot_pattern))
        print(f"Found {len(snapshots)} potential snapshot directories:")

        # Stat each candidate exactly once so the mtime that is printed is the
        # same one used for selection, and so an entry that disappears or is
        # unreadable is skipped instead of aborting the whole lookup.
        dated = []
        for snap in snapshots:
            try:
                modified = snap.stat().st_mtime
            except OSError as err:
                print(f"- {snap} (skipped, cannot stat: {err})")
                continue
            print(f"- {snap} (Modified: {modified})")
            dated.append((modified, snap))

        if dated:
            # max() keeps the first of equal mtimes, i.e. glob order on ties.
            chosen_path = max(dated, key=lambda pair: pair[0])[1]
            print(f"Selected most recent: {chosen_path}")
            return chosen_path

        print("No snapshots found, using fallback location")
        fallback_path = data_dir / "processed/embeddings"
        print(f"Fallback path: {fallback_path}")
        return fallback_path

    @classmethod
    def get_chroma_path(cls) -> Path:
        """Return the directory used as the Chroma persist directory.

        In Hugging Face mode this is exactly what get_embeddings_path()
        returns (a matched snapshot path already ends in ``chroma``; the
        fallback path is returned without a ``chroma`` suffix). In local mode
        it is the ``chroma`` subdirectory of the embeddings path.
        """
        embeddings_path = cls.get_embeddings_path()
        if cls.is_huggingface():
            return embeddings_path
        return embeddings_path / "chroma"

    @classmethod
    def ensure_embedding_paths(cls) -> None:
        """Create the local embeddings and Chroma directories if missing.

        Does nothing in Hugging Face mode.
        """
        if cls.is_huggingface():
            return
        cls.get_embeddings_path().mkdir(parents=True, exist_ok=True)
        cls.get_chroma_path().mkdir(parents=True, exist_ok=True)

    @classmethod
    def get_chroma_settings(cls) -> dict:
        """Return a dict with the telemetry flag, persist directory, and
        collection name for ChromaDB.

        The persist directory is resolved through get_chroma_path() on each
        call and stored as a string.
        """
        chroma_path = cls.get_chroma_path()
        return {
            "anonymized_telemetry": False,
            "persist_directory": str(chroma_path),
            "collection_name": cls.CHROMA_COLLECTION_NAME,
        }

    @classmethod
    def debug_openai_key(cls) -> None:
        """Print whether OPENAI_API_KEY is set.

        Only the key's length is printed, never the key itself.
        """
        if cls.OPENAI_API_KEY:
            print(
                f"OpenAI API Key is set. Length: {len(cls.OPENAI_API_KEY)} characters.")
        else:
            print("OpenAI API Key is NOT set.")
|