LyrGen2 / config /settings.py
James-Edmunds's picture
Upload folder using huggingface_hub
fe7fbaa verified
import os
from pathlib import Path
from dotenv import load_dotenv
# Load variables from a .env file (if one is found) into the process
# environment. This must run before the Settings class body below, which
# reads its values with os.getenv() at class-definition time.
load_dotenv()
class Settings:
    """Application configuration exposed as class attributes and classmethods.

    Environment-backed values are read once, when this class body executes.
    The path helpers branch on the deployment mode: in ``huggingface`` mode
    the Chroma database is looked up under ``HF_DATA_DIR``; otherwise
    project-relative directories under ``BASE_DIR`` are used.
    """

    # Base Paths
    # Two directory levels above this file.
    BASE_DIR = Path(__file__).parent.parent

    # Deployment Mode
    DEPLOYMENT_MODE = os.getenv('DEPLOYMENT_MODE', 'local')

    # API Keys (None when the environment variable is not set)
    OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
    HF_TOKEN = os.getenv("HF_TOKEN")

    # HuggingFace Settings
    HF_SPACE = "SongLift/LyrGen2"
    HF_DATASET = "SongLift/LyrGen2_DB"
    # Root directory searched for dataset snapshots in HuggingFace mode.
    HF_DATA_DIR = Path("/data")

    # Local Settings
    LYRICS_DIR = BASE_DIR / "data" / "raw" / "lyrics"
    EMBEDDINGS_DIR = BASE_DIR / "data" / "processed" / "embeddings"

    # Model Settings
    EMBEDDING_MODEL = "text-embedding-ada-002"
    LLM_MODEL = os.getenv("LLM_MODEL", "gpt-5.1")

    # ChromaDB Settings
    CHROMA_COLLECTION_NAME = "lyrics_v1"

    @classmethod
    def is_huggingface(cls) -> bool:
        """Return True when ``DEPLOYMENT_MODE`` selects the HuggingFace mode.

        The comparison ignores case and surrounding whitespace, so values
        such as ``"HuggingFace"`` or ``" huggingface "`` are recognised
        instead of silently falling back to local behaviour.
        """
        mode = cls.DEPLOYMENT_MODE or ''
        return str(mode).strip().lower() == 'huggingface'

    @classmethod
    def _find_chroma_snapshots(cls, data_dir: Path, pattern: str) -> list:
        """Return ``(mtime, path)`` pairs for directories matching *pattern*.

        Each match is stat'ed exactly once. Matches that are not directories,
        or that cannot be stat'ed (e.g. removed while scanning), are skipped
        rather than aborting the whole lookup.
        """
        found = []
        for candidate in data_dir.glob(pattern):
            try:
                if not candidate.is_dir():
                    continue
                mtime = candidate.stat().st_mtime
            except OSError:
                continue
            found.append((mtime, candidate))
        return found

    @classmethod
    def get_embeddings_path(cls) -> Path:
        """Get the base embeddings path.

        In HuggingFace mode, search ``HF_DATA_DIR`` for ``chroma`` directories
        inside dataset snapshot folders and return the most recently modified
        one; if none is found, return ``HF_DATA_DIR / "processed/embeddings"``.
        In local mode, return the project-relative embeddings directory.
        Progress is reported on stdout in both modes.
        """
        if not cls.is_huggingface():
            # Local: Use project-relative path
            embeddings_path = cls.BASE_DIR / "data" / "processed" / "embeddings"
            print(f"Local embeddings path: {embeddings_path}")
            return embeddings_path

        # In HuggingFace, first check the dataset cache
        data_dir = Path(cls.HF_DATA_DIR)
        print(f"\nSearching for embeddings in: {data_dir}")

        # Look for the most recent snapshot directory containing chroma
        snapshot_pattern = "**/datasets--*--*/snapshots/*/chroma"
        print(f"Using search pattern: {snapshot_pattern}")
        snapshots = cls._find_chroma_snapshots(data_dir, snapshot_pattern)
        print(f"Found {len(snapshots)} potential snapshot directories:")
        for mtime, snap in snapshots:
            print(f"- {snap} (Modified: {mtime})")

        if snapshots:
            # max() keeps the first of equally-recent entries (glob order).
            _, chosen_path = max(snapshots, key=lambda entry: entry[0])
            print(f"Selected most recent: {chosen_path}")
            return chosen_path

        print("No snapshots found, using fallback location")
        fallback_path = data_dir / "processed/embeddings"
        print(f"Fallback path: {fallback_path}")
        return fallback_path

    @classmethod
    def get_chroma_path(cls) -> Path:
        """Get the Chroma DB path.

        In HuggingFace mode the embeddings path is already the Chroma
        directory; locally it is the ``chroma`` subdirectory of the
        embeddings path.
        """
        if cls.is_huggingface():
            # In HuggingFace, the chroma path is the embeddings path itself
            return cls.get_embeddings_path()
        # Local: Use subdirectory
        return cls.get_embeddings_path() / "chroma"

    @classmethod
    def ensure_embedding_paths(cls) -> None:
        """Ensure all embedding-related directories exist.

        Directories are only created in local mode; in HuggingFace mode this
        is a no-op.
        """
        if not cls.is_huggingface():  # Only create directories locally
            cls.get_embeddings_path().mkdir(parents=True, exist_ok=True)
            cls.get_chroma_path().mkdir(parents=True, exist_ok=True)

    @classmethod
    def get_chroma_settings(cls) -> dict:
        """Get ChromaDB settings.

        Returns a dict with the keys ``anonymized_telemetry``,
        ``persist_directory`` (the Chroma path as a string) and
        ``collection_name``.
        """
        chroma_path = cls.get_chroma_path()
        return {
            "anonymized_telemetry": False,
            "persist_directory": str(chroma_path),
            "collection_name": cls.CHROMA_COLLECTION_NAME
        }

    @classmethod
    def debug_openai_key(cls) -> None:
        """Print whether the OpenAI API key is set.

        Only the key's length is printed, never the key itself.
        """
        if cls.OPENAI_API_KEY:
            print(
                f"OpenAI API Key is set. Length: {len(cls.OPENAI_API_KEY)} characters.")
        else:
            print("OpenAI API Key is NOT set.")