James Edmunds committed on
Commit ·
1da8f51
1
Parent(s): ac8d6e6
is this it?
Browse files- config/settings.py +20 -19
- src/generator/generator.py +4 -3
config/settings.py
CHANGED
|
@@ -4,38 +4,38 @@ from dotenv import load_dotenv
|
|
| 4 |
|
| 5 |
load_dotenv()
|
| 6 |
|
|
|
|
| 7 |
class Settings:
|
| 8 |
# Base Paths
|
| 9 |
BASE_DIR = Path(__file__).parent.parent
|
| 10 |
-
|
| 11 |
# Deployment Mode
|
| 12 |
DEPLOYMENT_MODE = os.getenv('DEPLOYMENT_MODE', 'local')
|
| 13 |
-
|
| 14 |
# API Keys
|
| 15 |
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
| 16 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
| 17 |
-
|
| 18 |
# HuggingFace Settings
|
| 19 |
HF_SPACE = "SongLift/LyrGen2"
|
| 20 |
HF_DATASET = "SongLift/LyrGen2_DB"
|
| 21 |
-
|
| 22 |
# Local Settings
|
| 23 |
LYRICS_DIR = BASE_DIR / "data" / "raw" / "lyrics"
|
| 24 |
EMBEDDINGS_DIR = BASE_DIR / "data" / "processed" / "embeddings"
|
| 25 |
|
| 26 |
-
|
| 27 |
# Model Settings
|
| 28 |
EMBEDDING_MODEL = "text-embedding-ada-002"
|
| 29 |
LLM_MODEL = "gpt-4"
|
| 30 |
-
|
| 31 |
# ChromaDB Settings
|
| 32 |
CHROMA_COLLECTION_NAME = "lyrics_v1"
|
| 33 |
-
|
| 34 |
@classmethod
|
| 35 |
def is_huggingface(cls) -> bool:
|
| 36 |
"""Check if running in HuggingFace environment"""
|
| 37 |
return cls.DEPLOYMENT_MODE == 'huggingface'
|
| 38 |
-
|
| 39 |
@classmethod
|
| 40 |
def get_embeddings_path(cls) -> Path:
|
| 41 |
"""Get the base embeddings path"""
|
|
@@ -43,31 +43,31 @@ class Settings:
|
|
| 43 |
# In HuggingFace, first check the dataset cache
|
| 44 |
data_dir = Path("/data")
|
| 45 |
print(f"\nSearching for embeddings in: {data_dir}")
|
| 46 |
-
|
| 47 |
# Look for the most recent snapshot directory containing chroma
|
| 48 |
snapshot_pattern = "**/datasets--*--*/snapshots/*/chroma"
|
| 49 |
print(f"Using search pattern: {snapshot_pattern}")
|
| 50 |
-
|
| 51 |
snapshots = list(data_dir.glob(snapshot_pattern))
|
| 52 |
print(f"Found {len(snapshots)} potential snapshot directories:")
|
| 53 |
for snap in snapshots:
|
| 54 |
print(f"- {snap} (Modified: {snap.stat().st_mtime})")
|
| 55 |
-
|
| 56 |
if snapshots:
|
| 57 |
chosen_path = max(snapshots, key=lambda p: p.stat().st_mtime)
|
| 58 |
print(f"Selected most recent: {chosen_path}")
|
| 59 |
return chosen_path
|
| 60 |
-
|
| 61 |
print("No snapshots found, using fallback location")
|
| 62 |
fallback_path = data_dir / "processed/embeddings"
|
| 63 |
print(f"Fallback path: {fallback_path}")
|
| 64 |
return fallback_path
|
| 65 |
-
|
| 66 |
# Local: Use project-relative path
|
| 67 |
embeddings_path = cls.BASE_DIR / "data" / "processed" / "embeddings"
|
| 68 |
print(f"Local embeddings path: {embeddings_path}")
|
| 69 |
return embeddings_path
|
| 70 |
-
|
| 71 |
@classmethod
|
| 72 |
def get_chroma_path(cls) -> Path:
|
| 73 |
"""Get the Chroma DB path"""
|
|
@@ -76,14 +76,14 @@ class Settings:
|
|
| 76 |
return cls.get_embeddings_path()
|
| 77 |
# Local: Use subdirectory
|
| 78 |
return cls.get_embeddings_path() / "chroma"
|
| 79 |
-
|
| 80 |
@classmethod
|
| 81 |
def ensure_embedding_paths(cls) -> None:
|
| 82 |
"""Ensure all embedding-related directories exist"""
|
| 83 |
if not cls.is_huggingface(): # Only create directories locally
|
| 84 |
cls.get_embeddings_path().mkdir(parents=True, exist_ok=True)
|
| 85 |
cls.get_chroma_path().mkdir(parents=True, exist_ok=True)
|
| 86 |
-
|
| 87 |
@classmethod
|
| 88 |
def get_chroma_settings(cls) -> dict:
|
| 89 |
"""Get ChromaDB settings"""
|
|
@@ -93,11 +93,12 @@ class Settings:
|
|
| 93 |
"persist_directory": str(chroma_path),
|
| 94 |
"collection_name": cls.CHROMA_COLLECTION_NAME
|
| 95 |
}
|
| 96 |
-
|
| 97 |
@classmethod
|
| 98 |
def debug_openai_key(cls) -> None:
|
| 99 |
"""Print debug information about OpenAI API key"""
|
| 100 |
if cls.OPENAI_API_KEY:
|
| 101 |
-
print(
|
|
|
|
| 102 |
else:
|
| 103 |
-
print("OpenAI API Key is NOT set.")
|
|
|
|
| 4 |
|
| 5 |
load_dotenv()
|
| 6 |
|
| 7 |
+
|
| 8 |
class Settings:
|
| 9 |
# Base Paths
|
| 10 |
BASE_DIR = Path(__file__).parent.parent
|
| 11 |
+
|
| 12 |
# Deployment Mode
|
| 13 |
DEPLOYMENT_MODE = os.getenv('DEPLOYMENT_MODE', 'local')
|
| 14 |
+
|
| 15 |
# API Keys
|
| 16 |
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
|
| 17 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
| 18 |
+
|
| 19 |
# HuggingFace Settings
|
| 20 |
HF_SPACE = "SongLift/LyrGen2"
|
| 21 |
HF_DATASET = "SongLift/LyrGen2_DB"
|
| 22 |
+
|
| 23 |
# Local Settings
|
| 24 |
LYRICS_DIR = BASE_DIR / "data" / "raw" / "lyrics"
|
| 25 |
EMBEDDINGS_DIR = BASE_DIR / "data" / "processed" / "embeddings"
|
| 26 |
|
|
|
|
| 27 |
# Model Settings
|
| 28 |
EMBEDDING_MODEL = "text-embedding-ada-002"
|
| 29 |
LLM_MODEL = "gpt-4"
|
| 30 |
+
|
| 31 |
# ChromaDB Settings
|
| 32 |
CHROMA_COLLECTION_NAME = "lyrics_v1"
|
| 33 |
+
|
| 34 |
@classmethod
|
| 35 |
def is_huggingface(cls) -> bool:
|
| 36 |
"""Check if running in HuggingFace environment"""
|
| 37 |
return cls.DEPLOYMENT_MODE == 'huggingface'
|
| 38 |
+
|
| 39 |
@classmethod
|
| 40 |
def get_embeddings_path(cls) -> Path:
|
| 41 |
"""Get the base embeddings path"""
|
|
|
|
| 43 |
# In HuggingFace, first check the dataset cache
|
| 44 |
data_dir = Path("/data")
|
| 45 |
print(f"\nSearching for embeddings in: {data_dir}")
|
| 46 |
+
|
| 47 |
# Look for the most recent snapshot directory containing chroma
|
| 48 |
snapshot_pattern = "**/datasets--*--*/snapshots/*/chroma"
|
| 49 |
print(f"Using search pattern: {snapshot_pattern}")
|
| 50 |
+
|
| 51 |
snapshots = list(data_dir.glob(snapshot_pattern))
|
| 52 |
print(f"Found {len(snapshots)} potential snapshot directories:")
|
| 53 |
for snap in snapshots:
|
| 54 |
print(f"- {snap} (Modified: {snap.stat().st_mtime})")
|
| 55 |
+
|
| 56 |
if snapshots:
|
| 57 |
chosen_path = max(snapshots, key=lambda p: p.stat().st_mtime)
|
| 58 |
print(f"Selected most recent: {chosen_path}")
|
| 59 |
return chosen_path
|
| 60 |
+
|
| 61 |
print("No snapshots found, using fallback location")
|
| 62 |
fallback_path = data_dir / "processed/embeddings"
|
| 63 |
print(f"Fallback path: {fallback_path}")
|
| 64 |
return fallback_path
|
| 65 |
+
|
| 66 |
# Local: Use project-relative path
|
| 67 |
embeddings_path = cls.BASE_DIR / "data" / "processed" / "embeddings"
|
| 68 |
print(f"Local embeddings path: {embeddings_path}")
|
| 69 |
return embeddings_path
|
| 70 |
+
|
| 71 |
@classmethod
|
| 72 |
def get_chroma_path(cls) -> Path:
|
| 73 |
"""Get the Chroma DB path"""
|
|
|
|
| 76 |
return cls.get_embeddings_path()
|
| 77 |
# Local: Use subdirectory
|
| 78 |
return cls.get_embeddings_path() / "chroma"
|
| 79 |
+
|
| 80 |
@classmethod
|
| 81 |
def ensure_embedding_paths(cls) -> None:
|
| 82 |
"""Ensure all embedding-related directories exist"""
|
| 83 |
if not cls.is_huggingface(): # Only create directories locally
|
| 84 |
cls.get_embeddings_path().mkdir(parents=True, exist_ok=True)
|
| 85 |
cls.get_chroma_path().mkdir(parents=True, exist_ok=True)
|
| 86 |
+
|
| 87 |
@classmethod
|
| 88 |
def get_chroma_settings(cls) -> dict:
|
| 89 |
"""Get ChromaDB settings"""
|
|
|
|
| 93 |
"persist_directory": str(chroma_path),
|
| 94 |
"collection_name": cls.CHROMA_COLLECTION_NAME
|
| 95 |
}
|
| 96 |
+
|
| 97 |
@classmethod
|
| 98 |
def debug_openai_key(cls) -> None:
|
| 99 |
"""Print debug information about OpenAI API key"""
|
| 100 |
if cls.OPENAI_API_KEY:
|
| 101 |
+
print(
|
| 102 |
+
f"OpenAI API Key is set. Length: {len(cls.OPENAI_API_KEY)} characters.")
|
| 103 |
else:
|
| 104 |
+
print("OpenAI API Key is NOT set.")
|
src/generator/generator.py
CHANGED
|
@@ -53,10 +53,11 @@ class LyricGenerator:
|
|
| 53 |
def _create_embeddings_with_retry(self):
|
| 54 |
"""Create OpenAI embeddings with retry logic"""
|
| 55 |
try:
|
|
|
|
| 56 |
return OpenAIEmbeddings(
|
| 57 |
-
openai_api_key=
|
| 58 |
-
timeout=60,
|
| 59 |
-
openai_proxy=None
|
| 60 |
)
|
| 61 |
except Exception as e:
|
| 62 |
print(f"Error creating embeddings: {type(e).__name__}: {str(e)}")
|
|
|
|
| 53 |
def _create_embeddings_with_retry(self):
|
| 54 |
"""Create OpenAI embeddings with retry logic"""
|
| 55 |
try:
|
| 56 |
+
api_key = Settings.OPENAI_API_KEY.strip() # Clean the key
|
| 57 |
return OpenAIEmbeddings(
|
| 58 |
+
openai_api_key=api_key,
|
| 59 |
+
timeout=60,
|
| 60 |
+
openai_proxy=None
|
| 61 |
)
|
| 62 |
except Exception as e:
|
| 63 |
print(f"Error creating embeddings: {type(e).__name__}: {str(e)}")
|