James Edmunds committed on
Commit
1da8f51
·
1 Parent(s): ac8d6e6

is this it?

Browse files
Files changed (2) hide show
  1. config/settings.py +20 -19
  2. src/generator/generator.py +4 -3
config/settings.py CHANGED
@@ -4,38 +4,38 @@ from dotenv import load_dotenv
4
 
5
  load_dotenv()
6
 
 
7
  class Settings:
8
  # Base Paths
9
  BASE_DIR = Path(__file__).parent.parent
10
-
11
  # Deployment Mode
12
  DEPLOYMENT_MODE = os.getenv('DEPLOYMENT_MODE', 'local')
13
-
14
  # API Keys
15
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
16
  HF_TOKEN = os.getenv("HF_TOKEN")
17
-
18
  # HuggingFace Settings
19
  HF_SPACE = "SongLift/LyrGen2"
20
  HF_DATASET = "SongLift/LyrGen2_DB"
21
-
22
  # Local Settings
23
  LYRICS_DIR = BASE_DIR / "data" / "raw" / "lyrics"
24
  EMBEDDINGS_DIR = BASE_DIR / "data" / "processed" / "embeddings"
25
 
26
-
27
  # Model Settings
28
  EMBEDDING_MODEL = "text-embedding-ada-002"
29
  LLM_MODEL = "gpt-4"
30
-
31
  # ChromaDB Settings
32
  CHROMA_COLLECTION_NAME = "lyrics_v1"
33
-
34
  @classmethod
35
  def is_huggingface(cls) -> bool:
36
  """Check if running in HuggingFace environment"""
37
  return cls.DEPLOYMENT_MODE == 'huggingface'
38
-
39
  @classmethod
40
  def get_embeddings_path(cls) -> Path:
41
  """Get the base embeddings path"""
@@ -43,31 +43,31 @@ class Settings:
43
  # In HuggingFace, first check the dataset cache
44
  data_dir = Path("/data")
45
  print(f"\nSearching for embeddings in: {data_dir}")
46
-
47
  # Look for the most recent snapshot directory containing chroma
48
  snapshot_pattern = "**/datasets--*--*/snapshots/*/chroma"
49
  print(f"Using search pattern: {snapshot_pattern}")
50
-
51
  snapshots = list(data_dir.glob(snapshot_pattern))
52
  print(f"Found {len(snapshots)} potential snapshot directories:")
53
  for snap in snapshots:
54
  print(f"- {snap} (Modified: {snap.stat().st_mtime})")
55
-
56
  if snapshots:
57
  chosen_path = max(snapshots, key=lambda p: p.stat().st_mtime)
58
  print(f"Selected most recent: {chosen_path}")
59
  return chosen_path
60
-
61
  print("No snapshots found, using fallback location")
62
  fallback_path = data_dir / "processed/embeddings"
63
  print(f"Fallback path: {fallback_path}")
64
  return fallback_path
65
-
66
  # Local: Use project-relative path
67
  embeddings_path = cls.BASE_DIR / "data" / "processed" / "embeddings"
68
  print(f"Local embeddings path: {embeddings_path}")
69
  return embeddings_path
70
-
71
  @classmethod
72
  def get_chroma_path(cls) -> Path:
73
  """Get the Chroma DB path"""
@@ -76,14 +76,14 @@ class Settings:
76
  return cls.get_embeddings_path()
77
  # Local: Use subdirectory
78
  return cls.get_embeddings_path() / "chroma"
79
-
80
  @classmethod
81
  def ensure_embedding_paths(cls) -> None:
82
  """Ensure all embedding-related directories exist"""
83
  if not cls.is_huggingface(): # Only create directories locally
84
  cls.get_embeddings_path().mkdir(parents=True, exist_ok=True)
85
  cls.get_chroma_path().mkdir(parents=True, exist_ok=True)
86
-
87
  @classmethod
88
  def get_chroma_settings(cls) -> dict:
89
  """Get ChromaDB settings"""
@@ -93,11 +93,12 @@ class Settings:
93
  "persist_directory": str(chroma_path),
94
  "collection_name": cls.CHROMA_COLLECTION_NAME
95
  }
96
-
97
  @classmethod
98
  def debug_openai_key(cls) -> None:
99
  """Print debug information about OpenAI API key"""
100
  if cls.OPENAI_API_KEY:
101
- print(f"OpenAI API Key is set. Length: {len(cls.OPENAI_API_KEY)} characters.")
 
102
  else:
103
- print("OpenAI API Key is NOT set.")
 
4
 
5
  load_dotenv()
6
 
7
+
8
  class Settings:
9
  # Base Paths
10
  BASE_DIR = Path(__file__).parent.parent
11
+
12
  # Deployment Mode
13
  DEPLOYMENT_MODE = os.getenv('DEPLOYMENT_MODE', 'local')
14
+
15
  # API Keys
16
  OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
17
  HF_TOKEN = os.getenv("HF_TOKEN")
18
+
19
  # HuggingFace Settings
20
  HF_SPACE = "SongLift/LyrGen2"
21
  HF_DATASET = "SongLift/LyrGen2_DB"
22
+
23
  # Local Settings
24
  LYRICS_DIR = BASE_DIR / "data" / "raw" / "lyrics"
25
  EMBEDDINGS_DIR = BASE_DIR / "data" / "processed" / "embeddings"
26
 
 
27
  # Model Settings
28
  EMBEDDING_MODEL = "text-embedding-ada-002"
29
  LLM_MODEL = "gpt-4"
30
+
31
  # ChromaDB Settings
32
  CHROMA_COLLECTION_NAME = "lyrics_v1"
33
+
34
  @classmethod
35
  def is_huggingface(cls) -> bool:
36
  """Check if running in HuggingFace environment"""
37
  return cls.DEPLOYMENT_MODE == 'huggingface'
38
+
39
  @classmethod
40
  def get_embeddings_path(cls) -> Path:
41
  """Get the base embeddings path"""
 
43
  # In HuggingFace, first check the dataset cache
44
  data_dir = Path("/data")
45
  print(f"\nSearching for embeddings in: {data_dir}")
46
+
47
  # Look for the most recent snapshot directory containing chroma
48
  snapshot_pattern = "**/datasets--*--*/snapshots/*/chroma"
49
  print(f"Using search pattern: {snapshot_pattern}")
50
+
51
  snapshots = list(data_dir.glob(snapshot_pattern))
52
  print(f"Found {len(snapshots)} potential snapshot directories:")
53
  for snap in snapshots:
54
  print(f"- {snap} (Modified: {snap.stat().st_mtime})")
55
+
56
  if snapshots:
57
  chosen_path = max(snapshots, key=lambda p: p.stat().st_mtime)
58
  print(f"Selected most recent: {chosen_path}")
59
  return chosen_path
60
+
61
  print("No snapshots found, using fallback location")
62
  fallback_path = data_dir / "processed/embeddings"
63
  print(f"Fallback path: {fallback_path}")
64
  return fallback_path
65
+
66
  # Local: Use project-relative path
67
  embeddings_path = cls.BASE_DIR / "data" / "processed" / "embeddings"
68
  print(f"Local embeddings path: {embeddings_path}")
69
  return embeddings_path
70
+
71
  @classmethod
72
  def get_chroma_path(cls) -> Path:
73
  """Get the Chroma DB path"""
 
76
  return cls.get_embeddings_path()
77
  # Local: Use subdirectory
78
  return cls.get_embeddings_path() / "chroma"
79
+
80
  @classmethod
81
  def ensure_embedding_paths(cls) -> None:
82
  """Ensure all embedding-related directories exist"""
83
  if not cls.is_huggingface(): # Only create directories locally
84
  cls.get_embeddings_path().mkdir(parents=True, exist_ok=True)
85
  cls.get_chroma_path().mkdir(parents=True, exist_ok=True)
86
+
87
  @classmethod
88
  def get_chroma_settings(cls) -> dict:
89
  """Get ChromaDB settings"""
 
93
  "persist_directory": str(chroma_path),
94
  "collection_name": cls.CHROMA_COLLECTION_NAME
95
  }
96
+
97
  @classmethod
98
  def debug_openai_key(cls) -> None:
99
  """Print debug information about OpenAI API key"""
100
  if cls.OPENAI_API_KEY:
101
+ print(
102
+ f"OpenAI API Key is set. Length: {len(cls.OPENAI_API_KEY)} characters.")
103
  else:
104
+ print("OpenAI API Key is NOT set.")
src/generator/generator.py CHANGED
@@ -53,10 +53,11 @@ class LyricGenerator:
53
  def _create_embeddings_with_retry(self):
54
  """Create OpenAI embeddings with retry logic"""
55
  try:
 
56
  return OpenAIEmbeddings(
57
- openai_api_key=Settings.OPENAI_API_KEY,
58
- timeout=60, # Increase timeout
59
- openai_proxy=None # Try with no proxy
60
  )
61
  except Exception as e:
62
  print(f"Error creating embeddings: {type(e).__name__}: {str(e)}")
 
53
  def _create_embeddings_with_retry(self):
54
  """Create OpenAI embeddings with retry logic"""
55
  try:
56
+ api_key = Settings.OPENAI_API_KEY.strip() # Clean the key
57
  return OpenAIEmbeddings(
58
+ openai_api_key=api_key,
59
+ timeout=60,
60
+ openai_proxy=None
61
  )
62
  except Exception as e:
63
  print(f"Error creating embeddings: {type(e).__name__}: {str(e)}")