James Edmunds committed on
Commit
a24c09f
·
1 Parent(s): 53b2b1d

Trying to fix this HF dataset fiasco

Browse files
Files changed (1) hide show
  1. src/generator/generator.py +5 -6
src/generator/generator.py CHANGED
@@ -47,12 +47,12 @@ class LyricGenerator:
47
  """Download and setup embeddings from HuggingFace dataset"""
48
  print("\n=== Setting up embeddings from HuggingFace dataset ===")
49
  try:
50
- # Load dataset directly into the desired directory
51
  dataset = load_dataset("SongLift/LyrGen2_DB", split='train', cache_dir="/data")
52
  print("Dataset loaded successfully into cache directory.")
53
 
54
  # Verify the contents of the cache directory
55
- chroma_dir = Path("/data")
56
  print(f"Persistent storage contents: {list(chroma_dir.glob('**/*'))}")
57
 
58
  except Exception as e:
@@ -66,17 +66,16 @@ class LyricGenerator:
66
  try:
67
  print("\n=== Loading Embeddings ===")
68
 
69
- # If in HuggingFace environment, ensure embeddings are set up
70
  if Settings.is_huggingface():
71
  print("HuggingFace environment detected, setting up embeddings...")
72
  self._setup_embeddings_from_hf()
 
73
  else:
74
  print("Local environment detected")
75
  print(f"Base directory: {Settings.BASE_DIR}")
 
76
 
77
- print(f"\nLoading vector store from: {self.embeddings_dir}")
78
- # Check Chroma directory structure
79
- chroma_dir = Path("/data/chroma")
80
  print(f"Checking Chroma directory: {chroma_dir}")
81
  print(f"Absolute path: {chroma_dir.absolute()}")
82
 
 
47
  """Download and setup embeddings from HuggingFace dataset"""
48
  print("\n=== Setting up embeddings from HuggingFace dataset ===")
49
  try:
50
+ # Load the latest version of the dataset into the desired directory
51
  dataset = load_dataset("SongLift/LyrGen2_DB", split='train', cache_dir="/data")
52
  print("Dataset loaded successfully into cache directory.")
53
 
54
  # Verify the contents of the cache directory
55
+ chroma_dir = Path("/data/chroma")
56
  print(f"Persistent storage contents: {list(chroma_dir.glob('**/*'))}")
57
 
58
  except Exception as e:
 
66
  try:
67
  print("\n=== Loading Embeddings ===")
68
 
69
+ # Determine the environment and set paths accordingly
70
  if Settings.is_huggingface():
71
  print("HuggingFace environment detected, setting up embeddings...")
72
  self._setup_embeddings_from_hf()
73
+ chroma_dir = Path("/data/chroma") # Assuming /data is the root for persistent storage
74
  else:
75
  print("Local environment detected")
76
  print(f"Base directory: {Settings.BASE_DIR}")
77
+ chroma_dir = Path("/data/processed/embeddings/chroma") # Local environment path
78
 
 
 
 
79
  print(f"Checking Chroma directory: {chroma_dir}")
80
  print(f"Absolute path: {chroma_dir.absolute()}")
81