James Edmunds committed on
Commit ·
a24c09f
1
Parent(s): 53b2b1d
Trying to fix this HF dataset fiasco
Browse files
src/generator/generator.py
CHANGED
|
@@ -47,12 +47,12 @@ class LyricGenerator:
|
|
| 47 |
"""Download and setup embeddings from HuggingFace dataset"""
|
| 48 |
print("\n=== Setting up embeddings from HuggingFace dataset ===")
|
| 49 |
try:
|
| 50 |
-
# Load
|
| 51 |
dataset = load_dataset("SongLift/LyrGen2_DB", split='train', cache_dir="/data")
|
| 52 |
print("Dataset loaded successfully into cache directory.")
|
| 53 |
|
| 54 |
# Verify the contents of the cache directory
|
| 55 |
-
chroma_dir = Path("/data")
|
| 56 |
print(f"Persistent storage contents: {list(chroma_dir.glob('**/*'))}")
|
| 57 |
|
| 58 |
except Exception as e:
|
|
@@ -66,17 +66,16 @@ class LyricGenerator:
|
|
| 66 |
try:
|
| 67 |
print("\n=== Loading Embeddings ===")
|
| 68 |
|
| 69 |
-
#
|
| 70 |
if Settings.is_huggingface():
|
| 71 |
print("HuggingFace environment detected, setting up embeddings...")
|
| 72 |
self._setup_embeddings_from_hf()
|
|
|
|
| 73 |
else:
|
| 74 |
print("Local environment detected")
|
| 75 |
print(f"Base directory: {Settings.BASE_DIR}")
|
|
|
|
| 76 |
|
| 77 |
-
print(f"\nLoading vector store from: {self.embeddings_dir}")
|
| 78 |
-
# Check Chroma directory structure
|
| 79 |
-
chroma_dir = Path("/data/chroma")
|
| 80 |
print(f"Checking Chroma directory: {chroma_dir}")
|
| 81 |
print(f"Absolute path: {chroma_dir.absolute()}")
|
| 82 |
|
|
|
|
| 47 |
"""Download and setup embeddings from HuggingFace dataset"""
|
| 48 |
print("\n=== Setting up embeddings from HuggingFace dataset ===")
|
| 49 |
try:
|
| 50 |
+
# Load the latest version of the dataset into the desired directory
|
| 51 |
dataset = load_dataset("SongLift/LyrGen2_DB", split='train', cache_dir="/data")
|
| 52 |
print("Dataset loaded successfully into cache directory.")
|
| 53 |
|
| 54 |
# Verify the contents of the cache directory
|
| 55 |
+
chroma_dir = Path("/data/chroma")
|
| 56 |
print(f"Persistent storage contents: {list(chroma_dir.glob('**/*'))}")
|
| 57 |
|
| 58 |
except Exception as e:
|
|
|
|
| 66 |
try:
|
| 67 |
print("\n=== Loading Embeddings ===")
|
| 68 |
|
| 69 |
+
# Determine the environment and set paths accordingly
|
| 70 |
if Settings.is_huggingface():
|
| 71 |
print("HuggingFace environment detected, setting up embeddings...")
|
| 72 |
self._setup_embeddings_from_hf()
|
| 73 |
+
chroma_dir = Path("/data/chroma") # Assuming /data is the root for persistent storage
|
| 74 |
else:
|
| 75 |
print("Local environment detected")
|
| 76 |
print(f"Base directory: {Settings.BASE_DIR}")
|
| 77 |
+
chroma_dir = Path("/data/processed/embeddings/chroma") # Local environment path
|
| 78 |
|
|
|
|
|
|
|
|
|
|
| 79 |
print(f"Checking Chroma directory: {chroma_dir}")
|
| 80 |
print(f"Absolute path: {chroma_dir.absolute()}")
|
| 81 |
|