James Edmunds committed on
Commit
5d9132a
·
1 Parent(s): 1b1c18c

more embeddings path tweaks

Browse files
scripts/browse_hf_data.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import streamlit as st
3
+ from pathlib import Path
4
+
5
def list_files_in_directory(directory):
    """Return the paths of all files under *directory*, recursively.

    Args:
        directory: Root directory to scan (``str`` or ``os.PathLike``).

    Returns:
        A sorted list of file paths, each built by joining the walked
        directory with the file name. The list is empty when the directory
        contains no files or does not exist (``os.walk`` yields nothing for
        a path it cannot read).
    """
    # os.walk yields entries in filesystem order, which is not stable across
    # platforms or runs; sort so the listing is deterministic.
    return sorted(
        os.path.join(root, filename)
        for root, _dirs, filenames in os.walk(directory)
        for filename in filenames
    )
12
+
13
def main(directory="/data"):
    """Render a Streamlit page listing every file under *directory*.

    Args:
        directory: Root directory to browse. Defaults to ``/data``, the
            persistent storage directory.
    """
    st.title("Embeddings File Browser")

    st.write(f"Browsing directory: {directory}")

    # os.walk silently yields nothing for a missing path, which would be
    # reported as an empty directory; surface the real problem instead.
    if not os.path.isdir(directory):
        st.error(f"Directory does not exist: {directory}")
        return

    files = list_files_in_directory(directory)

    if not files:
        st.write("No files found in the directory.")
        return

    st.write("Files found:")
    for file_path in files:
        st.write(file_path)


if __name__ == "__main__":
    main()
src/generator/generator.py CHANGED
@@ -82,7 +82,7 @@ class LyricGenerator:
82
  if Settings.is_huggingface():
83
  print("HuggingFace environment detected, setting up embeddings...")
84
  self._setup_embeddings_from_hf()
85
- chroma_dir = Path("/data/chroma") # Assuming /data is the root for persistent storage
86
  else:
87
  print("Local environment detected")
88
  print(f"Base directory: {Settings.BASE_DIR}")
@@ -130,8 +130,7 @@ class LyricGenerator:
130
  for item in chroma_dir.glob('**/*'):
131
  print(f" {item}")
132
  if item.is_file():
133
- print(
134
- f" Size: {item.stat().st_size / (1024*1024):.2f} MB")
135
 
136
  raise RuntimeError(
137
  "Chroma DB is empty. Please ensure embeddings "
 
82
  if Settings.is_huggingface():
83
  print("HuggingFace environment detected, setting up embeddings...")
84
  self._setup_embeddings_from_hf()
85
+ chroma_dir = Path("/data/processed/embeddings/chroma")
86
  else:
87
  print("Local environment detected")
88
  print(f"Base directory: {Settings.BASE_DIR}")
 
130
  for item in chroma_dir.glob('**/*'):
131
  print(f" {item}")
132
  if item.is_file():
133
+ print(f" Size: {item.stat().st_size / (1024*1024):.2f} MB")
 
134
 
135
  raise RuntimeError(
136
  "Chroma DB is empty. Please ensure embeddings "