LyrGen2 / scripts /browse_hf_data.py
James Edmunds
Working local. Added new fixes for HF embeddings
686446c
import os
import streamlit as st
from pathlib import Path
from datasets import load_dataset
from dotenv import load_dotenv
def list_files_in_directory(directory):
    """Return the paths of all files under ``directory``, recursively.

    The tree is traversed with ``os.walk`` and each filename is joined onto
    the directory it was found in, so every returned path starts with
    ``directory``.

    Args:
        directory: Root directory to search.

    Returns:
        A sorted list of file paths. The list is empty when no files are
        found, including when ``directory`` does not exist (``os.walk``
        ignores listing errors by default).
    """
    found = [
        os.path.join(root, filename)
        for root, _dirs, filenames in os.walk(directory)
        for filename in filenames
    ]
    # os.walk yields entries in arbitrary filesystem order; sort so the
    # listing is stable between runs and machines.
    found.sort()
    return found
def main():
    """Render a Streamlit page that checks dataset access and lists files.

    The page loads environment variables via ``load_dotenv``, reads
    ``HF_TOKEN``, loads the ``SongLift/LyrGen2_DB`` dataset with that token,
    and then writes out every file found under ``/data``. A missing token, a
    dataset loading failure, or a missing directory is reported on the page
    and ends the run early.
    """
    st.title("Embeddings File Browser")

    # Load environment variables
    load_dotenv()

    # Retrieve the Hugging Face token
    hf_token = os.getenv("HF_TOKEN")
    if not hf_token:
        st.error("HF_TOKEN not found in environment variables.")
        return

    # Load the dataset using the token. The result is not used afterwards;
    # the call only confirms the dataset can be loaded with this token.
    try:
        # `token` replaces the deprecated `use_auth_token` argument.
        load_dataset("SongLift/LyrGen2_DB", token=hf_token)
    except Exception as e:  # UI boundary: show any failure on the page
        st.error(f"Error loading dataset: {e}")
        return
    st.write("Dataset loaded successfully.")

    # Directory to browse
    directory = "/data"  # Persistent storage directory
    st.write(f"Browsing directory: {directory}")

    # Without this check a missing directory would be reported as
    # "No files found", hiding the real problem.
    if not os.path.isdir(directory):
        st.warning(f"Directory does not exist: {directory}")
        return

    # List files
    files = list_files_in_directory(directory)
    if not files:
        st.write("No files found in the directory.")
        return

    st.write("Files found:")
    for file in files:
        st.write(file)
# Run the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()