trygithubactions / utils /data_loader.py
subashpoudel's picture
Added formatted dataset
61a4f47
raw
history blame
292 Bytes
from datasets import load_dataset
# Everything below runs once, at import time: the dataset is fetched and a
# FAISS index is attached before any caller can ask for the data.
print("Loading dataset and indexing FAISS...")  # progress note, handy when debugging slow imports

# Pinned to the "embedded" revision of the repo
# (presumably the one that carries the precomputed vectors -- TODO confirm).
dataset = load_dataset(
    "subashdvorak/tiktok-formatted-story",
    revision="embedded",
)

# Only the "train" split is used; the FAISS index is built over its
# "embeddings" column and the result is kept as the shared module-level handle.
_train_split = dataset["train"]
data = _train_split.add_faiss_index("embeddings")
def load_influencer_data():
    """Return the module-level ``data`` object prepared at import time.

    Nothing is loaded or indexed here: the function simply hands back the
    object that the module bound to ``data`` when it was first imported, so
    every call returns that same object.

    Returns:
        The value of the module-level ``data`` variable (the result of
        ``add_faiss_index('embeddings')`` on the ``'train'`` split).
    """
    return data