# trygithubactions/utils/data_loader.py
# Author: subashpoudel — commit a6ebaaf ("Next commit after refinements"), 295 bytes
from datasets import load_dataset
# Cached dataset handle; populated lazily on first call to load_influencer_data().
_data = None


def load_influencer_data():
    """Return the influencer story dataset ('train' split) with a FAISS index.

    On the first call, downloads "subashdvorak/tiktok-formatted-story-v2"
    (revision "embedded") and builds a FAISS index over its 'embeddings'
    column; the result is cached at module level so subsequent calls are
    cheap. Loading lazily here avoids doing a heavy network download and
    index build as an import-time side effect.

    Returns:
        datasets.Dataset: the indexed 'train' split.
    """
    global _data
    if _data is None:
        print("Loading dataset and indexing FAISS...")  # Optional: for debugging
        dataset = load_dataset(
            "subashdvorak/tiktok-formatted-story-v2", revision="embedded"
        )
        _data = dataset['train'].add_faiss_index('embeddings')
    return _data