ORA / scripts /inspect_dataset.py
Abdalkaderdev's picture
Initial ORA deployment
5e0532d
from datasets import load_dataset
def inspect(num_examples: int = 5) -> None:
    """Print the first few examples of the BibleNLP corpus to stdout.

    Opens the ``train`` split of ``bible-nlp/biblenlp-corpus`` in streaming
    mode and prints the first ``num_examples`` records, one ``print`` call
    per record.

    Args:
        num_examples: Maximum number of examples to print. Defaults to 5,
            which matches the previously hard-coded limit.

    Raises:
        ValueError: If ``num_examples`` is negative.
    """
    # Local import keeps this function self-contained; islice replaces the
    # manual counter/break bookkeeping.
    from itertools import islice

    # Fail fast, before any network access, on a nonsensical limit.
    if num_examples < 0:
        raise ValueError(f"num_examples must be >= 0, got {num_examples}")

    print("Loading dataset (streaming)...")
    # Streaming to avoid downloading everything if it's huge.
    # NOTE(review): trust_remote_code=True lets the dataset repository's
    # loading script run locally -- confirm this source is trusted.
    ds = load_dataset(
        "bible-nlp/biblenlp-corpus",
        split="train",
        streaming=True,
        trust_remote_code=True,
    )

    print(f"Fetching first {num_examples} examples...")
    # islice stops after num_examples items without pulling an extra record
    # from the stream, and yields nothing when num_examples is 0.
    for example in islice(ds, num_examples):
        print(example)
# Run the inspection only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    inspect()