handwritten-notes-ocr / src /test_dataset.py
lakshmi-charan's picture
Upload 15 files
2411029 verified
raw
history blame contribute delete
360 Bytes
from datasets import load_dataset
def main():
    """Smoke-test the IAM line dataset by printing a summary and one sample.

    Loads ``Teklia/IAM-line`` via ``load_dataset``, prints the returned
    dataset object, then prints the keys, the ``text`` field, and the image
    size of the first record in the ``train`` split.
    """
    dataset = load_dataset("Teklia/IAM-line")
    print(dataset)

    # Only the very first training record is inspected.
    first_record = dataset["train"][0]
    print("Keys:", first_record.keys())
    print("Text:", first_record["text"])

    # The "image" field is expected to be a PIL image (it must expose
    # `.size`) -- NOTE(review): confirm against the dataset's feature schema.
    line_image = first_record["image"]
    print("Image size:", line_image.size)
# Run the dataset check only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()