Spaces:
Running
Running
File size: 612 Bytes
from llama_index.readers.docling import DoclingReader
import os
from pathlib import Path
def check_metadata(pdf_path="nvidia_q4_fy24.pdf", max_docs=2):
    """Load a PDF with ``DoclingReader`` and print metadata for its first documents.

    Args:
        pdf_path: Location of the PDF to inspect (string or path-like).
            Defaults to ``"nvidia_q4_fy24.pdf"`` relative to the current
            working directory.
        max_docs: Number of leading documents whose metadata is printed.
            Defaults to 2; ``None`` prints every loaded document.

    Returns:
        None. When ``pdf_path`` does not exist, prints ``"PDF not found."``
        and returns before a reader is constructed.
    """
    # Convert once so the existence check and the reader see the same path.
    path = Path(pdf_path)
    if not path.exists():
        print("PDF not found.")
        return

    reader = DoclingReader()
    documents = reader.load_data(file_path=path)
    print(f"Loaded {len(documents)} documents.")

    # Only a small sample is printed by default to keep the output readable.
    for i, doc in enumerate(documents[:max_docs]):
        print(f"Doc {i} Metadata: {doc.metadata}")
        # Optional debugging aid, left disabled as in the original:
        # print(f"Doc {i} Text Preview: {doc.text[:200]}...")
# Run the metadata check only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    check_metadata()
|