Spaces:
Sleeping
Sleeping
File size: 1,408 Bytes
e6ea8c6 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 |
import pymupdf
import os
def create_test_pdf(filename: str = "test_document.pdf") -> None:
    """Write a small two-page sample PDF to *filename*.

    Page 1 holds a title, a plain paragraph, a line of math-like text,
    two side-by-side "column" headings with content, and a stroked
    rectangle with a short label. Page 2 holds a title and one sentence
    so that multi-page processing can be exercised.

    Args:
        filename: Path the generated PDF is saved to.
    """
    doc = pymupdf.open()
    try:
        page = doc.new_page()

        # Title
        page.insert_text(
            (50, 50), "PDF Structure Inspector Test Document", fontsize=24
        )

        # Normal paragraph
        text = "This is a normal paragraph of text to test basic extraction."
        page.insert_text((50, 80), text, fontsize=12)

        # Math-like text to test math detection
        math_text = (
            "Here is some math: f(x) = sum(x_i) for i in N. "
            "Also x^2 + y^2 = r^2."
        )
        page.insert_text((50, 110), math_text, fontsize=12)

        # Text that might look like a header/column
        page.insert_text((50, 150), "Column 1", fontsize=14)
        page.insert_text((300, 150), "Column 2", fontsize=14)
        page.insert_text((50, 170), "Left side content.", fontsize=12)
        page.insert_text((300, 170), "Right side content.", fontsize=12)

        # Add a drawing (vector)
        page.draw_rect((50, 200, 150, 250), color=(0, 0, 1))
        # NOTE(review): the label is drawn in white and the rectangle is
        # given no fill here, so the text is presumably invisible when
        # rendered -- confirm this is intended.
        page.insert_text((55, 225), "Vector Box", fontsize=10, color=(1, 1, 1))

        # Add a second page for batch testing
        page2 = doc.new_page()
        page2.insert_text((50, 50), "Page 2 - Batch Analysis Test", fontsize=24)
        page2.insert_text(
            (50, 80),
            "Just another page to verify multi-page processing.",
            fontsize=12,
        )

        doc.save(filename)
    finally:
        # Release the document even if drawing or saving raised.
        doc.close()

    print(f"Created {filename}")
# Script entry point: generate the sample PDF under its default file name.
if __name__ == "__main__":
    create_test_pdf()
|