Spaces:
Sleeping
Sleeping
| import pymupdf | |
| import os | |
def create_test_pdf(filename="test_document.pdf") -> None:
    """Generate a small two-page PDF used as a fixture for structure inspection.

    Page 1 contains a 24pt title, a plain paragraph, a line of math-like
    text, a two-column header/content layout and a stroked rectangle with a
    label inside it. Page 2 contains a heading and one sentence so that
    multi-page processing can be exercised.

    Args:
        filename: Destination passed unchanged to ``Document.save``.
            Defaults to ``"test_document.pdf"``.

    Raises:
        Whatever ``pymupdf`` raises while drawing or saving; the document is
        closed before the exception propagates.
    """
    doc = pymupdf.open()
    try:
        page = doc.new_page()

        # Title
        page.insert_text(
            (50, 50), "PDF Structure Inspector Test Document", fontsize=24
        )

        # Normal paragraph
        text = "This is a normal paragraph of text to test basic extraction."
        page.insert_text((50, 80), text, fontsize=12)

        # Math-like text to test math detection
        math_text = (
            "Here is some math: f(x) = sum(x_i) for i in N. "
            "Also x^2 + y^2 = r^2."
        )
        page.insert_text((50, 110), math_text, fontsize=12)

        # Text that might look like a header/column: two entries share each
        # baseline (y=150 and y=170) at x=50 and x=300.
        page.insert_text((50, 150), "Column 1", fontsize=14)
        page.insert_text((300, 150), "Column 2", fontsize=14)
        page.insert_text((50, 170), "Left side content.", fontsize=12)
        page.insert_text((300, 170), "Right side content.", fontsize=12)

        # Add a drawing (vector): blue stroke, no fill argument is passed.
        page.draw_rect((50, 200, 150, 250), color=(0, 0, 1))
        # NOTE(review): the label is white and the rectangle above is drawn
        # without a fill, so it is presumably invisible on a default page.
        # Confirm whether hidden text is intended here.
        page.insert_text((55, 225), "Vector Box", fontsize=10, color=(1, 1, 1))

        # Add a second page for batch testing
        page2 = doc.new_page()
        page2.insert_text((50, 50), "Page 2 - Batch Analysis Test", fontsize=24)
        page2.insert_text(
            (50, 80),
            "Just another page to verify multi-page processing.",
            fontsize=12,
        )

        doc.save(filename)
    finally:
        # Release the document even when drawing or saving fails; the
        # original never closed it.
        doc.close()

    print(f"Created {filename}")
# Script entry point: when executed directly (not imported), write the sample
# PDF using the function's default output filename.
if __name__ == "__main__":
    create_test_pdf()