File size: 1,408 Bytes
e6ea8c6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41

import pymupdf
import os

def create_test_pdf(filename: str = "test_document.pdf") -> None:
    """Write a small two-page PDF used as a fixture for inspection tests.

    Page 1 contains a large title, a plain paragraph, a line of math-like
    text, two side-by-side "columns" of text, and a stroked rectangle with
    a short label inside it. Page 2 contains a title and one line of text
    so that multi-page processing can be exercised.

    Args:
        filename: Path the PDF is saved to.

    The document is always closed, even if building or saving it fails;
    any exception raised by PyMuPDF propagates to the caller.
    """
    doc = pymupdf.open()
    try:
        page = doc.new_page()

        # Title
        page.insert_text(
            (50, 50), "PDF Structure Inspector Test Document", fontsize=24
        )

        # Normal paragraph
        text = "This is a normal paragraph of text to test basic extraction."
        page.insert_text((50, 80), text, fontsize=12)

        # Math-like text to test math detection
        math_text = (
            "Here is some math: f(x) = sum(x_i) for i in N. "
            "Also x^2 + y^2 = r^2."
        )
        page.insert_text((50, 110), math_text, fontsize=12)

        # Two header/body pairs sharing the same baselines at different x
        # offsets, so the text might look like a two-column layout.
        page.insert_text((50, 150), "Column 1", fontsize=14)
        page.insert_text((300, 150), "Column 2", fontsize=14)

        page.insert_text((50, 170), "Left side content.", fontsize=12)
        page.insert_text((300, 170), "Right side content.", fontsize=12)

        # Add a drawing (vector): a rectangle with a blue outline.
        page.draw_rect((50, 200, 150, 250), color=(0, 0, 1))
        # NOTE(review): the label is white and the rectangle has no fill
        # argument, so it is presumably invisible on a white page while
        # still being present as text -- confirm this is intentional.
        page.insert_text((55, 225), "Vector Box", fontsize=10, color=(1, 1, 1))

        # Add a second page for batch testing
        page2 = doc.new_page()
        page2.insert_text((50, 50), "Page 2 - Batch Analysis Test", fontsize=24)
        page2.insert_text(
            (50, 80),
            "Just another page to verify multi-page processing.",
            fontsize=12,
        )

        doc.save(filename)
    finally:
        # Release the document whether or not building/saving succeeded.
        doc.close()

    print(f"Created {filename}")

# When run as a script (not imported), generate the fixture PDF using the
# default output filename of create_test_pdf.
if __name__ == "__main__":
    create_test_pdf()