File size: 1,406 Bytes
046e3b8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import sys
import fitz  # PyMuPDF
import math

def _iter_matching_spans(doc, target_font, target_size):
    """Yield ``(page_number, size, text)`` for each text span matching the query.

    Args:
        doc: Iterable of pages; each page must provide ``get_text("dict")``
            and a zero-based ``number`` attribute.
        target_font: Font name to look for. Matching is a case-insensitive
            substring test against the span's ``"font"`` value.
        target_size: Font size to match, or ``None`` to accept any size.

    Yields:
        Tuples of one-based page number, span size and span text.
    """
    # Lower-case the needle once instead of once per span.
    needle = target_font.lower()
    for page in doc:
        for block in page.get_text("dict")["blocks"]:
            # Blocks without a "lines" entry have nothing to scan.
            for line in block.get("lines", ()):
                for span in line["spans"]:
                    if needle not in span["font"].lower():
                        continue
                    # A 1% relative tolerance absorbs rounding noise in the
                    # sizes reported for the span.
                    if target_size is not None and not math.isclose(
                        span["size"], target_size, rel_tol=1e-2
                    ):
                        continue
                    yield page.number + 1, span["size"], span["text"]


def main():
    """Print every text span in a PDF that uses a given font.

    Command line: ``find_by_font.py <input.pdf> <font_name> [font_size]``.
    The font name is matched as a case-insensitive substring; the optional
    size is matched with a 1% relative tolerance.

    Exits with status 1 when too few arguments are given or when
    ``font_size`` is not a number.
    """
    if len(sys.argv) < 3:
        print("Usage: python find_by_font.py <input.pdf> <font_name> [font_size]")
        sys.exit(1)

    pdf_path = sys.argv[1]
    target_font = sys.argv[2]

    target_size = None
    if len(sys.argv) > 3:
        try:
            target_size = float(sys.argv[3])
        except ValueError:
            # Report bad input as a CLI error rather than a traceback.
            print(
                f"Error: font_size must be a number, got '{sys.argv[3]}'",
                file=sys.stderr,
            )
            sys.exit(1)

    # The context manager closes the document even if the scan raises.
    with fitz.open(pdf_path) as doc:
        print("Searching for:")
        print(f"  Font: '{target_font}'")
        # Compare against None so an explicit size of 0 is not shown as ANY.
        print(f"  Size: {target_size if target_size is not None else 'ANY'}")
        print("-" * 60)
        print(f"{'PAGE':<6} {'SIZE':<8} {'TEXT'}")
        print("-" * 60)

        for page_no, size, text in _iter_matching_spans(
            doc, target_font, target_size
        ):
            print(f"{page_no:<6} {size:<8.2f} '{text}'")

# Run the command-line search only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()