File size: 1,000 Bytes
046e3b8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import sys
import fitz  # PyMuPDF

def main() -> None:
    """Print every text span of a PDF that contains a given substring.

    Reads two command-line arguments: ``sys.argv[1]`` is the path of the PDF
    to open and ``sys.argv[2]`` is the substring to look for. For each
    matching span the 1-based page number is printed, followed by the span
    text as-is, its ``ascii()`` escaped form, and the hex code point of every
    character, so look-alike or invisible characters can be told apart.

    Exits with status 1 (after printing a usage line) when fewer than two
    arguments are supplied. Errors raised while opening the PDF propagate
    to the caller.
    """
    if len(sys.argv) < 3:
        print("Usage: python inspect_bytes.py <input.pdf> \"<search_string>\"")
        sys.exit(1)

    pdf_path = sys.argv[1]
    search_str = sys.argv[2]

    # Open the document as a context manager so it is closed even if an
    # exception is raised while walking the pages.
    with fitz.open(pdf_path) as doc:
        print(f"Searching for string containing: '{search_str}'")
        print("-" * 60)

        for page in doc:
            for block in page.get_text("dict")["blocks"]:
                # Blocks without a "lines" entry carry no text spans
                # (presumably image blocks), so they are skipped.
                for line in block.get("lines", ()):
                    for span in line["spans"]:
                        text = span["text"]
                        if search_str not in text:
                            continue
                        hex_codes = [hex(ord(ch)) for ch in text]
                        print(f"Page {page.number + 1}:")
                        print(f"  Visual: '{text}'")
                        print(f"  Raw:    {ascii(text)}")
                        print(f"  Hex:    {hex_codes}")
                        print("-" * 20)

# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()