File size: 2,368 Bytes
046e3b8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import sys
import fitz  # PyMuPDF

def _iter_spans(doc):
    """Yield ``(page_number, span)`` for each text span of *doc*.

    Spans are produced in the order PyMuPDF returns them from
    ``page.get_text("dict")``: page by page, block by block, line by line.
    ``page_number`` is 1-based (``page.number + 1``).
    """
    for page in doc:
        for block in page.get_text("dict")["blocks"]:
            # Blocks without a "lines" key hold no text spans (presumably
            # image blocks), so they contribute nothing.
            for line in block.get("lines", ()):
                for span in line["spans"]:
                    yield page.number + 1, span


def _print_preceding_details(page_num, span):
    """Print the attributes of *span* and a suggested recipe filter for it."""
    print("\n--- PRECEDING ELEMENT DETAILS ---")
    print(f"Text:   '{span['text']}'")
    print(f"Page:   {page_num}")
    print(f"Font:   {span['font']}")
    print(f"Size:   {span['size']:.4f}")
    print(f"Color:  {hex(span['color'])}")
    print(f"BBox:   {span['bbox']}")
    print(f"Flags:  {span['flags']}")

    # Helper for recipe creation
    print("\n--- SUGGESTED RECIPE FILTER ---")
    print("[[heading]]")
    print(f"font.name = \"{span['font']}\"")
    print(f"font.size = {span['size']}")


def main():
    """Report the text span that immediately precedes each anchor match.

    Reads two command-line arguments: the path of a PDF and an anchor
    string. Every span whose text contains the anchor is reported, together
    with the font, size, color, bounding box and flags of the span that came
    right before it (which may be on an earlier page).

    The anchor is matched against one span at a time, so an anchor whose
    text is split across several spans is not detected.

    Exits with status 1 (usage message on stderr) when fewer than two
    arguments are supplied.
    """
    if len(sys.argv) < 3:
        print(
            "Usage: python find_preceding.py <input.pdf> \"<anchor_text>\"",
            file=sys.stderr,
        )
        sys.exit(1)

    pdf_path = sys.argv[1]
    anchor = sys.argv[2]

    previous = None  # (page_number, span) of the most recently visited span
    found = False

    # The context manager guarantees the document is closed even if a page
    # fails to parse part-way through the scan.
    with fitz.open(pdf_path) as doc:
        print(f"Searching for anchor containing: '{anchor}'")

        for page_num, span in _iter_spans(doc):
            text = span["text"]

            if anchor in text:
                found = True
                print(f"\n[!] MATCH FOUND on Page {page_num}")
                print(f"    Anchor Span Text: '{text.strip()}'")

                if previous is not None:
                    _print_preceding_details(*previous)
                else:
                    print("\n[!] No preceding text element found (this might be the first element).")

            # Remember this span so the next match can report it; the scan
            # keeps going so that every occurrence of the anchor is listed.
            previous = (page_num, span)

    if not found:
        print(f"\nAnchor text '{anchor}' not found in document.")

# Run the command-line tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()