Spaces:
Sleeping
Sleeping
| import sys | |
| import fitz # PyMuPDF | |
def main():
    """Print every text span of a PDF that contains a search string.

    Reads two command-line arguments: the PDF path (``sys.argv[1]``) and
    the substring to look for (``sys.argv[2]``). For each span whose text
    contains the substring, prints the 1-based page number, the text as
    is, its ``ascii()`` form, and the code point of every character in
    hex, so that invisible or look-alike characters become visible.

    Exits with status 1 after printing a usage line when fewer than two
    arguments are supplied.
    """
    if len(sys.argv) < 3:
        print("Usage: python inspect_bytes.py <input.pdf> \"<search_string>\"")
        sys.exit(1)

    pdf_path = sys.argv[1]
    search_str = sys.argv[2]

    # Open via a context manager so the document is closed even if an
    # exception is raised while scanning the pages.
    with fitz.open(pdf_path) as doc:
        print(f"Searching for string containing: '{search_str}'")
        print("-" * 60)

        for page in doc:
            for block in page.get_text("dict")["blocks"]:
                # Blocks without a "lines" entry carry no text spans.
                for line in block.get("lines", ()):
                    for span in line["spans"]:
                        text = span["text"]
                        if search_str not in text:
                            continue
                        code_points = [hex(ord(ch)) for ch in text]
                        print(f"Page {page.number + 1}:")
                        print(f" Visual: '{text}'")
                        print(f" Raw: {ascii(text)}")
                        print(f" Hex: {code_points}")
                        print("-" * 20)
# Run the inspector only when executed as a script, not when imported.
if __name__ == "__main__":
    main()