Spaces:
Sleeping
Sleeping
| import sys | |
| import fitz # PyMuPDF | |
| import math | |
def _span_matches(span, target_font, target_size):
    """Return True when *span* uses the requested font (and size, if given).

    The font name is compared as a case-insensitive substring, so a query
    such as "arial" also matches a span whose font is "Arial-BoldMT".
    The size is compared with a 1% relative tolerance; passing
    ``target_size=None`` accepts any size.
    """
    if target_font.lower() not in span["font"].lower():
        return False
    if target_size is None:
        return True
    return math.isclose(span["size"], target_size, rel_tol=1e-2)


def main():
    """Print every text span of a PDF that uses a given font.

    Command line: ``find_by_font.py <input.pdf> <font_name> [font_size]``.

    One row (1-based page number, span size, span text) is printed for each
    span whose font name contains ``font_name`` (case-insensitive) and, when
    ``font_size`` is supplied, whose size is within 1% of it.

    Exits with status 1 when too few arguments are given or when
    ``font_size`` is not a valid number.
    """
    if len(sys.argv) < 3:
        print("Usage: python find_by_font.py <input.pdf> <font_name> [font_size]")
        sys.exit(1)

    pdf_path = sys.argv[1]
    target_font = sys.argv[2]

    # Validate the optional size before touching the PDF so that a typo gives
    # a short error message instead of a ValueError traceback.
    target_size = None
    if len(sys.argv) > 3:
        try:
            target_size = float(sys.argv[3])
        except ValueError:
            print(f"Invalid font size: '{sys.argv[3]}'", file=sys.stderr)
            sys.exit(1)

    # The context manager closes the document even if extraction fails midway.
    with fitz.open(pdf_path) as doc:
        print("Searching for:")
        print(f" Font: '{target_font}'")
        # Compare against None (not truthiness) so an explicit size of 0 is
        # not silently reported and treated as "ANY".
        print(f" Size: {target_size if target_size is not None else 'ANY'}")
        print("-" * 60)
        print(f"{'PAGE':<6} {'SIZE':<8} {'TEXT'}")
        print("-" * 60)

        for page in doc:
            for block in page.get_text("dict")["blocks"]:
                # Blocks without a "lines" entry carry no text spans.
                for line in block.get("lines", ()):
                    for span in line["spans"]:
                        if _span_matches(span, target_font, target_size):
                            print(
                                f"{page.number + 1:<6} "
                                f"{span['size']:<8.2f} '{span['text']}'"
                            )
# Run the search only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()