import io
import json
import os
import shutil

import fitz  # PyMuPDF
from PIL import Image, ImageDraw, ImageFont

import config


def setup_debug_dir():
    """Recreate ``config.DEBUG_DIR`` as an empty directory.

    Any existing directory at that path is removed together with its
    contents before the fresh directory is created.
    """
    if os.path.exists(config.DEBUG_DIR):
        shutil.rmtree(config.DEBUG_DIR)
    os.makedirs(config.DEBUG_DIR)
    print(f"Debug directory cleared: {config.DEBUG_DIR}/")


def save_debug_image(image_bytes, name):
    """Write raw image bytes to ``<DEBUG_DIR>/<name>.jpg`` and return the path.

    The bytes are written as-is; no image encoding is performed here.
    """
    path = os.path.join(config.DEBUG_DIR, f"{name}.jpg")
    with open(path, "wb") as f:
        f.write(image_bytes)
    return path


def save_debug_json(data, name):
    """Serialize *data* as indented JSON to ``<DEBUG_DIR>/<name>.json``."""
    path = os.path.join(config.DEBUG_DIR, f"{name}.json")
    # Explicit encoding so the output does not depend on the platform default.
    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2)


def normalize_bbox_to_top_left(bbox, page_height):
    """Convert PDF Bottom-Left coords to Image Top-Left coords.

    Args:
        bbox: Mapping with ``x0``, ``y0``, ``x1``, ``y1`` keys.
        page_height: Height of the page, in the same units as *bbox*.

    Returns:
        A new dict with the same keys; x values are unchanged and the y
        values are flipped around the page height (``y0`` and ``y1`` swap
        roles so that ``y0`` remains the smaller value).
    """
    return {
        "x0": bbox["x0"],
        "y0": page_height - bbox["y1"],
        "x1": bbox["x1"],
        "y1": page_height - bbox["y0"],
    }


def get_words_from_page(page):
    """Return the result of ``page.get_text("words")`` for *page*."""
    return page.get_text("words")


def calculate_smart_anchors(field_bbox, words, page_height):
    """Collect the text found immediately left of, right of and above a field.

    Args:
        field_bbox: Field rectangle (``x0``/``y0``/``x1``/``y1`` keys) in
            bottom-left-origin coordinates; it is flipped to top-left
            origin before comparison.
        words: Sequence of word records whose first five items are
            ``x0, y0, x1, y1, text``.
            NOTE(review): assumes these are already in top-left-origin
            coordinates, as produced by ``get_words_from_page`` - confirm.
        page_height: Page height used for the coordinate flip.

    Returns:
        Dict with ``"left"``, ``"right"`` and ``"above"`` keys. Each value is
        the text of up to four nearest matching words joined by single
        spaces, or an empty string when nothing matched.
    """
    norm_bbox = normalize_bbox_to_top_left(field_bbox, page_height)
    fx0, fy0 = norm_bbox["x0"], norm_bbox["y0"]
    fx1, fy1 = norm_bbox["x1"], norm_bbox["y1"]

    SEARCH_RADIUS = 150
    Y_ALIGNMENT_TOLERANCE = 12
    MAX_ANCHOR_WORDS = 4

    # The field's vertical centre does not depend on the word being examined,
    # so compute it once instead of on every iteration.
    f_center_y = (fy0 + fy1) / 2

    closest_left = []
    closest_right = []
    closest_above = []

    for w in words:
        wx0, wy0, wx1, wy1, text = w[:5]
        w_center_y = (wy0 + wy1) / 2
        same_row = abs(w_center_y - f_center_y) < Y_ALIGNMENT_TOLERANCE

        # Left: word ends before the field starts, on roughly the same row.
        if same_row and wx1 < fx0 and fx0 - wx1 < SEARCH_RADIUS:
            closest_left.append((fx0 - wx1, text))

        # Right: word starts after the field ends, on roughly the same row.
        if same_row and wx0 > fx1 and wx0 - fx1 < SEARCH_RADIUS:
            closest_right.append((wx0 - fx1, text))

        # Above: word sits entirely above the field and overlaps it
        # horizontally.
        overlap = max(0, min(fx1, wx1) - max(fx0, wx0))
        if wy1 < fy0 and overlap > 0 and fy0 - wy1 < SEARCH_RADIUS:
            closest_above.append((fy0 - wy1, text))

    def join_text(candidates):
        # Sort on distance only (stable), so equidistant words keep their
        # input order rather than being ordered by their text.
        nearest = sorted(candidates, key=lambda c: c[0])[:MAX_ANCHOR_WORDS]
        return " ".join(c[1] for c in nearest)

    # NOTE(review): words are joined nearest-first, so the "left" anchor
    # reads in reverse reading order (the word adjacent to the field comes
    # first). Verify that consumers expect this before changing it.
    return {
        "left": join_text(closest_left),
        "right": join_text(closest_right),
        "above": join_text(closest_above),
    }


def render_hollow_debug_image(doc, page_num, fields):
    """Render a page with an outlined box and an ID badge for every field.

    Args:
        doc: Document object supporting ``len()`` and integer indexing to
            obtain a page.
        page_num: Index of the page to render.
        fields: Iterable of dicts, each with a ``"temp_id"`` and a ``"bbox"``
            (``x0``/``y0``/``x1``/``y1`` keys, bottom-left origin).

    Returns:
        JPEG-encoded bytes of the annotated page, or ``None`` when
        *page_num* is past the end of the document.
    """
    if page_num >= len(doc):
        return None

    page = doc[page_num]
    zoom = 2.0
    pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom))
    img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
    draw = ImageDraw.Draw(img)

    # Factors that map page units to rendered pixels.
    scale_x = pix.width / page.rect.width
    scale_y = pix.height / page.rect.height
    page_h = page.rect.height

    try:
        font = ImageFont.truetype("arial.ttf", 30)
    except OSError:
        # Font file missing or unreadable: fall back to Pillow's built-in
        # font instead of hiding unrelated errors behind a bare except.
        font = ImageFont.load_default()

    badge_w, badge_h = 50, 35

    for f in fields:
        vis_id = f["temp_id"]

        # Flip to top-left origin with the shared helper, then scale to
        # pixel coordinates.
        top_left = normalize_bbox_to_top_left(f["bbox"], page_h)
        x0_px = top_left["x0"] * scale_x
        y0_px = top_left["y0"] * scale_y
        x1_px = top_left["x1"] * scale_x
        y1_px = top_left["y1"] * scale_y

        draw.rectangle(
            [x0_px, y0_px, x1_px, y1_px],
            outline=config.BOX_COLOR,
            width=config.BOX_WIDTH,
        )

        # Badge sits just above the box's top-left corner.
        bx0 = x0_px - 10
        by0 = y0_px - badge_h - 2
        draw.rectangle(
            [bx0, by0, bx0 + badge_w, by0 + badge_h],
            fill=config.BADGE_BG,
        )
        draw.text(
            (bx0 + 10, by0 + 5),
            str(vis_id),
            fill=config.BADGE_COLOR,
            font=font,
        )

    buffer = io.BytesIO()
    img.save(buffer, format="JPEG", quality=85)
    return buffer.getvalue()