Spaces:

tsalkar
/

field_semantic_mapping

Runtime error

field_semantic_mapping / utils_geometry.py

Tanishq Salkar

initial visual mapping code added to hf

db81e28 about 1 month ago

3.67 kB

	import os
	import json
	import shutil
	import fitz # PyMuPDF
	import io
	from PIL import Image, ImageDraw, ImageFont
	import config

	def setup_debug_dir():
	    """Reset the debug output directory to an empty state.

	    Removes ``config.DEBUG_DIR`` together with everything inside it when
	    the path already exists, creates the directory again, and prints a
	    confirmation line.
	    """
	    target = config.DEBUG_DIR
	    already_present = os.path.exists(target)
	    if already_present:
	        # Wipe output left over from a previous run.
	        shutil.rmtree(target)
	    os.makedirs(target)
	    print(f"Debug directory cleared: {config.DEBUG_DIR}/")

	def save_debug_image(image_bytes, name):
	    """Write ``image_bytes`` to ``<config.DEBUG_DIR>/<name>.jpg``.

	    Args:
	        image_bytes: Bytes-like payload, written to the file unchanged.
	        name: File stem; the ``.jpg`` extension is appended here.

	    Returns:
	        The path of the file that was written.
	    """
	    filename = f"{name}.jpg"
	    target = os.path.join(config.DEBUG_DIR, filename)
	    with open(target, "wb") as sink:
	        sink.write(image_bytes)
	    return target

	def save_debug_json(data, name):
	    """Dump ``data`` as indented JSON to ``<config.DEBUG_DIR>/<name>.json``.

	    Args:
	        data: Object handed to ``json.dump`` (2-space indentation).
	        name: File stem; the ``.json`` extension is appended here.
	    """
	    filename = f"{name}.json"
	    target = os.path.join(config.DEBUG_DIR, filename)
	    with open(target, "w") as sink:
	        json.dump(data, sink, indent=2)

	def normalize_bbox_to_top_left(bbox, page_height):
	    """Flip a bottom-left-origin box into top-left-origin coordinates.

	    The x values are passed through untouched. Each y value is mirrored
	    against ``page_height``, which also swaps the roles of ``y0`` and
	    ``y1``: the new ``y0`` comes from the old ``y1`` and vice versa.

	    Args:
	        bbox: Mapping with "x0", "y0", "x1" and "y1" entries.
	        page_height: Height of the page in the same units as ``bbox``.

	    Returns:
	        A new dict with keys "x0", "y0", "x1", "y1" (in that order).
	    """
	    left = bbox["x0"]
	    top = page_height - bbox["y1"]
	    right = bbox["x1"]
	    bottom = page_height - bbox["y0"]
	    return dict(x0=left, y0=top, x1=right, y1=bottom)

	def get_words_from_page(page):
	    """Return whatever ``page.get_text("words")`` yields for ``page``.

	    NOTE(review): presumably ``page`` is a PyMuPDF page and each entry
	    starts with (x0, y0, x1, y1, text), which is how
	    calculate_smart_anchors indexes them -- confirm against the caller.
	    """
	    words = page.get_text("words")
	    return words

	def calculate_smart_anchors(field_bbox, words, page_height, *,
	                            search_radius=150, y_alignment_tolerance=12,
	                            max_words=4):
	    """Collect the words nearest to a field on its left, right and above.

	    The field box is converted to top-left-origin coordinates with
	    normalize_bbox_to_top_left; word boxes are compared against it as
	    given. A word qualifies as:

	    * left  -- it ends before the field starts, its vertical centre is
	      within ``y_alignment_tolerance`` of the field's, and the horizontal
	      gap is below ``search_radius``;
	    * right -- the mirror image of "left";
	    * above -- it overlaps the field horizontally, ends above the field's
	      top edge, and the vertical gap is below ``search_radius``.

	    Args:
	        field_bbox: Mapping with "x0", "y0", "x1", "y1" in
	            bottom-left-origin page coordinates.
	        words: Iterable of sequences whose first five items are
	            (x0, y0, x1, y1, text), e.g. the result of
	            get_words_from_page.
	        page_height: Page height used to flip ``field_bbox``.
	        search_radius: Exclusive upper bound on the gap between a word
	            and the field (keyword-only, default 150).
	        y_alignment_tolerance: Exclusive upper bound on the vertical
	            centre offset for left/right candidates (keyword-only,
	            default 12).
	        max_words: Maximum number of words kept per direction
	            (keyword-only, default 4).

	    Returns:
	        Dict with keys "left", "right" and "above". Each value is the
	        space-joined text of up to ``max_words`` candidates ordered
	        nearest-first, or "" when there are none.
	    """
	    norm_bbox = normalize_bbox_to_top_left(field_bbox, page_height)
	    fx0, fy0 = norm_bbox["x0"], norm_bbox["y0"]
	    fx1, fy1 = norm_bbox["x1"], norm_bbox["y1"]
	    # Depends only on the field, so compute it once rather than per word.
	    f_center_y = (fy0 + fy1) / 2

	    closest_left = []
	    closest_right = []
	    closest_above = []

	    for w in words:
	        # Only the first five items are used; extra trailing items are ignored.
	        wx0, wy0, wx1, wy1, text = w[:5]
	        w_center_y = (wy0 + wy1) / 2
	        same_row = abs(w_center_y - f_center_y) < y_alignment_tolerance

	        # Left: word ends before the field begins, on the same row.
	        if same_row and wx1 < fx0 and fx0 - wx1 < search_radius:
	            closest_left.append((fx0 - wx1, text))
	        # Right: word begins after the field ends, on the same row.
	        if same_row and wx0 > fx1 and wx0 - fx1 < search_radius:
	            closest_right.append((wx0 - fx1, text))
	        # Above: word shares some horizontal extent with the field.
	        overlap = max(0, min(fx1, wx1) - max(fx0, wx0))
	        if overlap > 0 and wy1 < fy0 and fy0 - wy1 < search_radius:
	            closest_above.append((fy0 - wy1, text))

	    def join_nearest(candidates):
	        # Sort on distance only: the sort is stable, so equally distant
	        # words keep their input order instead of being ordered by text.
	        ordered = sorted(candidates, key=lambda c: c[0])
	        return " ".join(c[1] for c in ordered[:max_words])

	    # NOTE(review): nearest-first ordering means multi-word labels on the
	    # left come out in reverse reading order (closest word first) --
	    # confirm this is what the consumer of "left" expects.
	    return {
	        "left": join_nearest(closest_left),
	        "right": join_nearest(closest_right),
	        "above": join_nearest(closest_above),
	    }

	def render_hollow_debug_image(doc, page_num, fields):
	    """Render one page to JPEG with an outlined box and ID badge per field.

	    The page is rasterised at 2x zoom. For every field an unfilled
	    rectangle is drawn around its box, plus a small filled badge carrying
	    the field's ``temp_id`` just above the box's top-left corner.

	    Args:
	        doc: Object supporting ``len()`` and indexing by page number whose
	            pages provide ``get_pixmap`` and ``rect`` (presumably a
	            PyMuPDF document -- confirm against the caller).
	        page_num: Index of the page in ``doc``.
	        fields: Iterable of dicts with a "temp_id" entry (badge label) and
	            a "bbox" entry holding "x0", "y0", "x1", "y1" in
	            bottom-left-origin page coordinates.

	    Returns:
	        The annotated page as JPEG bytes (quality 85), or None when
	        ``page_num`` is past the end of ``doc``.
	    """
	    if page_num >= len(doc):
	        return None

	    page = doc[page_num]
	    zoom = 2.0
	    pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom))
	    img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
	    draw = ImageDraw.Draw(img)

	    # Page units -> pixel units, derived from the actual pixmap size.
	    scale_x = pix.width / page.rect.width
	    scale_y = pix.height / page.rect.height
	    page_h = page.rect.height

	    try:
	        font = ImageFont.truetype("arial.ttf", 30)
	    except (OSError, ImportError):
	        # Font file missing/unreadable, or FreeType support unavailable:
	        # fall back to Pillow's built-in font. Anything else (including
	        # KeyboardInterrupt) is no longer swallowed.
	        font = ImageFont.load_default()

	    badge_w, badge_h = 50, 35
	    for f in fields:
	        vis_id = f["temp_id"]
	        # Reuse the shared helper for the bottom-left -> top-left flip.
	        box = normalize_bbox_to_top_left(f["bbox"], page_h)
	        x0_px = box["x0"] * scale_x
	        y0_px = box["y0"] * scale_y
	        x1_px = box["x1"] * scale_x
	        y1_px = box["y1"] * scale_y

	        draw.rectangle(
	            [x0_px, y0_px, x1_px, y1_px],
	            outline=config.BOX_COLOR,
	            width=config.BOX_WIDTH,
	        )

	        # Badge sits just above the box's top-left corner; clamp it so
	        # fields touching the top or left edge still get a visible label.
	        bx0 = max(0, x0_px - 10)
	        by0 = max(0, y0_px - badge_h - 2)
	        draw.rectangle(
	            [bx0, by0, bx0 + badge_w, by0 + badge_h],
	            fill=config.BADGE_BG,
	        )
	        draw.text((bx0 + 10, by0 + 5), str(vis_id), fill=config.BADGE_COLOR, font=font)

	    buffer = io.BytesIO()
	    img.save(buffer, format="JPEG", quality=85)
	    return buffer.getvalue()