# vdoc_rag/app/visual_highlight.py
# aditya9128
# Initial commit: VDoc-RAG - Intelligent Document Q&A with RAG
# 4e3cee0
import os
import uuid
import json
import ast
from pdf2image import convert_from_path
from PIL import Image, ImageDraw
# Absolute path of the directory containing this module; used to build the
# default output location so it does not depend on the current working directory.
BASE_DIR = os.path.split(os.path.abspath(__file__))[0]
def load_calibration(config_path="highlight_calibration.json"):
    """Load highlight calibration values from a JSON file.

    Args:
        config_path: Path to the calibration JSON file. A relative path is
            resolved against the current working directory.

    Returns:
        The parsed JSON object when the file exists and contains a JSON
        object. Otherwise a fresh dict of identity defaults:
        ``{"x_offset": 0, "x_scale": 1.0, "y_offset": 0, "y_scale": 1.0}``.
        A missing, unreadable, malformed or non-object file never raises;
        a message is printed and the defaults are returned instead.
    """
    # Built per call so callers can mutate the result without affecting others.
    defaults = {"x_offset": 0, "x_scale": 1.0, "y_offset": 0, "y_scale": 1.0}

    # Open directly instead of checking os.path.exists() first: this avoids a
    # race between the check and the read, and lets one handler cover every
    # way the file can be unusable.
    try:
        with open(config_path, "r", encoding="utf-8") as f:
            calib = json.load(f)
    except FileNotFoundError:
        print("⚠️ No calibration file found. Using defaults.")
        return defaults
    except (OSError, ValueError) as exc:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueError
        # subclasses; OSError covers directories, permission problems, etc.
        print(f"⚠️ Could not read calibration file {config_path!r}: {exc}. Using defaults.")
        return defaults

    if not isinstance(calib, dict):
        # Callers look values up with .get(), so anything but an object is unusable.
        print(f"⚠️ Calibration file {config_path!r} does not contain a JSON object. Using defaults.")
        return defaults

    print(f"✅ Loaded calibration: {calib}")
    return calib
def _parse_bbox(raw_bbox):
    """Return ``raw_bbox`` as four floats ``(x0, y0, x1, y1)``, or None if its shape is wrong."""
    if isinstance(raw_bbox, str):
        # Accept a stringified list/tuple; literal_eval only evaluates literals.
        raw_bbox = ast.literal_eval(raw_bbox)
    if not isinstance(raw_bbox, (list, tuple)) or len(raw_bbox) != 4:
        return None
    return tuple(float(v) for v in raw_bbox)


def render_highlighted_pages(pdf_path, hits, output_dir=None, dpi=150, pad=100):
    """Render the page of the first hit as a PNG with its bounding box highlighted.

    Only the first element of ``hits`` is used. Its page is rasterised, the
    calibrated bounding box is drawn as a semi-transparent red rectangle, and
    the image is cropped around the box with ``pad`` pixels of margin
    (clamped to the page). If no valid box could be drawn, the full page is
    saved uncropped.

    Args:
        pdf_path: Path of the PDF to render.
        hits: Sequence of dicts, each with a ``"metadata"`` dict holding a
            1-based integer ``"page"`` and a ``"bbox"`` of four numbers
            ``[x0, y0, x1, y1]`` (a list/tuple or its string form). The bbox
            is mapped to image pixels by the calibration scale and offset.
            May be empty or None.
        output_dir: Directory for the output images. Defaults to
            ``<module dir>/highlighted``. NOTE: every file already in this
            directory is deleted on each call.
        dpi: Resolution used to rasterise the PDF page.
        pad: Margin, in pixels, kept around the highlighted region when
            cropping. Negative values are treated as 0.

    Returns:
        List of paths of the PNG files written (empty if nothing was rendered).
    """
    if output_dir is None:
        output_dir = os.path.join(BASE_DIR, "highlighted")
    os.makedirs(output_dir, exist_ok=True)

    calib = load_calibration()
    x_offset = calib.get("x_offset", 0)
    x_scale = calib.get("x_scale", 1.0)
    y_offset = calib.get("y_offset", 0)
    y_scale = calib.get("y_scale", 1.0)

    # Clean previous outputs (best effort: entries that cannot be removed,
    # such as sub-directories, are reported and left in place).
    for old in os.listdir(output_dir):
        try:
            os.remove(os.path.join(output_dir, old))
        except OSError as exc:
            print(f"[WARN] Could not remove old output {old}: {exc}")

    # Only the first hit is highlighted.
    hits = list(hits or [])[:1]

    # Group hit metadata by page, dropping hits whose page number is unusable.
    # Page numbers are 1-based; 0 or a negative value would otherwise index
    # from the end of the document and highlight the wrong page.
    meta_by_page = {}
    for hit in hits:
        meta = hit.get("metadata") or {}
        page_num = meta.get("page")
        if isinstance(page_num, bool) or not isinstance(page_num, int) or page_num < 1:
            print(f"[WARN] Skipping hit with invalid page number: {page_num!r}")
            continue
        meta_by_page.setdefault(page_num, []).append(meta)

    pad = max(0, pad)
    result_paths = []

    for page_num in sorted(meta_by_page):
        # Rasterise only the page we need instead of the whole document.
        rendered = convert_from_path(
            pdf_path, dpi=dpi, first_page=page_num, last_page=page_num
        )
        if not rendered:
            print(f"[WARN] Page {page_num} is outside the PDF page range; skipping.")
            continue

        img = rendered[0].convert("RGBA")
        w_img, h_img = img.size

        # Draw on a transparent overlay so the fill can be alpha-blended.
        overlay = Image.new("RGBA", img.size, (255, 255, 255, 0))
        draw = ImageDraw.Draw(overlay)

        page_bboxes = []
        for meta in meta_by_page[page_num]:
            bbox = meta.get("bbox")

            # Debug raw bbox
            print(f"[DEBUG] page {page_num} raw bbox type: {type(bbox)} value: {bbox}")

            try:
                parsed = _parse_bbox(bbox)
                if parsed is None:
                    print(f"[WARN] Invalid bbox for page {page_num}: {bbox}")
                    continue

                # Apply calibration
                x0, y0, x1, y1 = parsed
                x0 = x0 * x_scale + x_offset
                x1 = x1 * x_scale + x_offset
                y0 = y0 * y_scale + y_offset
                y1 = y1 * y_scale + y_offset
            except (ValueError, TypeError, SyntaxError, RecursionError, MemoryError) as e:
                print(f"[ERROR] Failed to parse bbox for page {page_num}: {bbox} -> {e}")
                continue

            # Normalise corner order and clamp to the image bounds.
            left, top = max(0, min(x0, x1)), max(0, min(y0, y1))
            right, bottom = min(w_img, max(x0, x1)), min(h_img, max(y0, y1))
            if right <= left or bottom <= top:
                # Box lies entirely outside the page or has no area.
                continue

            page_bboxes.append((left, top, right, bottom))
            draw.rectangle(
                [left, top, right, bottom],
                outline=(255, 0, 0),
                width=4,
                fill=(255, 0, 0, 100),
            )

        # Merge highlights with image
        highlighted = Image.alpha_composite(img, overlay)

        # Crop around the union of the highlighted boxes, plus padding.
        if page_bboxes:
            min_x = min(b[0] for b in page_bboxes)
            min_y = min(b[1] for b in page_bboxes)
            max_x = max(b[2] for b in page_bboxes)
            max_y = max(b[3] for b in page_bboxes)

            crop_box = (
                max(0, int(min_x - pad)),
                max(0, int(min_y - pad)),
                int(min(max_x + pad, w_img)),
                int(min(max_y + pad, h_img)),
            )
            cropped = highlighted.crop(crop_box)
        else:
            cropped = highlighted  # fallback if no bbox

        # Log how many boxes were drawn
        print(f"✅ Drew {len(page_bboxes)} boxes on page {page_num}")

        # A random suffix keeps file names unique across calls.
        out_path = os.path.join(
            output_dir, f"highlight_page{page_num}_{uuid.uuid4().hex}.png"
        )
        cropped.convert("RGB").save(out_path)
        result_paths.append(out_path)
        print(f"✅ Highlighted and cropped page {page_num}: {out_path}")

    return result_paths
# Demo: highlight a single text hit on page 2 of the sample PDF when this
# module is run directly as a script.
if __name__ == "__main__":
    sample_metadata = {"page": 2, "bbox": [87, 222, 592, 250], "type": "text"}
    render_highlighted_pages(
        "samples/vdoc_rag_test.pdf",
        [{"metadata": sample_metadata}],
    )