Spaces:

chiruu12
/

Digital-Image-Processing-OCR

Sleeping

Digital-Image-Processing-OCR / src /temp.py

chiruu12

Initial commit of clean OCR application

9543569 7 months ago

2.97 kB

	import torch
	import cv2
	import numpy as np
	import os
	import shutil
	from pdf2image import convert_from_path
	from tqdm import tqdm

	from src import utils
	from config import settings
	from src.model_loader import load_all_models

	# Input document for the debugging run; main() hands this path to convert_from_path.
	PDF_PATH = "sample_documents/books/Applied-Machine-Learning-and-AI-for-Engineers.pdf"
	# Page number passed as both first_page and last_page, so only this one page is rendered.
	PAGE_TO_DEBUG = 2
	# Directory that receives the debug images; main() deletes it (if present) and recreates it on every run.
	DEBUG_OUTPUT_DIR = "debug_output/"


	def _load_page_as_bgr(pdf_path, page_number):
	    """Render a single PDF page and return it as a BGR image array.

	    Args:
	        pdf_path: Path of the PDF handed to ``convert_from_path``.
	        page_number: Page passed as both ``first_page`` and ``last_page``,
	            so only that page is rendered.

	    Returns:
	        The first rendered page, converted from RGB to BGR channel order.
	    """
	    # Poppler is looked up in "<POPPLER_PATH>/bin" when a path is configured;
	    # otherwise poppler_path is passed through as None.
	    poppler_path = os.path.join(settings.POPPLER_PATH, "bin") if settings.POPPLER_PATH else None
	    pages = convert_from_path(
	        pdf_path,
	        first_page=page_number,
	        last_page=page_number,
	        poppler_path=poppler_path,
	    )
	    return cv2.cvtColor(np.array(pages[0]), cv2.COLOR_RGB2BGR)


	def _save_segmentation_overlay(image_bgr, bounding_boxes, output_dir):
	    """Draw every bounding box on a copy of the page, write it to disk, return the file path."""
	    overlay = image_bgr.copy()
	    for x, y, w, h in bounding_boxes:
	        # (0, 0, 255) is red in BGR order; line thickness is 2 px.
	        cv2.rectangle(overlay, (x, y), (x + w, y + h), (0, 0, 255), 2)
	    viz_path = os.path.join(output_dir, "_SEGMENTATION_RESULT.png")
	    cv2.imwrite(viz_path, overlay)
	    return viz_path


	def _classify_character(models, char_tensor):
	    """Run the triage model, then the expert model it selects, on one character tensor.

	    Args:
	        models: Mapping holding a ``'triage'`` model plus one model per
	            triage decision name.
	        char_tensor: Model input produced by ``utils.prepare_char_for_model``.
	            NOTE(review): ``.item()`` below assumes a batch of exactly one
	            character -- confirm against ``prepare_char_for_model``.

	    Returns:
	        Tuple ``(triage_decision, final_prediction)``; the prediction is
	        ``'?'`` when the expert's class index is missing from its character map.
	    """
	    # Inference only: no autograd bookkeeping is needed for either forward pass.
	    with torch.no_grad():
	        triage_output = models['triage'](char_tensor)
	        _, triage_idx = torch.max(triage_output, 1)
	        triage_decision = settings.TRIAGE_OUTPUT_MAP[triage_idx.item()]

	        expert_model = models[triage_decision]
	        expert_output = expert_model(char_tensor)
	        _, expert_idx = torch.max(expert_output, 1)

	    character_map = settings.EXPERT_CHARACTER_MAPS[triage_decision]
	    final_prediction = character_map.get(expert_idx.item(), '?')
	    return triage_decision, final_prediction


	def main():
	    """Run the OCR pipeline on one PDF page and dump debugging artifacts.

	    Steps:
	      1. Recreate ``DEBUG_OUTPUT_DIR`` from scratch.
	      2. Load all models via ``load_all_models``.
	      3. Render page ``PAGE_TO_DEBUG`` of ``PDF_PATH`` to a BGR image.
	      4. Binarize it (inverted Otsu threshold) and segment characters with
	         ``utils.segment_characters``.
	      5. Save an overlay image with every bounding box drawn on the page.
	      6. For each box, save the binary crop and print the triage decision
	         and the final predicted character.

	    Returns:
	        None. Prints a message and returns early when ``load_all_models``
	        raises ``FileNotFoundError`` or when the PDF conversion fails.
	    """
	    print("--- Starting Pipeline Debugging Session ---")

	    # Start from an empty directory so artifacts of earlier runs do not mix in.
	    if os.path.exists(DEBUG_OUTPUT_DIR):
	        shutil.rmtree(DEBUG_OUTPUT_DIR)
	    os.makedirs(DEBUG_OUTPUT_DIR)
	    print(f"Debug artifacts will be saved in: '{DEBUG_OUTPUT_DIR}'")

	    try:
	        models = load_all_models()
	    except FileNotFoundError as e:
	        print(f"\nFATAL ERROR: {e}")
	        return

	    # Broad catch is deliberate: this is the script's top-level boundary and any
	    # conversion problem (missing poppler, bad path, missing page) ends the session.
	    try:
	        print(f"\nConverting page {PAGE_TO_DEBUG} of the PDF to an image...")
	        image_bgr = _load_page_as_bgr(PDF_PATH, PAGE_TO_DEBUG)
	    except Exception as e:
	        print(f"PDF conversion failed: {e}")
	        return

	    gray_image = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2GRAY)
	    # THRESH_BINARY_INV + THRESH_OTSU: threshold chosen by Otsu's method, output inverted.
	    _, binary_image = cv2.threshold(gray_image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
	    bounding_boxes = utils.segment_characters(binary_image)

	    viz_path = _save_segmentation_overlay(image_bgr, bounding_boxes, DEBUG_OUTPUT_DIR)
	    print(f"\nSAVED VISUAL EVIDENCE: Segmentation result saved to '{viz_path}'")

	    print("\n--- Character-by-Character Recognition Log ---")

	    for i, box in enumerate(tqdm(bounding_boxes, desc="Debugging Pipeline")):
	        x, y, w, h = box

	        # Save exactly what the models will see, keyed by contour index.
	        char_crop = binary_image[y:y + h, x:x + w]
	        crop_path = os.path.join(DEBUG_OUTPUT_DIR, f"contour_{i:04d}_input.png")
	        cv2.imwrite(crop_path, char_crop)

	        char_tensor = utils.prepare_char_for_model(char_crop)
	        triage_decision, final_prediction = _classify_character(models, char_tensor)

	        # Plain "|" separators: the former "\|" was an invalid escape sequence
	        # that printed a stray backslash.
	        print(f"Contour #{i:03d} | Triage Decision: {triage_decision:<10} | Final Prediction: '{final_prediction}'")

	    print("\n--- Debugging Session Complete ---")


	# Run the debugging session only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    main()