chiruu12
Initial commit of clean OCR application
9543569
Raw
History Blame Contribute Delete
2.97 kB
import torch
import cv2
import numpy as np
import os
import shutil
from pdf2image import convert_from_path
from tqdm import tqdm
from src import utils
from config import settings
from src.model_loader import load_all_models
# Debug-session configuration, read by main().
# PDF that main() hands to pdf2image's convert_from_path.
PDF_PATH = "sample_documents/books/Applied-Machine-Learning-and-AI-for-Engineers.pdf"
# Page to rasterize; passed as both first_page and last_page, so exactly one page is converted.
PAGE_TO_DEBUG = 2
# Directory for debug artifacts. main() deletes it (if present) and recreates it on every run.
DEBUG_OUTPUT_DIR = "debug_output/"
def _classify_character(models, char_tensor):
    """Classify one prepared character with the triage model and the expert it selects.

    Args:
        models: Object returned by ``load_all_models()``; indexed by ``'triage'`` and by
            the values found in ``settings.TRIAGE_OUTPUT_MAP``.
        char_tensor: Model input produced by ``utils.prepare_char_for_model``.
            NOTE(review): ``.item()`` is called on the winning index, so this presumably
            holds a batch of exactly one character -- confirm against the helper.

    Returns:
        A ``(triage_decision, final_prediction)`` tuple. ``final_prediction`` is ``'?'``
        when the expert's winning index has no entry in that expert's character map.
    """
    # Inference only: gradients are never needed for these two forward passes.
    with torch.no_grad():
        triage_output = models['triage'](char_tensor)
        _, triage_idx = torch.max(triage_output, 1)
        triage_decision = settings.TRIAGE_OUTPUT_MAP[triage_idx.item()]

        # The triage decision doubles as the key of the expert model to run.
        expert_output = models[triage_decision](char_tensor)
        _, expert_idx = torch.max(expert_output, 1)

    character_map = settings.EXPERT_CHARACTER_MAPS[triage_decision]
    return triage_decision, character_map.get(expert_idx.item(), '?')


def main():
    """Run the OCR pipeline on one PDF page and dump every intermediate artifact.

    Steps:
        1. Recreate ``DEBUG_OUTPUT_DIR`` from scratch.
        2. Load the models; stop with a message if a model file is missing.
        3. Rasterize page ``PAGE_TO_DEBUG`` of ``PDF_PATH``; stop with a message on any
           conversion error.
        4. Binarize the page (inverted Otsu threshold) and segment it into character boxes.
        5. Save an overlay image with every box drawn in red.
        6. For each box, save the binary crop and log the triage decision and the final
           predicted character.

    Returns:
        None. Failures in steps 2 and 3 are reported on stdout and end the session early.
    """
    print("--- Starting Pipeline Debugging Session ---")

    # Start from an empty directory so leftovers of an earlier run are never mixed in.
    if os.path.exists(DEBUG_OUTPUT_DIR):
        shutil.rmtree(DEBUG_OUTPUT_DIR)
    os.makedirs(DEBUG_OUTPUT_DIR)
    print(f"Debug artifacts will be saved in: '{DEBUG_OUTPUT_DIR}'")

    try:
        models = load_all_models()
    except FileNotFoundError as e:
        print(f"\nFATAL ERROR: {e}")
        return

    try:
        print(f"\nConverting page {PAGE_TO_DEBUG} of the PDF to an image...")
        # When POPPLER_PATH is unset, None is passed through to pdf2image.
        poppler_path = os.path.join(settings.POPPLER_PATH, "bin") if settings.POPPLER_PATH else None
        pil_image = convert_from_path(
            PDF_PATH,
            first_page=PAGE_TO_DEBUG,
            last_page=PAGE_TO_DEBUG,
            poppler_path=poppler_path,
        )[0]
        # The page is converted from RGB to BGR channel order for the OpenCV calls below.
        image_bgr = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
    except Exception as e:
        # Deliberately broad: any failure while rasterizing ends the debug session.
        print(f"PDF conversion failed: {e}")
        return

    gray_image = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2GRAY)
    # THRESH_BINARY_INV + THRESH_OTSU: automatic threshold, inverted output.
    _, binary_image = cv2.threshold(gray_image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    bounding_boxes = utils.segment_characters(binary_image)

    # Draw on a copy so the original page image stays untouched.
    segmentation_viz_image = image_bgr.copy()
    for x, y, w, h in bounding_boxes:
        cv2.rectangle(segmentation_viz_image, (x, y), (x + w, y + h), (0, 0, 255), 2)
    viz_path = os.path.join(DEBUG_OUTPUT_DIR, "_SEGMENTATION_RESULT.png")
    cv2.imwrite(viz_path, segmentation_viz_image)
    print(f"\nSAVED VISUAL EVIDENCE: Segmentation result saved to '{viz_path}'")

    print("\n--- Character-by-Character Recognition Log ---")
    for i, (x, y, w, h) in enumerate(tqdm(bounding_boxes, desc="Debugging Pipeline")):
        char_crop = binary_image[y:y + h, x:x + w]
        crop_path = os.path.join(DEBUG_OUTPUT_DIR, f"contour_{i:04d}_input.png")
        cv2.imwrite(crop_path, char_crop)

        char_tensor = utils.prepare_char_for_model(char_crop)
        triage_decision, final_prediction = _classify_character(models, char_tensor)

        # tqdm.write keeps the log line from corrupting the active progress bar,
        # which a plain print() inside the loop would do.
        tqdm.write(
            f"Contour #{i:03d} | Triage Decision: {triage_decision:<10} | Final Prediction: '{final_prediction}'"
        )

    print("\n--- Debugging Session Complete ---")
# Run the debugging session only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()