from pdf2image import convert_from_path
import os
import numpy as np
import cv2
from utils import segment_characters, prepare_char_for_model, MODEL_IMAGE_SIZE
from config import settings

def test_image_processing_utils():
    print("Testing image processing utilities with a PDF...")


    POPPLER_PATH = settings.POPPLER_PATH
    project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), ''))
    pdf_name = "Applied-Machine-Learning-and-AI-for-Engineers.pdf"
    sample_pdf_path = os.path.join(project_root, "sample_documents", pdf_name)

    try:
        print(f"Reading first page from '{sample_pdf_path}'...")
        page_image_pil = convert_from_path(
            sample_pdf_path,
            first_page=1,
            last_page=2,
            poppler_path=os.path.join(POPPLER_PATH, "bin")
        )[1]

        page_image_bgr = cv2.cvtColor(np.array(page_image_pil), cv2.COLOR_RGB2BGR)
        print("Successfully converted PDF page to image.")

        gray_image = cv2.cvtColor(page_image_bgr, cv2.COLOR_BGR2GRAY)
        _, binary_img = cv2.threshold(
            gray_image, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU
        )
        print(f"Successfully preprocessed image. Shape: {binary_img.shape}")

        boxes = segment_characters(binary_img)
        print(f"Found {len(boxes)} potential character bounding boxes.")

        for x, y, w, h in boxes:
            cv2.rectangle(page_image_bgr, (x, y), (x + w, y + h), (0, 255, 0), 2)

        output_path = os.path.join(project_root, "sample_documents", "pdf_segmentation_result.png")
        cv2.imwrite(output_path, page_image_bgr)
        print(f"Segmentation visualization saved to: {output_path}")

        if boxes:
            x, y, w, h = boxes[0]
            first_char_crop = binary_img[y:y + h, x:x + w]
            char_tensor = prepare_char_for_model(first_char_crop)
            print(f"Prepared first character for model. Tensor shape: {char_tensor.shape}")
            assert char_tensor.shape == (1, 1, MODEL_IMAGE_SIZE, MODEL_IMAGE_SIZE)
            print("Tensor shape is correct.")

    except Exception as e:
        print(f"An error occurred: {e}")
        print(f"\nPlease ensure the PDF file exists at the absolute path: '{sample_pdf_path}'")
        print("Also check that your POPPLER_PATH is correct.")


if __name__ == "__main__":
    test_image_processing_utils()