import pytesseract
from PIL import Image
import io
import os

class OCREngine:
    def __init__(self):
        # On Render, tesseract is usually in /usr/bin/tesseract
        # On Windows, we use the path provided by the user
        if os.name == 'nt':
            pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

    def extract_text(self, image_content: bytes) -> str:
        try:
            image = Image.open(io.BytesIO(image_content))
            
            # Basic preprocessing: Resize if too large
            if image.width > 2000 or image.height > 2000:
                image.thumbnail((2000, 2000))
            
            # Convert to grayscale for better OCR
            image = image.convert('L')
            
            text = pytesseract.image_to_string(image)
            return text.strip()
        except Exception as e:
            print(f"OCR Error: {e}")
            return f"Error extracting text: {str(e)}"

ocr_engine = OCREngine()