"""Image file processor with OCR capabilities."""

import os
import logging
from typing import Dict, Any

from .base import BaseProcessor
from ..result import ConversionResult
from ..exceptions import ConversionError, FileNotFoundError
from ..pipeline.ocr_service import OCRServiceFactory

# Module-level logger named after this module
logger = logging.getLogger(__name__)


class ImageProcessor(BaseProcessor):
    """Processor for image files (JPG, PNG, etc.) with OCR capabilities.

    The OCR backend is either injected through the constructor or created
    lazily via ``OCRServiceFactory`` the first time it is actually needed.
    """

    # File extensions (lower-case, with leading dot) accepted by can_process().
    _SUPPORTED_EXTENSIONS = frozenset(
        {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.webp', '.gif'}
    )

    def __init__(self, preserve_layout: bool = True, include_images: bool = False, ocr_enabled: bool = True, use_markdownify: bool = None, ocr_service=None):
        """Initialise the processor.

        Args:
            preserve_layout: Forwarded to the base class; when true (and OCR
                is enabled) ``process`` uses layout-aware extraction.
            include_images: Forwarded to the base class.
            ocr_enabled: Forwarded to the base class; when false ``process``
                returns empty content.
            use_markdownify: Forwarded to the base class (``None`` by default).
            ocr_service: Optional pre-built OCR service. When ``None`` a
                service is created on first use.
        """
        super().__init__(preserve_layout, include_images, ocr_enabled, use_markdownify)
        self._ocr_service = ocr_service

    def can_process(self, file_path: str) -> bool:
        """Check if this processor can handle the given file.

        Args:
            file_path: Path to the file to check

        Returns:
            True if the path exists and has a supported image extension
            (compared case-insensitively)
        """
        if not os.path.exists(file_path):
            return False

        # str() so path-like objects can be lower-cased before splitting.
        _, ext = os.path.splitext(str(file_path).lower())
        return ext in self._SUPPORTED_EXTENSIONS

    def _get_ocr_service(self):
        """Return the OCR service, creating and caching it on first use."""
        if self._ocr_service is None:
            self._ocr_service = OCRServiceFactory.create_service()
        return self._ocr_service

    def process(self, file_path: str) -> ConversionResult:
        """Process image file with OCR capabilities.

        Args:
            file_path: Path to the image file

        Returns:
            ConversionResult whose content is the extracted text (empty when
            OCR is disabled) and whose metadata records the file path, the
            file type and the OCR/layout settings used.

        Raises:
            ConversionError: If the file does not exist or any other
                exception is raised while processing; the original exception
                is attached as ``__cause__``.
        """
        try:
            if not os.path.exists(file_path):
                raise FileNotFoundError(f"Image file not found: {file_path}")

            logger.info("Processing image file: %s", file_path)

            if not self.ocr_enabled:
                # Do not build the OCR service here: creating it can be
                # expensive (or fail) and it would never be used.
                logger.warning("OCR is disabled, returning empty content")
                extracted_text = ""
            elif self.preserve_layout:
                logger.info("Extracting text with layout awareness")
                extracted_text = self._get_ocr_service().extract_text_with_layout(file_path)
            else:
                logger.info("Extracting text without layout awareness")
                extracted_text = self._get_ocr_service().extract_text(file_path)

            result = ConversionResult(
                content=extracted_text,
                metadata={
                    'file_path': file_path,
                    'file_type': 'image',
                    'ocr_enabled': self.ocr_enabled,
                    'preserve_layout': self.preserve_layout,
                },
            )

            logger.info(
                "Image processing completed. Extracted %s characters",
                len(extracted_text),
            )
            return result

        except Exception as e:
            logger.error("Failed to process image file %s: %s", file_path, e)
            # Chain the cause so the original traceback is preserved.
            raise ConversionError(f"Image processing failed: {e}") from e

    @staticmethod
    def predownload_ocr_models():
        """Pre-download OCR models by running a dummy prediction.

        Runs layout-aware OCR on a blank 100x100 white PNG written to a
        temporary file. This is best-effort: any failure is logged and not
        re-raised. The temporary file is removed whether or not OCR succeeds.
        """
        tmp_path = None
        try:
            # Use the same factory as the rest of this module.
            ocr_service = OCRServiceFactory.create_service()

            from PIL import Image
            import tempfile

            # Reserve a path, then close the handle before writing to it by
            # name; an open NamedTemporaryFile cannot be reopened on Windows.
            with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
                tmp_path = tmp.name

            Image.new('RGB', (100, 100), color='white').save(tmp_path)
            ocr_service.extract_text_with_layout(tmp_path)
            logger.info("OCR models pre-downloaded and cached.")
        except Exception as e:
            logger.error("Failed to pre-download OCR models: %s", e)
        finally:
            if tmp_path is not None:
                try:
                    os.unlink(tmp_path)
                except OSError as cleanup_error:
                    logger.debug(
                        "Could not remove temporary file %s: %s",
                        tmp_path,
                        cleanup_error,
                    )