#!/usr/bin/env python3
"""
============== COMPLETE OCR PIPELINE (Multi-Line Support) ==============

This pipeline combines:
1. YOLO-based number plate detection
2. Character segmentation using contour detection
3. OCR using a ResNet18-based model
4. Multi-line plate support (for Nepali plates)

Usage:
    python main.py
    python main.py --no-yolo    # Skip YOLO detection
    python main.py --save       # Save results
"""

import argparse
import json
import os
import sys
from pathlib import Path
from typing import Dict, List, Optional, Tuple

import cv2
import matplotlib.pyplot as plt
import numpy as np

# Local imports
from config.config import (
    CONTOUR_CONFIG, INFERENCE_CONFIG, VIZ_CONFIG,
    OCR_MODEL_PATH, LABEL_MAP_PATH, YOLO_MODEL_PATH,
    setup_directories, get_device, RESULTS_DIR, CONTOURS_BW_DIR
)
from model.ocr import CharacterRecognizer
from model.plate_detector import get_detector
from utils.helper import (
    detect_contours, filter_contours_by_size, extract_roi,
    convert_to_binary, remove_overlapping_centers,
    group_contours_by_line, format_plate_number, draw_detections,
    calculate_confidence_stats, save_contour_images
)


class NumberPlateOCR:
    """
    Complete Number Plate OCR Pipeline.

    Supports:
    - YOLO-based plate detection (optional)
    - Multi-line plate recognition
    - Nepali and English characters
    - Embossed number plates
    """

    def __init__(self, use_yolo: bool = True, verbose: bool = True):
        """
        Initialize the OCR pipeline.

        Args:
            use_yolo: Whether to use YOLO for plate detection
            verbose: Print progress messages
        """
        self.verbose = verbose
        self.device = get_device()

        # Setup directories
        setup_directories()

        # Initialize OCR model
        self._log("Loading OCR model...")
        self.ocr = CharacterRecognizer(
            model_path=str(OCR_MODEL_PATH),
            label_map_path=str(LABEL_MAP_PATH),
            device=self.device
        )

        # Initialize plate detector (optional; None when YOLO is disabled)
        self.use_yolo = use_yolo
        if use_yolo:
            self._log("Loading YOLO plate detector...")
            self.detector = get_detector(use_yolo=True, model_path=str(YOLO_MODEL_PATH))
        else:
            self.detector = None

        self._log("✓ Pipeline initialized successfully!")

    @staticmethod
    def _is_nepali_token(token: str) -> bool:
        """Check if token is Nepali (Devanagari) or Nepali-specific label."""
        if not token:
            return False
        if token == "Nepali Flag":
            return True
        # Any character inside the Devanagari Unicode block counts as Nepali.
        return any('\u0900' <= ch <= '\u097F' for ch in token)

    @staticmethod
    def _is_english_token(token: str) -> bool:
        """Check if token is plain English alphanumeric."""
        if not token:
            return False
        return all(('0' <= ch <= '9') or ('A' <= ch <= 'Z') or ('a' <= ch <= 'z')
                   for ch in token)

    @staticmethod
    def _english_digit_to_nepali(token: str) -> str:
        """Convert English digits to Nepali digits (keeps non-digits unchanged)."""
        digit_map = str.maketrans("0123456789", "०१२३४५६७८९")
        return token.translate(digit_map)

    def _apply_nepali_dominant_correction(self, line_results: List[Dict]):
        """
        If a line is predominantly Nepali, replace English predictions
        using next Nepali top-k prediction from OCR model.

        Mutates ``line_results`` in place; each entry must carry the
        temporary '_roi_bw' key so top-k re-prediction can be run.
        """
        if not line_results:
            return
        nepali_count = sum(1 for r in line_results if self._is_nepali_token(r['char']))
        english_count = sum(1 for r in line_results if self._is_english_token(r['char']))
        # Only correct lines where Nepali predictions strictly dominate.
        if nepali_count <= english_count:
            return
        for r in line_results:
            curr_char = r['char']
            if not self._is_english_token(curr_char):
                continue
            replacement_char = None
            replacement_conf = None
            # Look past the top-1 prediction for the best Nepali candidate.
            top_k = self.ocr.get_top_k_predictions(r['_roi_bw'], k=5)
            for candidate_char, candidate_conf in top_k[1:]:
                if self._is_nepali_token(candidate_char):
                    replacement_char = candidate_char
                    replacement_conf = candidate_conf
                    break
            # Fallback: transliterate English digits when no Nepali candidate exists.
            if replacement_char is None and any(ch.isdigit() for ch in curr_char):
                replacement_char = self._english_digit_to_nepali(curr_char)
                replacement_conf = r['conf']
            if replacement_char is not None:
                r['char'] = replacement_char
                r['conf'] = float(replacement_conf)

    def _log(self, message: str):
        """Print log message if verbose."""
        if self.verbose:
            print(message)

    def process_image(self,
                      image_path: str,
                      save_contours: bool = False,
                      show_visualization: bool = True) -> Dict:
        """
        Process an image and extract plate number.

        Args:
            image_path: Path to input image
            save_contours: Whether to save extracted character images
            show_visualization: Whether to display matplotlib visualizations

        Returns:
            Dict with recognition results

        Raises:
            ValueError: If the image cannot be loaded.
        """
        self._log(f"\n{'='*60}")
        self._log(f"Processing: {image_path}")
        self._log(f"{'='*60}")

        # Load once and validate immediately (the previous extra grayscale
        # read was unused and doubled the disk I/O).
        orig_image = cv2.imread(image_path)
        if orig_image is None:
            raise ValueError(f"Could not load image: {image_path}")

        # Step 1: Detect plates (optional YOLO step)
        if self.use_yolo and self.detector:
            self._log("\n📍 Step 1: Detecting number plates with YOLO...")
            plates = self._detect_plates(orig_image)
            if not plates:
                self._log("⚠ No plates detected by YOLO, processing full image...")
                plates = [{'plate_image': orig_image, 'bbox': None, 'confidence': 1.0}]
        else:
            self._log("\n📍 Step 1: Using full image (YOLO disabled)...")
            plates = [{'plate_image': orig_image, 'bbox': None, 'confidence': 1.0}]

        # Process each detected plate
        all_results = []
        for plate_idx, plate_data in enumerate(plates):
            self._log(f"\n📋 Processing Plate {plate_idx + 1}/{len(plates)}")
            plate_img = plate_data['plate_image']
            plate_gray = (cv2.cvtColor(plate_img, cv2.COLOR_BGR2GRAY)
                          if len(plate_img.shape) == 3 else plate_img)

            # Step 2: Extract character contours
            self._log("📍 Step 2: Detecting character contours...")
            contours = self._extract_contours(plate_gray, plate_img)
            if not contours:
                self._log("⚠ No characters detected in plate")
                continue

            # Save contours if requested
            if save_contours:
                self._log(f"   Saving contour images to {CONTOURS_BW_DIR}")
                save_contour_images(contours, plate_img, str(CONTOURS_BW_DIR))

            # Step 3: Group by lines
            self._log("📍 Step 3: Grouping characters by lines...")
            lines = group_contours_by_line(contours)
            self._log(f"   Detected {len(lines)} line(s)")
            for i, line in enumerate(lines):
                self._log(f"   Line {i+1}: {len(line)} characters")

            # Step 4: Run OCR
            self._log("📍 Step 4: Running OCR on characters...")
            ocr_results = self._run_ocr(lines, plate_img)

            # Step 5: Format results
            formatted = format_plate_number(lines, ocr_results)
            confidence_stats = calculate_confidence_stats(ocr_results)

            result = {
                'plate_index': plate_idx,
                'plate_bbox': plate_data['bbox'],
                'plate_confidence': plate_data.get('confidence', 1.0),
                'plate_image': plate_img,
                'lines': formatted['lines'],
                'multiline_text': formatted['multiline'],
                'singleline_text': formatted['singleline'],
                'num_lines': formatted['num_lines'],
                'total_chars': formatted['total_chars'],
                'details': formatted['details'],
                'confidence_stats': confidence_stats,
                'raw_ocr_results': ocr_results
            }
            all_results.append(result)

            # Visualize
            if show_visualization:
                self._visualize_plate(plate_img, lines, ocr_results, plate_idx)

        # Print final summary
        self._print_results(all_results)

        return {
            'image_path': image_path,
            'num_plates': len(all_results),
            'plates': all_results
        }

    def _detect_plates(self, image: np.ndarray) -> List[Dict]:
        """Detect plates using YOLO."""
        detections = self.detector.detect(image)
        self._log(f"   Found {len(detections)} plate(s)")
        for i, det in enumerate(detections):
            self._log(f"   Plate {i+1}: confidence={det['confidence']:.2%}")
        return detections

    def _extract_contours(self,
                          gray_image: np.ndarray,
                          color_image: np.ndarray) -> List[Dict]:
        """Extract and filter character contours."""
        # Detect contours (hierarchy and threshold image are not needed here)
        contours, _hierarchy, _thresh = detect_contours(gray_image)
        self._log(f"   Total contours found: {len(contours)}")

        # Filter by size
        filtered = filter_contours_by_size(contours, gray_image.shape)
        self._log(f"   After size filter: {len(filtered)}")

        # Sort by x position
        sorted_contours = sorted(filtered, key=lambda c: (c['x'], c['y']))

        # Remove only true edge artifacts (do not blindly drop first contours)
        remove_edge_artifacts = CONTOUR_CONFIG.get("remove_edge_artifacts", True)
        edge_margin = CONTOUR_CONFIG.get("edge_margin", 2)
        if remove_edge_artifacts and len(sorted_contours) > 4:
            image_h, image_w = gray_image.shape[:2]
            non_edge_contours = [
                c for c in sorted_contours
                if (
                    c['x'] > edge_margin and
                    c['y'] > edge_margin and
                    (c['x'] + c['w']) < (image_w - edge_margin) and
                    (c['y'] + c['h']) < (image_h - edge_margin)
                )
            ]
            # Keep edge filtering only if it does not remove too many candidates
            if len(non_edge_contours) >= max(3, int(0.6 * len(sorted_contours))):
                sorted_contours = non_edge_contours
                self._log(f"   After edge-artifact filter: {len(sorted_contours)}")

        # Extract ROI for each contour
        for c in sorted_contours:
            roi = extract_roi(color_image, c)
            c['roi_bw'] = convert_to_binary(roi)

        # Remove overlapping centers (like inner hole of '0')
        final_contours = remove_overlapping_centers(sorted_contours, verbose=self.verbose)
        removed = len(sorted_contours) - len(final_contours)
        if removed > 0:
            self._log(f"   Removed {removed} overlapping contours")

        return final_contours

    def _run_ocr(self,
                 lines: List[List[Dict]],
                 plate_image: np.ndarray) -> List[List[Dict]]:
        """Run OCR on grouped character lines."""
        min_confidence = INFERENCE_CONFIG["min_confidence"]
        results_by_line = []
        for line_idx, line in enumerate(lines):
            line_results = []
            for c in line:
                char, conf, processed_img = self.ocr.predict(c['roi_bw'])
                # Drop low-confidence predictions (strictly greater-than)
                if conf > min_confidence:
                    line_results.append({
                        'char': char,
                        'conf': conf,
                        'x': c['x'], 'y': c['y'],
                        'w': c['w'], 'h': c['h'],
                        'processed_img': processed_img,
                        '_roi_bw': c['roi_bw']
                    })
            # Line-level language correction needs '_roi_bw'; strip it after.
            self._apply_nepali_dominant_correction(line_results)
            for r in line_results:
                r.pop('_roi_bw', None)
            results_by_line.append(line_results)

        total_chars = sum(len(line) for line in results_by_line)
        self._log(f"   Characters with confidence > {min_confidence*100:.0f}%: {total_chars}")
        return results_by_line

    def _visualize_plate(self,
                         plate_image: np.ndarray,
                         lines: List[List[Dict]],
                         ocr_results: List[List[Dict]],
                         plate_idx: int):
        """Visualize OCR results."""
        if not VIZ_CONFIG["show_plots"]:
            return

        # Show original plate
        plt.figure(figsize=VIZ_CONFIG["figure_size"])
        plt.imshow(cv2.cvtColor(plate_image, cv2.COLOR_BGR2RGB))
        plt.title(f'Plate {plate_idx + 1} - {len(lines)} Line(s) Detected')
        plt.axis('off')
        plt.show()

        # Show OCR results for each line
        for line_idx, line_results in enumerate(ocr_results):
            n = len(line_results)
            if n > 0:
                cols = min(VIZ_CONFIG["max_cols"], n)
                rows = (n + cols - 1) // cols
                fig, axes = plt.subplots(rows, cols, figsize=(cols*1.5, rows*2))
                # Normalize to a flat sequence whether subplots returned one
                # Axes or a grid.
                axes = np.array(axes).reshape(-1) if n > 1 else [axes]
                for i, r in enumerate(line_results):
                    axes[i].imshow(r['processed_img'], cmap='gray')
                    axes[i].set_title(f'"{r["char"]}" ({r["conf"]:.0%})',
                                      fontsize=VIZ_CONFIG["font_size"])
                    axes[i].axis('off')
                # Hide empty subplots
                for i in range(n, len(axes)):
                    axes[i].axis('off')
                line_text = "".join([r['char'] for r in line_results])
                plt.suptitle(f'Line {line_idx+1}: "{line_text}"', fontsize=12)
                plt.tight_layout()
                plt.show()

    def _print_results(self, results: List[Dict]):
        """Print formatted results."""
        print("\n" + "="*60)
        print("📋 PLATE NUMBER RECOGNITION RESULTS")
        print("="*60)
        for result in results:
            plate_idx = result['plate_index'] + 1
            print(f"\n🏷️ PLATE {plate_idx}:")
            print("-"*40)
            for line_detail in result['details']:
                print(f"\n  📌 Line {line_detail['line_num']}:")
                for i, char_info in enumerate(line_detail['characters']):
                    print(f"    {i+1}. '{char_info['char']}' ({char_info['conf']:.1%})")
                print(f"    → Result: {line_detail['text']}")

            # Final result
            print("\n" + "-"*40)
            if result['num_lines'] > 1:
                print("  Multi-line format:")
                for i, line in enumerate(result['lines']):
                    print(f"    Line {i+1}: {line}")
                print(f"\n  Single-line: {result['singleline_text']}")
            else:
                text = result['lines'][0] if result['lines'] else 'No characters detected'
                print(f"  Result: {text}")

            # Confidence stats
            stats = result['confidence_stats']
            print(f"\n  Confidence: avg={stats['mean']:.1%}, min={stats['min']:.1%}, max={stats['max']:.1%}")
        print("\n" + "="*60)

    def process_from_plate_image(self,
                                 plate_image: np.ndarray,
                                 show_visualization: bool = True) -> Dict:
        """
        Process a pre-cropped plate image (skip YOLO detection).

        Args:
            plate_image: Cropped plate image (BGR)
            show_visualization: Whether to show plots

        Returns:
            Recognition result dict
        """
        plate_gray = (cv2.cvtColor(plate_image, cv2.COLOR_BGR2GRAY)
                      if len(plate_image.shape) == 3 else plate_image)

        # Extract contours
        contours = self._extract_contours(plate_gray, plate_image)
        if not contours:
            return {'lines': [], 'singleline_text': '', 'total_chars': 0}

        # Group by lines
        lines = group_contours_by_line(contours)

        # Run OCR
        ocr_results = self._run_ocr(lines, plate_image)

        # Format results
        formatted = format_plate_number(lines, ocr_results)

        if show_visualization:
            self._visualize_plate(plate_image, lines, ocr_results, 0)

        return {
            'lines': formatted['lines'],
            'multiline_text': formatted['multiline'],
            'singleline_text': formatted['singleline'],
            'num_lines': formatted['num_lines'],
            'total_chars': formatted['total_chars'],
            'details': formatted['details'],
            'confidence_stats': calculate_confidence_stats(ocr_results)
        }


def main():
    """Main entry point.

    Returns:
        Process exit code (0 on success, 1 when the input image is missing).
    """
    parser = argparse.ArgumentParser(
        description="Number Plate OCR Pipeline",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
    python main.py image.jpg
    python main.py image.jpg --no-yolo
    python main.py image.jpg --save --no-viz
    python main.py image.jpg --output results.json
"""
    )
    parser.add_argument('image', type=str, help='Path to input image')
    parser.add_argument('--no-yolo', action='store_true',
                        help='Skip YOLO plate detection')
    parser.add_argument('--save', action='store_true',
                        help='Save extracted character images')
    parser.add_argument('--no-viz', action='store_true',
                        help='Disable visualization')
    parser.add_argument('--output', '-o', type=str,
                        help='Save results to JSON file')
    parser.add_argument('--quiet', '-q', action='store_true',
                        help='Suppress progress messages')
    args = parser.parse_args()

    # Validate input
    if not os.path.exists(args.image):
        print(f"Error: Image not found: {args.image}")
        return 1

    # Initialize pipeline
    pipeline = NumberPlateOCR(
        use_yolo=not args.no_yolo,
        verbose=not args.quiet
    )

    # Process image
    results = pipeline.process_image(
        args.image,
        save_contours=args.save,
        show_visualization=not args.no_viz
    )

    # Save results if requested
    if args.output:
        # Remove non-serializable items (numpy images, raw OCR payloads)
        save_results = {
            'image_path': results['image_path'],
            'num_plates': results['num_plates'],
            'plates': []
        }
        for plate in results['plates']:
            save_plate = {
                'plate_index': plate['plate_index'],
                'plate_bbox': plate['plate_bbox'],
                'lines': plate['lines'],
                'multiline_text': plate['multiline_text'],
                'singleline_text': plate['singleline_text'],
                'num_lines': plate['num_lines'],
                'total_chars': plate['total_chars'],
                'confidence_stats': plate['confidence_stats']
            }
            save_results['plates'].append(save_plate)
        with open(args.output, 'w', encoding='utf-8') as f:
            json.dump(save_results, f, indent=2, ensure_ascii=False)
        print(f"\n✓ Results saved to: {args.output}")

    return 0


if __name__ == "__main__":
    # sys.exit (not the site-injected exit()) so the code works under
    # frozen/embedded interpreters and propagates the return code.
    sys.exit(main())